annotate src/bzip2-1.0.6/blocksort.c @ 4:e13257ea84a4

Add bzip2, zlib, liblo, portaudio sources
author Chris Cannam
date Wed, 20 Mar 2013 13:59:52 +0000
parents
children
rev   line source
Chris@4 1
Chris@4 2 /*-------------------------------------------------------------*/
Chris@4 3 /*--- Block sorting machinery ---*/
Chris@4 4 /*--- blocksort.c ---*/
Chris@4 5 /*-------------------------------------------------------------*/
Chris@4 6
Chris@4 7 /* ------------------------------------------------------------------
Chris@4 8 This file is part of bzip2/libbzip2, a program and library for
Chris@4 9 lossless, block-sorting data compression.
Chris@4 10
Chris@4 11 bzip2/libbzip2 version 1.0.6 of 6 September 2010
Chris@4 12 Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
Chris@4 13
Chris@4 14 Please read the WARNING, DISCLAIMER and PATENTS sections in the
Chris@4 15 README file.
Chris@4 16
Chris@4 17 This program is released under the terms of the license contained
Chris@4 18 in the file LICENSE.
Chris@4 19 ------------------------------------------------------------------ */
Chris@4 20
Chris@4 21
Chris@4 22 #include "bzlib_private.h"
Chris@4 23
Chris@4 24 /*---------------------------------------------*/
Chris@4 25 /*--- Fallback O(N log(N)^2) sorting ---*/
Chris@4 26 /*--- algorithm, for repetitive blocks ---*/
Chris@4 27 /*---------------------------------------------*/
Chris@4 28
Chris@4 29 /*---------------------------------------------*/
Chris@4 30 static
Chris@4 31 __inline__
Chris@4 32 void fallbackSimpleSort ( UInt32* fmap,
Chris@4 33 UInt32* eclass,
Chris@4 34 Int32 lo,
Chris@4 35 Int32 hi )
Chris@4 36 {
Chris@4 37 Int32 i, j, tmp;
Chris@4 38 UInt32 ec_tmp;
Chris@4 39
Chris@4 40 if (lo == hi) return;
Chris@4 41
Chris@4 42 if (hi - lo > 3) {
Chris@4 43 for ( i = hi-4; i >= lo; i-- ) {
Chris@4 44 tmp = fmap[i];
Chris@4 45 ec_tmp = eclass[tmp];
Chris@4 46 for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
Chris@4 47 fmap[j-4] = fmap[j];
Chris@4 48 fmap[j-4] = tmp;
Chris@4 49 }
Chris@4 50 }
Chris@4 51
Chris@4 52 for ( i = hi-1; i >= lo; i-- ) {
Chris@4 53 tmp = fmap[i];
Chris@4 54 ec_tmp = eclass[tmp];
Chris@4 55 for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
Chris@4 56 fmap[j-1] = fmap[j];
Chris@4 57 fmap[j-1] = tmp;
Chris@4 58 }
Chris@4 59 }
Chris@4 60
Chris@4 61
Chris@4 62 /*---------------------------------------------*/
Chris@4 63 #define fswap(zz1, zz2) \
Chris@4 64 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
Chris@4 65
Chris@4 66 #define fvswap(zzp1, zzp2, zzn) \
Chris@4 67 { \
Chris@4 68 Int32 yyp1 = (zzp1); \
Chris@4 69 Int32 yyp2 = (zzp2); \
Chris@4 70 Int32 yyn = (zzn); \
Chris@4 71 while (yyn > 0) { \
Chris@4 72 fswap(fmap[yyp1], fmap[yyp2]); \
Chris@4 73 yyp1++; yyp2++; yyn--; \
Chris@4 74 } \
Chris@4 75 }
Chris@4 76
Chris@4 77
Chris@4 78 #define fmin(a,b) ((a) < (b)) ? (a) : (b)
Chris@4 79
Chris@4 80 #define fpush(lz,hz) { stackLo[sp] = lz; \
Chris@4 81 stackHi[sp] = hz; \
Chris@4 82 sp++; }
Chris@4 83
Chris@4 84 #define fpop(lz,hz) { sp--; \
Chris@4 85 lz = stackLo[sp]; \
Chris@4 86 hz = stackHi[sp]; }
Chris@4 87
Chris@4 88 #define FALLBACK_QSORT_SMALL_THRESH 10
Chris@4 89 #define FALLBACK_QSORT_STACK_SIZE 100
Chris@4 90
Chris@4 91
Chris@4 92 static
Chris@4 93 void fallbackQSort3 ( UInt32* fmap,
Chris@4 94 UInt32* eclass,
Chris@4 95 Int32 loSt,
Chris@4 96 Int32 hiSt )
Chris@4 97 {
Chris@4 98 Int32 unLo, unHi, ltLo, gtHi, n, m;
Chris@4 99 Int32 sp, lo, hi;
Chris@4 100 UInt32 med, r, r3;
Chris@4 101 Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
Chris@4 102 Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
Chris@4 103
Chris@4 104 r = 0;
Chris@4 105
Chris@4 106 sp = 0;
Chris@4 107 fpush ( loSt, hiSt );
Chris@4 108
Chris@4 109 while (sp > 0) {
Chris@4 110
Chris@4 111 AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
Chris@4 112
Chris@4 113 fpop ( lo, hi );
Chris@4 114 if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
Chris@4 115 fallbackSimpleSort ( fmap, eclass, lo, hi );
Chris@4 116 continue;
Chris@4 117 }
Chris@4 118
Chris@4 119 /* Random partitioning. Median of 3 sometimes fails to
Chris@4 120 avoid bad cases. Median of 9 seems to help but
Chris@4 121 looks rather expensive. This too seems to work but
Chris@4 122 is cheaper. Guidance for the magic constants
Chris@4 123 7621 and 32768 is taken from Sedgewick's algorithms
Chris@4 124 book, chapter 35.
Chris@4 125 */
Chris@4 126 r = ((r * 7621) + 1) % 32768;
Chris@4 127 r3 = r % 3;
Chris@4 128 if (r3 == 0) med = eclass[fmap[lo]]; else
Chris@4 129 if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
Chris@4 130 med = eclass[fmap[hi]];
Chris@4 131
Chris@4 132 unLo = ltLo = lo;
Chris@4 133 unHi = gtHi = hi;
Chris@4 134
Chris@4 135 while (1) {
Chris@4 136 while (1) {
Chris@4 137 if (unLo > unHi) break;
Chris@4 138 n = (Int32)eclass[fmap[unLo]] - (Int32)med;
Chris@4 139 if (n == 0) {
Chris@4 140 fswap(fmap[unLo], fmap[ltLo]);
Chris@4 141 ltLo++; unLo++;
Chris@4 142 continue;
Chris@4 143 };
Chris@4 144 if (n > 0) break;
Chris@4 145 unLo++;
Chris@4 146 }
Chris@4 147 while (1) {
Chris@4 148 if (unLo > unHi) break;
Chris@4 149 n = (Int32)eclass[fmap[unHi]] - (Int32)med;
Chris@4 150 if (n == 0) {
Chris@4 151 fswap(fmap[unHi], fmap[gtHi]);
Chris@4 152 gtHi--; unHi--;
Chris@4 153 continue;
Chris@4 154 };
Chris@4 155 if (n < 0) break;
Chris@4 156 unHi--;
Chris@4 157 }
Chris@4 158 if (unLo > unHi) break;
Chris@4 159 fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
Chris@4 160 }
Chris@4 161
Chris@4 162 AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
Chris@4 163
Chris@4 164 if (gtHi < ltLo) continue;
Chris@4 165
Chris@4 166 n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
Chris@4 167 m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
Chris@4 168
Chris@4 169 n = lo + unLo - ltLo - 1;
Chris@4 170 m = hi - (gtHi - unHi) + 1;
Chris@4 171
Chris@4 172 if (n - lo > hi - m) {
Chris@4 173 fpush ( lo, n );
Chris@4 174 fpush ( m, hi );
Chris@4 175 } else {
Chris@4 176 fpush ( m, hi );
Chris@4 177 fpush ( lo, n );
Chris@4 178 }
Chris@4 179 }
Chris@4 180 }
Chris@4 181
Chris@4 182 #undef fmin
Chris@4 183 #undef fpush
Chris@4 184 #undef fpop
Chris@4 185 #undef fswap
Chris@4 186 #undef fvswap
Chris@4 187 #undef FALLBACK_QSORT_SMALL_THRESH
Chris@4 188 #undef FALLBACK_QSORT_STACK_SIZE
Chris@4 189
Chris@4 190
Chris@4 191 /*---------------------------------------------*/
Chris@4 192 /* Pre:
Chris@4 193 nblock > 0
Chris@4 194 eclass exists for [0 .. nblock-1]
Chris@4 195 ((UChar*)eclass) [0 .. nblock-1] holds block
Chris@4 196 ptr exists for [0 .. nblock-1]
Chris@4 197
Chris@4 198 Post:
Chris@4 199 ((UChar*)eclass) [0 .. nblock-1] holds block
Chris@4 200 All other areas of eclass destroyed
Chris@4 201 fmap [0 .. nblock-1] holds sorted order
Chris@4 202 bhtab [ 0 .. 2+(nblock/32) ] destroyed
Chris@4 203 */
Chris@4 204
Chris@4 205 #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
Chris@4 206 #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
Chris@4 207 #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
Chris@4 208 #define WORD_BH(zz) bhtab[(zz) >> 5]
Chris@4 209 #define UNALIGNED_BH(zz) ((zz) & 0x01f)
Chris@4 210
Chris@4 211 static
Chris@4 212 void fallbackSort ( UInt32* fmap,
Chris@4 213 UInt32* eclass,
Chris@4 214 UInt32* bhtab,
Chris@4 215 Int32 nblock,
Chris@4 216 Int32 verb )
Chris@4 217 {
Chris@4 218 Int32 ftab[257];
Chris@4 219 Int32 ftabCopy[256];
Chris@4 220 Int32 H, i, j, k, l, r, cc, cc1;
Chris@4 221 Int32 nNotDone;
Chris@4 222 Int32 nBhtab;
Chris@4 223 UChar* eclass8 = (UChar*)eclass;
Chris@4 224
Chris@4 225 /*--
Chris@4 226 Initial 1-char radix sort to generate
Chris@4 227 initial fmap and initial BH bits.
Chris@4 228 --*/
Chris@4 229 if (verb >= 4)
Chris@4 230 VPrintf0 ( " bucket sorting ...\n" );
Chris@4 231 for (i = 0; i < 257; i++) ftab[i] = 0;
Chris@4 232 for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
Chris@4 233 for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
Chris@4 234 for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
Chris@4 235
Chris@4 236 for (i = 0; i < nblock; i++) {
Chris@4 237 j = eclass8[i];
Chris@4 238 k = ftab[j] - 1;
Chris@4 239 ftab[j] = k;
Chris@4 240 fmap[k] = i;
Chris@4 241 }
Chris@4 242
Chris@4 243 nBhtab = 2 + (nblock / 32);
Chris@4 244 for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
Chris@4 245 for (i = 0; i < 256; i++) SET_BH(ftab[i]);
Chris@4 246
Chris@4 247 /*--
Chris@4 248 Inductively refine the buckets. Kind-of an
Chris@4 249 "exponential radix sort" (!), inspired by the
Chris@4 250 Manber-Myers suffix array construction algorithm.
Chris@4 251 --*/
Chris@4 252
Chris@4 253 /*-- set sentinel bits for block-end detection --*/
Chris@4 254 for (i = 0; i < 32; i++) {
Chris@4 255 SET_BH(nblock + 2*i);
Chris@4 256 CLEAR_BH(nblock + 2*i + 1);
Chris@4 257 }
Chris@4 258
Chris@4 259 /*-- the log(N) loop --*/
Chris@4 260 H = 1;
Chris@4 261 while (1) {
Chris@4 262
Chris@4 263 if (verb >= 4)
Chris@4 264 VPrintf1 ( " depth %6d has ", H );
Chris@4 265
Chris@4 266 j = 0;
Chris@4 267 for (i = 0; i < nblock; i++) {
Chris@4 268 if (ISSET_BH(i)) j = i;
Chris@4 269 k = fmap[i] - H; if (k < 0) k += nblock;
Chris@4 270 eclass[k] = j;
Chris@4 271 }
Chris@4 272
Chris@4 273 nNotDone = 0;
Chris@4 274 r = -1;
Chris@4 275 while (1) {
Chris@4 276
Chris@4 277 /*-- find the next non-singleton bucket --*/
Chris@4 278 k = r + 1;
Chris@4 279 while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
Chris@4 280 if (ISSET_BH(k)) {
Chris@4 281 while (WORD_BH(k) == 0xffffffff) k += 32;
Chris@4 282 while (ISSET_BH(k)) k++;
Chris@4 283 }
Chris@4 284 l = k - 1;
Chris@4 285 if (l >= nblock) break;
Chris@4 286 while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
Chris@4 287 if (!ISSET_BH(k)) {
Chris@4 288 while (WORD_BH(k) == 0x00000000) k += 32;
Chris@4 289 while (!ISSET_BH(k)) k++;
Chris@4 290 }
Chris@4 291 r = k - 1;
Chris@4 292 if (r >= nblock) break;
Chris@4 293
Chris@4 294 /*-- now [l, r] bracket current bucket --*/
Chris@4 295 if (r > l) {
Chris@4 296 nNotDone += (r - l + 1);
Chris@4 297 fallbackQSort3 ( fmap, eclass, l, r );
Chris@4 298
Chris@4 299 /*-- scan bucket and generate header bits-- */
Chris@4 300 cc = -1;
Chris@4 301 for (i = l; i <= r; i++) {
Chris@4 302 cc1 = eclass[fmap[i]];
Chris@4 303 if (cc != cc1) { SET_BH(i); cc = cc1; };
Chris@4 304 }
Chris@4 305 }
Chris@4 306 }
Chris@4 307
Chris@4 308 if (verb >= 4)
Chris@4 309 VPrintf1 ( "%6d unresolved strings\n", nNotDone );
Chris@4 310
Chris@4 311 H *= 2;
Chris@4 312 if (H > nblock || nNotDone == 0) break;
Chris@4 313 }
Chris@4 314
Chris@4 315 /*--
Chris@4 316 Reconstruct the original block in
Chris@4 317 eclass8 [0 .. nblock-1], since the
Chris@4 318 previous phase destroyed it.
Chris@4 319 --*/
Chris@4 320 if (verb >= 4)
Chris@4 321 VPrintf0 ( " reconstructing block ...\n" );
Chris@4 322 j = 0;
Chris@4 323 for (i = 0; i < nblock; i++) {
Chris@4 324 while (ftabCopy[j] == 0) j++;
Chris@4 325 ftabCopy[j]--;
Chris@4 326 eclass8[fmap[i]] = (UChar)j;
Chris@4 327 }
Chris@4 328 AssertH ( j < 256, 1005 );
Chris@4 329 }
Chris@4 330
Chris@4 331 #undef SET_BH
Chris@4 332 #undef CLEAR_BH
Chris@4 333 #undef ISSET_BH
Chris@4 334 #undef WORD_BH
Chris@4 335 #undef UNALIGNED_BH
Chris@4 336
Chris@4 337
Chris@4 338 /*---------------------------------------------*/
Chris@4 339 /*--- The main, O(N^2 log(N)) sorting ---*/
Chris@4 340 /*--- algorithm. Faster for "normal" ---*/
Chris@4 341 /*--- non-repetitive blocks. ---*/
Chris@4 342 /*---------------------------------------------*/
Chris@4 343
Chris@4 344 /*---------------------------------------------*/
Chris@4 345 static
Chris@4 346 __inline__
Chris@4 347 Bool mainGtU ( UInt32 i1,
Chris@4 348 UInt32 i2,
Chris@4 349 UChar* block,
Chris@4 350 UInt16* quadrant,
Chris@4 351 UInt32 nblock,
Chris@4 352 Int32* budget )
Chris@4 353 {
Chris@4 354 Int32 k;
Chris@4 355 UChar c1, c2;
Chris@4 356 UInt16 s1, s2;
Chris@4 357
Chris@4 358 AssertD ( i1 != i2, "mainGtU" );
Chris@4 359 /* 1 */
Chris@4 360 c1 = block[i1]; c2 = block[i2];
Chris@4 361 if (c1 != c2) return (c1 > c2);
Chris@4 362 i1++; i2++;
Chris@4 363 /* 2 */
Chris@4 364 c1 = block[i1]; c2 = block[i2];
Chris@4 365 if (c1 != c2) return (c1 > c2);
Chris@4 366 i1++; i2++;
Chris@4 367 /* 3 */
Chris@4 368 c1 = block[i1]; c2 = block[i2];
Chris@4 369 if (c1 != c2) return (c1 > c2);
Chris@4 370 i1++; i2++;
Chris@4 371 /* 4 */
Chris@4 372 c1 = block[i1]; c2 = block[i2];
Chris@4 373 if (c1 != c2) return (c1 > c2);
Chris@4 374 i1++; i2++;
Chris@4 375 /* 5 */
Chris@4 376 c1 = block[i1]; c2 = block[i2];
Chris@4 377 if (c1 != c2) return (c1 > c2);
Chris@4 378 i1++; i2++;
Chris@4 379 /* 6 */
Chris@4 380 c1 = block[i1]; c2 = block[i2];
Chris@4 381 if (c1 != c2) return (c1 > c2);
Chris@4 382 i1++; i2++;
Chris@4 383 /* 7 */
Chris@4 384 c1 = block[i1]; c2 = block[i2];
Chris@4 385 if (c1 != c2) return (c1 > c2);
Chris@4 386 i1++; i2++;
Chris@4 387 /* 8 */
Chris@4 388 c1 = block[i1]; c2 = block[i2];
Chris@4 389 if (c1 != c2) return (c1 > c2);
Chris@4 390 i1++; i2++;
Chris@4 391 /* 9 */
Chris@4 392 c1 = block[i1]; c2 = block[i2];
Chris@4 393 if (c1 != c2) return (c1 > c2);
Chris@4 394 i1++; i2++;
Chris@4 395 /* 10 */
Chris@4 396 c1 = block[i1]; c2 = block[i2];
Chris@4 397 if (c1 != c2) return (c1 > c2);
Chris@4 398 i1++; i2++;
Chris@4 399 /* 11 */
Chris@4 400 c1 = block[i1]; c2 = block[i2];
Chris@4 401 if (c1 != c2) return (c1 > c2);
Chris@4 402 i1++; i2++;
Chris@4 403 /* 12 */
Chris@4 404 c1 = block[i1]; c2 = block[i2];
Chris@4 405 if (c1 != c2) return (c1 > c2);
Chris@4 406 i1++; i2++;
Chris@4 407
Chris@4 408 k = nblock + 8;
Chris@4 409
Chris@4 410 do {
Chris@4 411 /* 1 */
Chris@4 412 c1 = block[i1]; c2 = block[i2];
Chris@4 413 if (c1 != c2) return (c1 > c2);
Chris@4 414 s1 = quadrant[i1]; s2 = quadrant[i2];
Chris@4 415 if (s1 != s2) return (s1 > s2);
Chris@4 416 i1++; i2++;
Chris@4 417 /* 2 */
Chris@4 418 c1 = block[i1]; c2 = block[i2];
Chris@4 419 if (c1 != c2) return (c1 > c2);
Chris@4 420 s1 = quadrant[i1]; s2 = quadrant[i2];
Chris@4 421 if (s1 != s2) return (s1 > s2);
Chris@4 422 i1++; i2++;
Chris@4 423 /* 3 */
Chris@4 424 c1 = block[i1]; c2 = block[i2];
Chris@4 425 if (c1 != c2) return (c1 > c2);
Chris@4 426 s1 = quadrant[i1]; s2 = quadrant[i2];
Chris@4 427 if (s1 != s2) return (s1 > s2);
Chris@4 428 i1++; i2++;
Chris@4 429 /* 4 */
Chris@4 430 c1 = block[i1]; c2 = block[i2];
Chris@4 431 if (c1 != c2) return (c1 > c2);
Chris@4 432 s1 = quadrant[i1]; s2 = quadrant[i2];
Chris@4 433 if (s1 != s2) return (s1 > s2);
Chris@4 434 i1++; i2++;
Chris@4 435 /* 5 */
Chris@4 436 c1 = block[i1]; c2 = block[i2];
Chris@4 437 if (c1 != c2) return (c1 > c2);
Chris@4 438 s1 = quadrant[i1]; s2 = quadrant[i2];
Chris@4 439 if (s1 != s2) return (s1 > s2);
Chris@4 440 i1++; i2++;
Chris@4 441 /* 6 */
Chris@4 442 c1 = block[i1]; c2 = block[i2];
Chris@4 443 if (c1 != c2) return (c1 > c2);
Chris@4 444 s1 = quadrant[i1]; s2 = quadrant[i2];
Chris@4 445 if (s1 != s2) return (s1 > s2);
Chris@4 446 i1++; i2++;
Chris@4 447 /* 7 */
Chris@4 448 c1 = block[i1]; c2 = block[i2];
Chris@4 449 if (c1 != c2) return (c1 > c2);
Chris@4 450 s1 = quadrant[i1]; s2 = quadrant[i2];
Chris@4 451 if (s1 != s2) return (s1 > s2);
Chris@4 452 i1++; i2++;
Chris@4 453 /* 8 */
Chris@4 454 c1 = block[i1]; c2 = block[i2];
Chris@4 455 if (c1 != c2) return (c1 > c2);
Chris@4 456 s1 = quadrant[i1]; s2 = quadrant[i2];
Chris@4 457 if (s1 != s2) return (s1 > s2);
Chris@4 458 i1++; i2++;
Chris@4 459
Chris@4 460 if (i1 >= nblock) i1 -= nblock;
Chris@4 461 if (i2 >= nblock) i2 -= nblock;
Chris@4 462
Chris@4 463 k -= 8;
Chris@4 464 (*budget)--;
Chris@4 465 }
Chris@4 466 while (k >= 0);
Chris@4 467
Chris@4 468 return False;
Chris@4 469 }
Chris@4 470
Chris@4 471
Chris@4 472 /*---------------------------------------------*/
Chris@4 473 /*--
Chris@4 474 Knuth's increments seem to work better
Chris@4 475 than Incerpi-Sedgewick here. Possibly
Chris@4 476 because the number of elems to sort is
Chris@4 477 usually small, typically <= 20.
Chris@4 478 --*/
Chris@4 479 static
Chris@4 480 Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
Chris@4 481 9841, 29524, 88573, 265720,
Chris@4 482 797161, 2391484 };
Chris@4 483
Chris@4 484 static
Chris@4 485 void mainSimpleSort ( UInt32* ptr,
Chris@4 486 UChar* block,
Chris@4 487 UInt16* quadrant,
Chris@4 488 Int32 nblock,
Chris@4 489 Int32 lo,
Chris@4 490 Int32 hi,
Chris@4 491 Int32 d,
Chris@4 492 Int32* budget )
Chris@4 493 {
Chris@4 494 Int32 i, j, h, bigN, hp;
Chris@4 495 UInt32 v;
Chris@4 496
Chris@4 497 bigN = hi - lo + 1;
Chris@4 498 if (bigN < 2) return;
Chris@4 499
Chris@4 500 hp = 0;
Chris@4 501 while (incs[hp] < bigN) hp++;
Chris@4 502 hp--;
Chris@4 503
Chris@4 504 for (; hp >= 0; hp--) {
Chris@4 505 h = incs[hp];
Chris@4 506
Chris@4 507 i = lo + h;
Chris@4 508 while (True) {
Chris@4 509
Chris@4 510 /*-- copy 1 --*/
Chris@4 511 if (i > hi) break;
Chris@4 512 v = ptr[i];
Chris@4 513 j = i;
Chris@4 514 while ( mainGtU (
Chris@4 515 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
Chris@4 516 ) ) {
Chris@4 517 ptr[j] = ptr[j-h];
Chris@4 518 j = j - h;
Chris@4 519 if (j <= (lo + h - 1)) break;
Chris@4 520 }
Chris@4 521 ptr[j] = v;
Chris@4 522 i++;
Chris@4 523
Chris@4 524 /*-- copy 2 --*/
Chris@4 525 if (i > hi) break;
Chris@4 526 v = ptr[i];
Chris@4 527 j = i;
Chris@4 528 while ( mainGtU (
Chris@4 529 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
Chris@4 530 ) ) {
Chris@4 531 ptr[j] = ptr[j-h];
Chris@4 532 j = j - h;
Chris@4 533 if (j <= (lo + h - 1)) break;
Chris@4 534 }
Chris@4 535 ptr[j] = v;
Chris@4 536 i++;
Chris@4 537
Chris@4 538 /*-- copy 3 --*/
Chris@4 539 if (i > hi) break;
Chris@4 540 v = ptr[i];
Chris@4 541 j = i;
Chris@4 542 while ( mainGtU (
Chris@4 543 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
Chris@4 544 ) ) {
Chris@4 545 ptr[j] = ptr[j-h];
Chris@4 546 j = j - h;
Chris@4 547 if (j <= (lo + h - 1)) break;
Chris@4 548 }
Chris@4 549 ptr[j] = v;
Chris@4 550 i++;
Chris@4 551
Chris@4 552 if (*budget < 0) return;
Chris@4 553 }
Chris@4 554 }
Chris@4 555 }
Chris@4 556
Chris@4 557
Chris@4 558 /*---------------------------------------------*/
Chris@4 559 /*--
Chris@4 560 The following is an implementation of
Chris@4 561 an elegant 3-way quicksort for strings,
Chris@4 562 described in a paper "Fast Algorithms for
Chris@4 563 Sorting and Searching Strings", by Robert
Chris@4 564 Sedgewick and Jon L. Bentley.
Chris@4 565 --*/
Chris@4 566
Chris@4 567 #define mswap(zz1, zz2) \
Chris@4 568 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
Chris@4 569
Chris@4 570 #define mvswap(zzp1, zzp2, zzn) \
Chris@4 571 { \
Chris@4 572 Int32 yyp1 = (zzp1); \
Chris@4 573 Int32 yyp2 = (zzp2); \
Chris@4 574 Int32 yyn = (zzn); \
Chris@4 575 while (yyn > 0) { \
Chris@4 576 mswap(ptr[yyp1], ptr[yyp2]); \
Chris@4 577 yyp1++; yyp2++; yyn--; \
Chris@4 578 } \
Chris@4 579 }
Chris@4 580
Chris@4 581 static
Chris@4 582 __inline__
Chris@4 583 UChar mmed3 ( UChar a, UChar b, UChar c )
Chris@4 584 {
Chris@4 585 UChar t;
Chris@4 586 if (a > b) { t = a; a = b; b = t; };
Chris@4 587 if (b > c) {
Chris@4 588 b = c;
Chris@4 589 if (a > b) b = a;
Chris@4 590 }
Chris@4 591 return b;
Chris@4 592 }
Chris@4 593
Chris@4 594 #define mmin(a,b) ((a) < (b)) ? (a) : (b)
Chris@4 595
Chris@4 596 #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
Chris@4 597 stackHi[sp] = hz; \
Chris@4 598 stackD [sp] = dz; \
Chris@4 599 sp++; }
Chris@4 600
Chris@4 601 #define mpop(lz,hz,dz) { sp--; \
Chris@4 602 lz = stackLo[sp]; \
Chris@4 603 hz = stackHi[sp]; \
Chris@4 604 dz = stackD [sp]; }
Chris@4 605
Chris@4 606
Chris@4 607 #define mnextsize(az) (nextHi[az]-nextLo[az])
Chris@4 608
Chris@4 609 #define mnextswap(az,bz) \
Chris@4 610 { Int32 tz; \
Chris@4 611 tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
Chris@4 612 tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
Chris@4 613 tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
Chris@4 614
Chris@4 615
Chris@4 616 #define MAIN_QSORT_SMALL_THRESH 20
Chris@4 617 #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
Chris@4 618 #define MAIN_QSORT_STACK_SIZE 100
Chris@4 619
Chris@4 620 static
Chris@4 621 void mainQSort3 ( UInt32* ptr,
Chris@4 622 UChar* block,
Chris@4 623 UInt16* quadrant,
Chris@4 624 Int32 nblock,
Chris@4 625 Int32 loSt,
Chris@4 626 Int32 hiSt,
Chris@4 627 Int32 dSt,
Chris@4 628 Int32* budget )
Chris@4 629 {
Chris@4 630 Int32 unLo, unHi, ltLo, gtHi, n, m, med;
Chris@4 631 Int32 sp, lo, hi, d;
Chris@4 632
Chris@4 633 Int32 stackLo[MAIN_QSORT_STACK_SIZE];
Chris@4 634 Int32 stackHi[MAIN_QSORT_STACK_SIZE];
Chris@4 635 Int32 stackD [MAIN_QSORT_STACK_SIZE];
Chris@4 636
Chris@4 637 Int32 nextLo[3];
Chris@4 638 Int32 nextHi[3];
Chris@4 639 Int32 nextD [3];
Chris@4 640
Chris@4 641 sp = 0;
Chris@4 642 mpush ( loSt, hiSt, dSt );
Chris@4 643
Chris@4 644 while (sp > 0) {
Chris@4 645
Chris@4 646 AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
Chris@4 647
Chris@4 648 mpop ( lo, hi, d );
Chris@4 649 if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
Chris@4 650 d > MAIN_QSORT_DEPTH_THRESH) {
Chris@4 651 mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
Chris@4 652 if (*budget < 0) return;
Chris@4 653 continue;
Chris@4 654 }
Chris@4 655
Chris@4 656 med = (Int32)
Chris@4 657 mmed3 ( block[ptr[ lo ]+d],
Chris@4 658 block[ptr[ hi ]+d],
Chris@4 659 block[ptr[ (lo+hi)>>1 ]+d] );
Chris@4 660
Chris@4 661 unLo = ltLo = lo;
Chris@4 662 unHi = gtHi = hi;
Chris@4 663
Chris@4 664 while (True) {
Chris@4 665 while (True) {
Chris@4 666 if (unLo > unHi) break;
Chris@4 667 n = ((Int32)block[ptr[unLo]+d]) - med;
Chris@4 668 if (n == 0) {
Chris@4 669 mswap(ptr[unLo], ptr[ltLo]);
Chris@4 670 ltLo++; unLo++; continue;
Chris@4 671 };
Chris@4 672 if (n > 0) break;
Chris@4 673 unLo++;
Chris@4 674 }
Chris@4 675 while (True) {
Chris@4 676 if (unLo > unHi) break;
Chris@4 677 n = ((Int32)block[ptr[unHi]+d]) - med;
Chris@4 678 if (n == 0) {
Chris@4 679 mswap(ptr[unHi], ptr[gtHi]);
Chris@4 680 gtHi--; unHi--; continue;
Chris@4 681 };
Chris@4 682 if (n < 0) break;
Chris@4 683 unHi--;
Chris@4 684 }
Chris@4 685 if (unLo > unHi) break;
Chris@4 686 mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
Chris@4 687 }
Chris@4 688
Chris@4 689 AssertD ( unHi == unLo-1, "mainQSort3(2)" );
Chris@4 690
Chris@4 691 if (gtHi < ltLo) {
Chris@4 692 mpush(lo, hi, d+1 );
Chris@4 693 continue;
Chris@4 694 }
Chris@4 695
Chris@4 696 n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
Chris@4 697 m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
Chris@4 698
Chris@4 699 n = lo + unLo - ltLo - 1;
Chris@4 700 m = hi - (gtHi - unHi) + 1;
Chris@4 701
Chris@4 702 nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
Chris@4 703 nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
Chris@4 704 nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
Chris@4 705
Chris@4 706 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
Chris@4 707 if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
Chris@4 708 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
Chris@4 709
Chris@4 710 AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
Chris@4 711 AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
Chris@4 712
Chris@4 713 mpush (nextLo[0], nextHi[0], nextD[0]);
Chris@4 714 mpush (nextLo[1], nextHi[1], nextD[1]);
Chris@4 715 mpush (nextLo[2], nextHi[2], nextD[2]);
Chris@4 716 }
Chris@4 717 }
Chris@4 718
Chris@4 719 #undef mswap
Chris@4 720 #undef mvswap
Chris@4 721 #undef mpush
Chris@4 722 #undef mpop
Chris@4 723 #undef mmin
Chris@4 724 #undef mnextsize
Chris@4 725 #undef mnextswap
Chris@4 726 #undef MAIN_QSORT_SMALL_THRESH
Chris@4 727 #undef MAIN_QSORT_DEPTH_THRESH
Chris@4 728 #undef MAIN_QSORT_STACK_SIZE
Chris@4 729
Chris@4 730
Chris@4 731 /*---------------------------------------------*/
Chris@4 732 /* Pre:
Chris@4 733 nblock > N_OVERSHOOT
Chris@4 734 block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
Chris@4 735 ((UChar*)block32) [0 .. nblock-1] holds block
Chris@4 736 ptr exists for [0 .. nblock-1]
Chris@4 737
Chris@4 738 Post:
Chris@4 739 ((UChar*)block32) [0 .. nblock-1] holds block
Chris@4 740 All other areas of block32 destroyed
Chris@4 741 ftab [0 .. 65536 ] destroyed
Chris@4 742 ptr [0 .. nblock-1] holds sorted order
Chris@4 743 if (*budget < 0), sorting was abandoned
Chris@4 744 */
Chris@4 745
Chris@4 746 #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
Chris@4 747 #define SETMASK (1 << 21)
Chris@4 748 #define CLEARMASK (~(SETMASK))
Chris@4 749
Chris@4 750 static
Chris@4 751 void mainSort ( UInt32* ptr,
Chris@4 752 UChar* block,
Chris@4 753 UInt16* quadrant,
Chris@4 754 UInt32* ftab,
Chris@4 755 Int32 nblock,
Chris@4 756 Int32 verb,
Chris@4 757 Int32* budget )
Chris@4 758 {
Chris@4 759 Int32 i, j, k, ss, sb;
Chris@4 760 Int32 runningOrder[256];
Chris@4 761 Bool bigDone[256];
Chris@4 762 Int32 copyStart[256];
Chris@4 763 Int32 copyEnd [256];
Chris@4 764 UChar c1;
Chris@4 765 Int32 numQSorted;
Chris@4 766 UInt16 s;
Chris@4 767 if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
Chris@4 768
Chris@4 769 /*-- set up the 2-byte frequency table --*/
Chris@4 770 for (i = 65536; i >= 0; i--) ftab[i] = 0;
Chris@4 771
Chris@4 772 j = block[0] << 8;
Chris@4 773 i = nblock-1;
Chris@4 774 for (; i >= 3; i -= 4) {
Chris@4 775 quadrant[i] = 0;
Chris@4 776 j = (j >> 8) | ( ((UInt16)block[i]) << 8);
Chris@4 777 ftab[j]++;
Chris@4 778 quadrant[i-1] = 0;
Chris@4 779 j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
Chris@4 780 ftab[j]++;
Chris@4 781 quadrant[i-2] = 0;
Chris@4 782 j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
Chris@4 783 ftab[j]++;
Chris@4 784 quadrant[i-3] = 0;
Chris@4 785 j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
Chris@4 786 ftab[j]++;
Chris@4 787 }
Chris@4 788 for (; i >= 0; i--) {
Chris@4 789 quadrant[i] = 0;
Chris@4 790 j = (j >> 8) | ( ((UInt16)block[i]) << 8);
Chris@4 791 ftab[j]++;
Chris@4 792 }
Chris@4 793
Chris@4 794 /*-- (emphasises close relationship of block & quadrant) --*/
Chris@4 795 for (i = 0; i < BZ_N_OVERSHOOT; i++) {
Chris@4 796 block [nblock+i] = block[i];
Chris@4 797 quadrant[nblock+i] = 0;
Chris@4 798 }
Chris@4 799
Chris@4 800 if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
Chris@4 801
Chris@4 802 /*-- Complete the initial radix sort --*/
Chris@4 803 for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
Chris@4 804
Chris@4 805 s = block[0] << 8;
Chris@4 806 i = nblock-1;
Chris@4 807 for (; i >= 3; i -= 4) {
Chris@4 808 s = (s >> 8) | (block[i] << 8);
Chris@4 809 j = ftab[s] -1;
Chris@4 810 ftab[s] = j;
Chris@4 811 ptr[j] = i;
Chris@4 812 s = (s >> 8) | (block[i-1] << 8);
Chris@4 813 j = ftab[s] -1;
Chris@4 814 ftab[s] = j;
Chris@4 815 ptr[j] = i-1;
Chris@4 816 s = (s >> 8) | (block[i-2] << 8);
Chris@4 817 j = ftab[s] -1;
Chris@4 818 ftab[s] = j;
Chris@4 819 ptr[j] = i-2;
Chris@4 820 s = (s >> 8) | (block[i-3] << 8);
Chris@4 821 j = ftab[s] -1;
Chris@4 822 ftab[s] = j;
Chris@4 823 ptr[j] = i-3;
Chris@4 824 }
Chris@4 825 for (; i >= 0; i--) {
Chris@4 826 s = (s >> 8) | (block[i] << 8);
Chris@4 827 j = ftab[s] -1;
Chris@4 828 ftab[s] = j;
Chris@4 829 ptr[j] = i;
Chris@4 830 }
Chris@4 831
Chris@4 832 /*--
Chris@4 833 Now ftab contains the first loc of every small bucket.
Chris@4 834 Calculate the running order, from smallest to largest
Chris@4 835 big bucket.
Chris@4 836 --*/
Chris@4 837 for (i = 0; i <= 255; i++) {
Chris@4 838 bigDone [i] = False;
Chris@4 839 runningOrder[i] = i;
Chris@4 840 }
Chris@4 841
Chris@4 842 {
Chris@4 843 Int32 vv;
Chris@4 844 Int32 h = 1;
Chris@4 845 do h = 3 * h + 1; while (h <= 256);
Chris@4 846 do {
Chris@4 847 h = h / 3;
Chris@4 848 for (i = h; i <= 255; i++) {
Chris@4 849 vv = runningOrder[i];
Chris@4 850 j = i;
Chris@4 851 while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
Chris@4 852 runningOrder[j] = runningOrder[j-h];
Chris@4 853 j = j - h;
Chris@4 854 if (j <= (h - 1)) goto zero;
Chris@4 855 }
Chris@4 856 zero:
Chris@4 857 runningOrder[j] = vv;
Chris@4 858 }
Chris@4 859 } while (h != 1);
Chris@4 860 }
Chris@4 861
Chris@4 862 /*--
Chris@4 863 The main sorting loop.
Chris@4 864 --*/
Chris@4 865
Chris@4 866 numQSorted = 0;
Chris@4 867
Chris@4 868 for (i = 0; i <= 255; i++) {
Chris@4 869
Chris@4 870 /*--
Chris@4 871 Process big buckets, starting with the least full.
Chris@4 872 Basically this is a 3-step process in which we call
Chris@4 873 mainQSort3 to sort the small buckets [ss, j], but
Chris@4 874 also make a big effort to avoid the calls if we can.
Chris@4 875 --*/
Chris@4 876 ss = runningOrder[i];
Chris@4 877
Chris@4 878 /*--
Chris@4 879 Step 1:
Chris@4 880 Complete the big bucket [ss] by quicksorting
Chris@4 881 any unsorted small buckets [ss, j], for j != ss.
Chris@4 882 Hopefully previous pointer-scanning phases have already
Chris@4 883 completed many of the small buckets [ss, j], so
Chris@4 884 we don't have to sort them at all.
Chris@4 885 --*/
Chris@4 886 for (j = 0; j <= 255; j++) {
Chris@4 887 if (j != ss) {
Chris@4 888 sb = (ss << 8) + j;
Chris@4 889 if ( ! (ftab[sb] & SETMASK) ) {
Chris@4 890 Int32 lo = ftab[sb] & CLEARMASK;
Chris@4 891 Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
Chris@4 892 if (hi > lo) {
Chris@4 893 if (verb >= 4)
Chris@4 894 VPrintf4 ( " qsort [0x%x, 0x%x] "
Chris@4 895 "done %d this %d\n",
Chris@4 896 ss, j, numQSorted, hi - lo + 1 );
Chris@4 897 mainQSort3 (
Chris@4 898 ptr, block, quadrant, nblock,
Chris@4 899 lo, hi, BZ_N_RADIX, budget
Chris@4 900 );
Chris@4 901 numQSorted += (hi - lo + 1);
Chris@4 902 if (*budget < 0) return;
Chris@4 903 }
Chris@4 904 }
Chris@4 905 ftab[sb] |= SETMASK;
Chris@4 906 }
Chris@4 907 }
Chris@4 908
Chris@4 909 AssertH ( !bigDone[ss], 1006 );
Chris@4 910
Chris@4 911 /*--
Chris@4 912 Step 2:
Chris@4 913 Now scan this big bucket [ss] so as to synthesise the
Chris@4 914 sorted order for small buckets [t, ss] for all t,
Chris@4 915 including, magically, the bucket [ss,ss] too.
Chris@4 916 This will avoid doing Real Work in subsequent Step 1's.
Chris@4 917 --*/
Chris@4 918 {
Chris@4 919 for (j = 0; j <= 255; j++) {
Chris@4 920 copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
Chris@4 921 copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
Chris@4 922 }
Chris@4 923 for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
Chris@4 924 k = ptr[j]-1; if (k < 0) k += nblock;
Chris@4 925 c1 = block[k];
Chris@4 926 if (!bigDone[c1])
Chris@4 927 ptr[ copyStart[c1]++ ] = k;
Chris@4 928 }
Chris@4 929 for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
Chris@4 930 k = ptr[j]-1; if (k < 0) k += nblock;
Chris@4 931 c1 = block[k];
Chris@4 932 if (!bigDone[c1])
Chris@4 933 ptr[ copyEnd[c1]-- ] = k;
Chris@4 934 }
Chris@4 935 }
Chris@4 936
Chris@4 937 AssertH ( (copyStart[ss]-1 == copyEnd[ss])
Chris@4 938 ||
Chris@4 939 /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
Chris@4 940 Necessity for this case is demonstrated by compressing
Chris@4 941 a sequence of approximately 48.5 million of character
Chris@4 942 251; 1.0.0/1.0.1 will then die here. */
Chris@4 943 (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
Chris@4 944 1007 )
Chris@4 945
Chris@4 946 for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
Chris@4 947
Chris@4 948 /*--
Chris@4 949 Step 3:
Chris@4 950 The [ss] big bucket is now done. Record this fact,
Chris@4 951 and update the quadrant descriptors. Remember to
Chris@4 952 update quadrants in the overshoot area too, if
Chris@4 953 necessary. The "if (i < 255)" test merely skips
Chris@4 954 this updating for the last bucket processed, since
Chris@4 955 updating for the last bucket is pointless.
Chris@4 956
Chris@4 957 The quadrant array provides a way to incrementally
Chris@4 958 cache sort orderings, as they appear, so as to
Chris@4 959 make subsequent comparisons in fullGtU() complete
Chris@4 960 faster. For repetitive blocks this makes a big
Chris@4 961 difference (but not big enough to be able to avoid
Chris@4 962 the fallback sorting mechanism, exponential radix sort).
Chris@4 963
Chris@4 964 The precise meaning is: at all times:
Chris@4 965
Chris@4 966 for 0 <= i < nblock and 0 <= j <= nblock
Chris@4 967
Chris@4 968 if block[i] != block[j],
Chris@4 969
Chris@4 970 then the relative values of quadrant[i] and
Chris@4 971 quadrant[j] are meaningless.
Chris@4 972
Chris@4 973 else {
Chris@4 974 if quadrant[i] < quadrant[j]
Chris@4 975 then the string starting at i lexicographically
Chris@4 976 precedes the string starting at j
Chris@4 977
Chris@4 978 else if quadrant[i] > quadrant[j]
Chris@4 979 then the string starting at j lexicographically
Chris@4 980 precedes the string starting at i
Chris@4 981
Chris@4 982 else
Chris@4 983 the relative ordering of the strings starting
Chris@4 984 at i and j has not yet been determined.
Chris@4 985 }
Chris@4 986 --*/
Chris@4 987 bigDone[ss] = True;
Chris@4 988
Chris@4 989 if (i < 255) {
Chris@4 990 Int32 bbStart = ftab[ss << 8] & CLEARMASK;
Chris@4 991 Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
Chris@4 992 Int32 shifts = 0;
Chris@4 993
Chris@4 994 while ((bbSize >> shifts) > 65534) shifts++;
Chris@4 995
Chris@4 996 for (j = bbSize-1; j >= 0; j--) {
Chris@4 997 Int32 a2update = ptr[bbStart + j];
Chris@4 998 UInt16 qVal = (UInt16)(j >> shifts);
Chris@4 999 quadrant[a2update] = qVal;
Chris@4 1000 if (a2update < BZ_N_OVERSHOOT)
Chris@4 1001 quadrant[a2update + nblock] = qVal;
Chris@4 1002 }
Chris@4 1003 AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
Chris@4 1004 }
Chris@4 1005
Chris@4 1006 }
Chris@4 1007
Chris@4 1008 if (verb >= 4)
Chris@4 1009 VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
Chris@4 1010 nblock, numQSorted, nblock - numQSorted );
Chris@4 1011 }
Chris@4 1012
Chris@4 1013 #undef BIGFREQ
Chris@4 1014 #undef SETMASK
Chris@4 1015 #undef CLEARMASK
Chris@4 1016
Chris@4 1017
Chris@4 1018 /*---------------------------------------------*/
Chris@4 1019 /* Pre:
Chris@4 1020 nblock > 0
Chris@4 1021 arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
Chris@4 1022 ((UChar*)arr2) [0 .. nblock-1] holds block
Chris@4 1023 arr1 exists for [0 .. nblock-1]
Chris@4 1024
Chris@4 1025 Post:
Chris@4 1026 ((UChar*)arr2) [0 .. nblock-1] holds block
Chris@4 1027 All other areas of block destroyed
Chris@4 1028 ftab [ 0 .. 65536 ] destroyed
Chris@4 1029 arr1 [0 .. nblock-1] holds sorted order
Chris@4 1030 */
Chris@4 1031 void BZ2_blockSort ( EState* s )
Chris@4 1032 {
Chris@4 1033 UInt32* ptr = s->ptr;
Chris@4 1034 UChar* block = s->block;
Chris@4 1035 UInt32* ftab = s->ftab;
Chris@4 1036 Int32 nblock = s->nblock;
Chris@4 1037 Int32 verb = s->verbosity;
Chris@4 1038 Int32 wfact = s->workFactor;
Chris@4 1039 UInt16* quadrant;
Chris@4 1040 Int32 budget;
Chris@4 1041 Int32 budgetInit;
Chris@4 1042 Int32 i;
Chris@4 1043
Chris@4 1044 if (nblock < 10000) {
Chris@4 1045 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
Chris@4 1046 } else {
Chris@4 1047 /* Calculate the location for quadrant, remembering to get
Chris@4 1048 the alignment right. Assumes that &(block[0]) is at least
Chris@4 1049 2-byte aligned -- this should be ok since block is really
Chris@4 1050 the first section of arr2.
Chris@4 1051 */
Chris@4 1052 i = nblock+BZ_N_OVERSHOOT;
Chris@4 1053 if (i & 1) i++;
Chris@4 1054 quadrant = (UInt16*)(&(block[i]));
Chris@4 1055
Chris@4 1056 /* (wfact-1) / 3 puts the default-factor-30
Chris@4 1057 transition point at very roughly the same place as
Chris@4 1058 with v0.1 and v0.9.0.
Chris@4 1059 Not that it particularly matters any more, since the
Chris@4 1060 resulting compressed stream is now the same regardless
Chris@4 1061 of whether or not we use the main sort or fallback sort.
Chris@4 1062 */
Chris@4 1063 if (wfact < 1 ) wfact = 1;
Chris@4 1064 if (wfact > 100) wfact = 100;
Chris@4 1065 budgetInit = nblock * ((wfact-1) / 3);
Chris@4 1066 budget = budgetInit;
Chris@4 1067
Chris@4 1068 mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
Chris@4 1069 if (verb >= 3)
Chris@4 1070 VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
Chris@4 1071 budgetInit - budget,
Chris@4 1072 nblock,
Chris@4 1073 (float)(budgetInit - budget) /
Chris@4 1074 (float)(nblock==0 ? 1 : nblock) );
Chris@4 1075 if (budget < 0) {
Chris@4 1076 if (verb >= 2)
Chris@4 1077 VPrintf0 ( " too repetitive; using fallback"
Chris@4 1078 " sorting algorithm\n" );
Chris@4 1079 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
Chris@4 1080 }
Chris@4 1081 }
Chris@4 1082
Chris@4 1083 s->origPtr = -1;
Chris@4 1084 for (i = 0; i < s->nblock; i++)
Chris@4 1085 if (ptr[i] == 0)
Chris@4 1086 { s->origPtr = i; break; };
Chris@4 1087
Chris@4 1088 AssertH( s->origPtr != -1, 1003 );
Chris@4 1089 }
Chris@4 1090
Chris@4 1091
Chris@4 1092 /*-------------------------------------------------------------*/
Chris@4 1093 /*--- end blocksort.c ---*/
Chris@4 1094 /*-------------------------------------------------------------*/