cannam@89: cannam@89: /*-------------------------------------------------------------*/ cannam@89: /*--- Compression machinery (not incl block sorting) ---*/ cannam@89: /*--- compress.c ---*/ cannam@89: /*-------------------------------------------------------------*/ cannam@89: cannam@89: /* ------------------------------------------------------------------ cannam@89: This file is part of bzip2/libbzip2, a program and library for cannam@89: lossless, block-sorting data compression. cannam@89: cannam@89: bzip2/libbzip2 version 1.0.6 of 6 September 2010 cannam@89: Copyright (C) 1996-2010 Julian Seward cannam@89: cannam@89: Please read the WARNING, DISCLAIMER and PATENTS sections in the cannam@89: README file. cannam@89: cannam@89: This program is released under the terms of the license contained cannam@89: in the file LICENSE. cannam@89: ------------------------------------------------------------------ */ cannam@89: cannam@89: cannam@89: /* CHANGES cannam@89: 0.9.0 -- original version. cannam@89: 0.9.0a/b -- no changes in this file. cannam@89: 0.9.0c -- changed setting of nGroups in sendMTFValues() cannam@89: so as to do a bit better on small files cannam@89: */ cannam@89: cannam@89: #include "bzlib_private.h" cannam@89: cannam@89: cannam@89: /*---------------------------------------------------*/ cannam@89: /*--- Bit stream I/O ---*/ cannam@89: /*---------------------------------------------------*/ cannam@89: cannam@89: /*---------------------------------------------------*/ cannam@89: void BZ2_bsInitWrite ( EState* s ) cannam@89: { cannam@89: s->bsLive = 0; cannam@89: s->bsBuff = 0; cannam@89: } cannam@89: cannam@89: cannam@89: /*---------------------------------------------------*/ cannam@89: static cannam@89: void bsFinishWrite ( EState* s ) cannam@89: { cannam@89: while (s->bsLive > 0) { cannam@89: s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24); cannam@89: s->numZ++; cannam@89: s->bsBuff <<= 8; cannam@89: s->bsLive -= 8; cannam@89: } cannam@89: } cannam@89: cannam@89: cannam@89: /*---------------------------------------------------*/ cannam@89: #define bsNEEDW(nz) \ cannam@89: { \ cannam@89: while (s->bsLive >= 8) { \ cannam@89: s->zbits[s->numZ] \ cannam@89: = (UChar)(s->bsBuff >> 24); \ cannam@89: s->numZ++; \ cannam@89: s->bsBuff <<= 8; \ cannam@89: s->bsLive -= 8; \ cannam@89: } \ cannam@89: } cannam@89: cannam@89: cannam@89: /*---------------------------------------------------*/ cannam@89: static cannam@89: __inline__ cannam@89: void bsW ( EState* s, Int32 n, UInt32 v ) cannam@89: { cannam@89: bsNEEDW ( n ); cannam@89: s->bsBuff |= (v << (32 - s->bsLive - n)); cannam@89: s->bsLive += n; cannam@89: } cannam@89: cannam@89: cannam@89: /*---------------------------------------------------*/ cannam@89: static cannam@89: void bsPutUInt32 ( EState* s, UInt32 u ) cannam@89: { cannam@89: bsW ( s, 8, (u >> 24) & 0xffL ); cannam@89: bsW ( s, 8, (u >> 16) & 0xffL ); cannam@89: bsW ( s, 8, (u >> 8) & 0xffL ); cannam@89: bsW ( s, 8, u & 0xffL ); cannam@89: } cannam@89: cannam@89: cannam@89: /*---------------------------------------------------*/ cannam@89: static cannam@89: void bsPutUChar ( EState* s, UChar c ) cannam@89: { cannam@89: bsW( s, 8, (UInt32)c ); cannam@89: } cannam@89: cannam@89: cannam@89: /*---------------------------------------------------*/ cannam@89: /*--- The back end proper ---*/ cannam@89: /*---------------------------------------------------*/ cannam@89: cannam@89: /*---------------------------------------------------*/ cannam@89: static cannam@89: void makeMaps_e ( EState* s ) cannam@89: { cannam@89: Int32 i; cannam@89: s->nInUse = 0; cannam@89: for (i = 0; i < 256; i++) cannam@89: if (s->inUse[i]) { cannam@89: s->unseqToSeq[i] = s->nInUse; cannam@89: s->nInUse++; cannam@89: } cannam@89: } cannam@89: cannam@89: cannam@89: /*---------------------------------------------------*/ cannam@89: static cannam@89: void generateMTFValues ( EState* s ) cannam@89: { cannam@89: UChar yy[256]; cannam@89: Int32 i, j; cannam@89: Int32 zPend; cannam@89: Int32 wr; cannam@89: Int32 EOB; cannam@89: cannam@89: /* cannam@89: After sorting (eg, here), cannam@89: s->arr1 [ 0 .. s->nblock-1 ] holds sorted order, cannam@89: and cannam@89: ((UChar*)s->arr2) [ 0 .. s->nblock-1 ] cannam@89: holds the original block data. cannam@89: cannam@89: The first thing to do is generate the MTF values, cannam@89: and put them in cannam@89: ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ]. cannam@89: Because there are strictly fewer or equal MTF values cannam@89: than block values, ptr values in this area are overwritten cannam@89: with MTF values only when they are no longer needed. cannam@89: cannam@89: The final compressed bitstream is generated into the cannam@89: area starting at cannam@89: (UChar*) (&((UChar*)s->arr2)[s->nblock]) cannam@89: cannam@89: These storage aliases are set up in bzCompressInit(), cannam@89: except for the last one, which is arranged in cannam@89: compressBlock(). cannam@89: */ cannam@89: UInt32* ptr = s->ptr; cannam@89: UChar* block = s->block; cannam@89: UInt16* mtfv = s->mtfv; cannam@89: cannam@89: makeMaps_e ( s ); cannam@89: EOB = s->nInUse+1; cannam@89: cannam@89: for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0; cannam@89: cannam@89: wr = 0; cannam@89: zPend = 0; cannam@89: for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i; cannam@89: cannam@89: for (i = 0; i < s->nblock; i++) { cannam@89: UChar ll_i; cannam@89: AssertD ( wr <= i, "generateMTFValues(1)" ); cannam@89: j = ptr[i]-1; if (j < 0) j += s->nblock; cannam@89: ll_i = s->unseqToSeq[block[j]]; cannam@89: AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" ); cannam@89: cannam@89: if (yy[0] == ll_i) { cannam@89: zPend++; cannam@89: } else { cannam@89: cannam@89: if (zPend > 0) { cannam@89: zPend--; cannam@89: while (True) { cannam@89: if (zPend & 1) { cannam@89: mtfv[wr] = BZ_RUNB; wr++; cannam@89: s->mtfFreq[BZ_RUNB]++; cannam@89: } else { cannam@89: mtfv[wr] = BZ_RUNA; wr++; cannam@89: s->mtfFreq[BZ_RUNA]++; cannam@89: } cannam@89: if (zPend < 2) break; cannam@89: zPend = (zPend - 2) / 2; cannam@89: }; cannam@89: zPend = 0; cannam@89: } cannam@89: { cannam@89: register UChar rtmp; cannam@89: register UChar* ryy_j; cannam@89: register UChar rll_i; cannam@89: rtmp = yy[1]; cannam@89: yy[1] = yy[0]; cannam@89: ryy_j = &(yy[1]); cannam@89: rll_i = ll_i; cannam@89: while ( rll_i != rtmp ) { cannam@89: register UChar rtmp2; cannam@89: ryy_j++; cannam@89: rtmp2 = rtmp; cannam@89: rtmp = *ryy_j; cannam@89: *ryy_j = rtmp2; cannam@89: }; cannam@89: yy[0] = rtmp; cannam@89: j = ryy_j - &(yy[0]); cannam@89: mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++; cannam@89: } cannam@89: cannam@89: } cannam@89: } cannam@89: cannam@89: if (zPend > 0) { cannam@89: zPend--; cannam@89: while (True) { cannam@89: if (zPend & 1) { cannam@89: mtfv[wr] = BZ_RUNB; wr++; cannam@89: s->mtfFreq[BZ_RUNB]++; cannam@89: } else { cannam@89: mtfv[wr] = BZ_RUNA; wr++; cannam@89: s->mtfFreq[BZ_RUNA]++; cannam@89: } cannam@89: if (zPend < 2) break; cannam@89: zPend = (zPend - 2) / 2; cannam@89: }; cannam@89: zPend = 0; cannam@89: } cannam@89: cannam@89: mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++; cannam@89: cannam@89: s->nMTF = wr; cannam@89: } cannam@89: cannam@89: cannam@89: /*---------------------------------------------------*/ cannam@89: #define BZ_LESSER_ICOST 0 cannam@89: #define BZ_GREATER_ICOST 15 cannam@89: cannam@89: static cannam@89: void sendMTFValues ( EState* s ) cannam@89: { cannam@89: Int32 v, t, i, j, gs, ge, totc, bt, bc, iter; cannam@89: Int32 nSelectors, alphaSize, minLen, maxLen, selCtr; cannam@89: Int32 nGroups, nBytes; cannam@89: cannam@89: /*-- cannam@89: UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; cannam@89: is a global since the decoder also needs it. cannam@89: cannam@89: Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; cannam@89: Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; cannam@89: are also globals only used in this proc. cannam@89: Made global to keep stack frame size small. cannam@89: --*/ cannam@89: cannam@89: cannam@89: UInt16 cost[BZ_N_GROUPS]; cannam@89: Int32 fave[BZ_N_GROUPS]; cannam@89: cannam@89: UInt16* mtfv = s->mtfv; cannam@89: cannam@89: if (s->verbosity >= 3) cannam@89: VPrintf3( " %d in block, %d after MTF & 1-2 coding, " cannam@89: "%d+2 syms in use\n", cannam@89: s->nblock, s->nMTF, s->nInUse ); cannam@89: cannam@89: alphaSize = s->nInUse+2; cannam@89: for (t = 0; t < BZ_N_GROUPS; t++) cannam@89: for (v = 0; v < alphaSize; v++) cannam@89: s->len[t][v] = BZ_GREATER_ICOST; cannam@89: cannam@89: /*--- Decide how many coding tables to use ---*/ cannam@89: AssertH ( s->nMTF > 0, 3001 ); cannam@89: if (s->nMTF < 200) nGroups = 2; else cannam@89: if (s->nMTF < 600) nGroups = 3; else cannam@89: if (s->nMTF < 1200) nGroups = 4; else cannam@89: if (s->nMTF < 2400) nGroups = 5; else cannam@89: nGroups = 6; cannam@89: cannam@89: /*--- Generate an initial set of coding tables ---*/ cannam@89: { cannam@89: Int32 nPart, remF, tFreq, aFreq; cannam@89: cannam@89: nPart = nGroups; cannam@89: remF = s->nMTF; cannam@89: gs = 0; cannam@89: while (nPart > 0) { cannam@89: tFreq = remF / nPart; cannam@89: ge = gs-1; cannam@89: aFreq = 0; cannam@89: while (aFreq < tFreq && ge < alphaSize-1) { cannam@89: ge++; cannam@89: aFreq += s->mtfFreq[ge]; cannam@89: } cannam@89: cannam@89: if (ge > gs cannam@89: && nPart != nGroups && nPart != 1 cannam@89: && ((nGroups-nPart) % 2 == 1)) { cannam@89: aFreq -= s->mtfFreq[ge]; cannam@89: ge--; cannam@89: } cannam@89: cannam@89: if (s->verbosity >= 3) cannam@89: VPrintf5( " initial group %d, [%d .. %d], " cannam@89: "has %d syms (%4.1f%%)\n", cannam@89: nPart, gs, ge, aFreq, cannam@89: (100.0 * (float)aFreq) / (float)(s->nMTF) ); cannam@89: cannam@89: for (v = 0; v < alphaSize; v++) cannam@89: if (v >= gs && v <= ge) cannam@89: s->len[nPart-1][v] = BZ_LESSER_ICOST; else cannam@89: s->len[nPart-1][v] = BZ_GREATER_ICOST; cannam@89: cannam@89: nPart--; cannam@89: gs = ge+1; cannam@89: remF -= aFreq; cannam@89: } cannam@89: } cannam@89: cannam@89: /*--- cannam@89: Iterate up to BZ_N_ITERS times to improve the tables. cannam@89: ---*/ cannam@89: for (iter = 0; iter < BZ_N_ITERS; iter++) { cannam@89: cannam@89: for (t = 0; t < nGroups; t++) fave[t] = 0; cannam@89: cannam@89: for (t = 0; t < nGroups; t++) cannam@89: for (v = 0; v < alphaSize; v++) cannam@89: s->rfreq[t][v] = 0; cannam@89: cannam@89: /*--- cannam@89: Set up an auxiliary length table which is used to fast-track cannam@89: the common case (nGroups == 6). cannam@89: ---*/ cannam@89: if (nGroups == 6) { cannam@89: for (v = 0; v < alphaSize; v++) { cannam@89: s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v]; cannam@89: s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v]; cannam@89: s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v]; cannam@89: } cannam@89: } cannam@89: cannam@89: nSelectors = 0; cannam@89: totc = 0; cannam@89: gs = 0; cannam@89: while (True) { cannam@89: cannam@89: /*--- Set group start & end marks. --*/ cannam@89: if (gs >= s->nMTF) break; cannam@89: ge = gs + BZ_G_SIZE - 1; cannam@89: if (ge >= s->nMTF) ge = s->nMTF-1; cannam@89: cannam@89: /*-- cannam@89: Calculate the cost of this group as coded cannam@89: by each of the coding tables. cannam@89: --*/ cannam@89: for (t = 0; t < nGroups; t++) cost[t] = 0; cannam@89: cannam@89: if (nGroups == 6 && 50 == ge-gs+1) { cannam@89: /*--- fast track the common case ---*/ cannam@89: register UInt32 cost01, cost23, cost45; cannam@89: register UInt16 icv; cannam@89: cost01 = cost23 = cost45 = 0; cannam@89: cannam@89: # define BZ_ITER(nn) \ cannam@89: icv = mtfv[gs+(nn)]; \ cannam@89: cost01 += s->len_pack[icv][0]; \ cannam@89: cost23 += s->len_pack[icv][1]; \ cannam@89: cost45 += s->len_pack[icv][2]; \ cannam@89: cannam@89: BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4); cannam@89: BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9); cannam@89: BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14); cannam@89: BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19); cannam@89: BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24); cannam@89: BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29); cannam@89: BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34); cannam@89: BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39); cannam@89: BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44); cannam@89: BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49); cannam@89: cannam@89: # undef BZ_ITER cannam@89: cannam@89: cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16; cannam@89: cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16; cannam@89: cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16; cannam@89: cannam@89: } else { cannam@89: /*--- slow version which correctly handles all situations ---*/ cannam@89: for (i = gs; i <= ge; i++) { cannam@89: UInt16 icv = mtfv[i]; cannam@89: for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv]; cannam@89: } cannam@89: } cannam@89: cannam@89: /*-- cannam@89: Find the coding table which is best for this group, cannam@89: and record its identity in the selector table. cannam@89: --*/ cannam@89: bc = 999999999; bt = -1; cannam@89: for (t = 0; t < nGroups; t++) cannam@89: if (cost[t] < bc) { bc = cost[t]; bt = t; }; cannam@89: totc += bc; cannam@89: fave[bt]++; cannam@89: s->selector[nSelectors] = bt; cannam@89: nSelectors++; cannam@89: cannam@89: /*-- cannam@89: Increment the symbol frequencies for the selected table. cannam@89: --*/ cannam@89: if (nGroups == 6 && 50 == ge-gs+1) { cannam@89: /*--- fast track the common case ---*/ cannam@89: cannam@89: # define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++ cannam@89: cannam@89: BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4); cannam@89: BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9); cannam@89: BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14); cannam@89: BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19); cannam@89: BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24); cannam@89: BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29); cannam@89: BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34); cannam@89: BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39); cannam@89: BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44); cannam@89: BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49); cannam@89: cannam@89: # undef BZ_ITUR cannam@89: cannam@89: } else { cannam@89: /*--- slow version which correctly handles all situations ---*/ cannam@89: for (i = gs; i <= ge; i++) cannam@89: s->rfreq[bt][ mtfv[i] ]++; cannam@89: } cannam@89: cannam@89: gs = ge+1; cannam@89: } cannam@89: if (s->verbosity >= 3) { cannam@89: VPrintf2 ( " pass %d: size is %d, grp uses are ", cannam@89: iter+1, totc/8 ); cannam@89: for (t = 0; t < nGroups; t++) cannam@89: VPrintf1 ( "%d ", fave[t] ); cannam@89: VPrintf0 ( "\n" ); cannam@89: } cannam@89: cannam@89: /*-- cannam@89: Recompute the tables based on the accumulated frequencies. cannam@89: --*/ cannam@89: /* maxLen was changed from 20 to 17 in bzip2-1.0.3. See cannam@89: comment in huffman.c for details. */ cannam@89: for (t = 0; t < nGroups; t++) cannam@89: BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]), cannam@89: alphaSize, 17 /*20*/ ); cannam@89: } cannam@89: cannam@89: cannam@89: AssertH( nGroups < 8, 3002 ); cannam@89: AssertH( nSelectors < 32768 && cannam@89: nSelectors <= (2 + (900000 / BZ_G_SIZE)), cannam@89: 3003 ); cannam@89: cannam@89: cannam@89: /*--- Compute MTF values for the selectors. ---*/ cannam@89: { cannam@89: UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp; cannam@89: for (i = 0; i < nGroups; i++) pos[i] = i; cannam@89: for (i = 0; i < nSelectors; i++) { cannam@89: ll_i = s->selector[i]; cannam@89: j = 0; cannam@89: tmp = pos[j]; cannam@89: while ( ll_i != tmp ) { cannam@89: j++; cannam@89: tmp2 = tmp; cannam@89: tmp = pos[j]; cannam@89: pos[j] = tmp2; cannam@89: }; cannam@89: pos[0] = tmp; cannam@89: s->selectorMtf[i] = j; cannam@89: } cannam@89: }; cannam@89: cannam@89: /*--- Assign actual codes for the tables. --*/ cannam@89: for (t = 0; t < nGroups; t++) { cannam@89: minLen = 32; cannam@89: maxLen = 0; cannam@89: for (i = 0; i < alphaSize; i++) { cannam@89: if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; cannam@89: if (s->len[t][i] < minLen) minLen = s->len[t][i]; cannam@89: } cannam@89: AssertH ( !(maxLen > 17 /*20*/ ), 3004 ); cannam@89: AssertH ( !(minLen < 1), 3005 ); cannam@89: BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]), cannam@89: minLen, maxLen, alphaSize ); cannam@89: } cannam@89: cannam@89: /*--- Transmit the mapping table. ---*/ cannam@89: { cannam@89: Bool inUse16[16]; cannam@89: for (i = 0; i < 16; i++) { cannam@89: inUse16[i] = False; cannam@89: for (j = 0; j < 16; j++) cannam@89: if (s->inUse[i * 16 + j]) inUse16[i] = True; cannam@89: } cannam@89: cannam@89: nBytes = s->numZ; cannam@89: for (i = 0; i < 16; i++) cannam@89: if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0); cannam@89: cannam@89: for (i = 0; i < 16; i++) cannam@89: if (inUse16[i]) cannam@89: for (j = 0; j < 16; j++) { cannam@89: if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0); cannam@89: } cannam@89: cannam@89: if (s->verbosity >= 3) cannam@89: VPrintf1( " bytes: mapping %d, ", s->numZ-nBytes ); cannam@89: } cannam@89: cannam@89: /*--- Now the selectors. ---*/ cannam@89: nBytes = s->numZ; cannam@89: bsW ( s, 3, nGroups ); cannam@89: bsW ( s, 15, nSelectors ); cannam@89: for (i = 0; i < nSelectors; i++) { cannam@89: for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1); cannam@89: bsW(s,1,0); cannam@89: } cannam@89: if (s->verbosity >= 3) cannam@89: VPrintf1( "selectors %d, ", s->numZ-nBytes ); cannam@89: cannam@89: /*--- Now the coding tables. ---*/ cannam@89: nBytes = s->numZ; cannam@89: cannam@89: for (t = 0; t < nGroups; t++) { cannam@89: Int32 curr = s->len[t][0]; cannam@89: bsW ( s, 5, curr ); cannam@89: for (i = 0; i < alphaSize; i++) { cannam@89: while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ }; cannam@89: while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ }; cannam@89: bsW ( s, 1, 0 ); cannam@89: } cannam@89: } cannam@89: cannam@89: if (s->verbosity >= 3) cannam@89: VPrintf1 ( "code lengths %d, ", s->numZ-nBytes ); cannam@89: cannam@89: /*--- And finally, the block data proper ---*/ cannam@89: nBytes = s->numZ; cannam@89: selCtr = 0; cannam@89: gs = 0; cannam@89: while (True) { cannam@89: if (gs >= s->nMTF) break; cannam@89: ge = gs + BZ_G_SIZE - 1; cannam@89: if (ge >= s->nMTF) ge = s->nMTF-1; cannam@89: AssertH ( s->selector[selCtr] < nGroups, 3006 ); cannam@89: cannam@89: if (nGroups == 6 && 50 == ge-gs+1) { cannam@89: /*--- fast track the common case ---*/ cannam@89: UInt16 mtfv_i; cannam@89: UChar* s_len_sel_selCtr cannam@89: = &(s->len[s->selector[selCtr]][0]); cannam@89: Int32* s_code_sel_selCtr cannam@89: = &(s->code[s->selector[selCtr]][0]); cannam@89: cannam@89: # define BZ_ITAH(nn) \ cannam@89: mtfv_i = mtfv[gs+(nn)]; \ cannam@89: bsW ( s, \ cannam@89: s_len_sel_selCtr[mtfv_i], \ cannam@89: s_code_sel_selCtr[mtfv_i] ) cannam@89: cannam@89: BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4); cannam@89: BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9); cannam@89: BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14); cannam@89: BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19); cannam@89: BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24); cannam@89: BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29); cannam@89: BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34); cannam@89: BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39); cannam@89: BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44); cannam@89: BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49); cannam@89: cannam@89: # undef BZ_ITAH cannam@89: cannam@89: } else { cannam@89: /*--- slow version which correctly handles all situations ---*/ cannam@89: for (i = gs; i <= ge; i++) { cannam@89: bsW ( s, cannam@89: s->len [s->selector[selCtr]] [mtfv[i]], cannam@89: s->code [s->selector[selCtr]] [mtfv[i]] ); cannam@89: } cannam@89: } cannam@89: cannam@89: cannam@89: gs = ge+1; cannam@89: selCtr++; cannam@89: } cannam@89: AssertH( selCtr == nSelectors, 3007 ); cannam@89: cannam@89: if (s->verbosity >= 3) cannam@89: VPrintf1( "codes %d\n", s->numZ-nBytes ); cannam@89: } cannam@89: cannam@89: cannam@89: /*---------------------------------------------------*/ cannam@89: void BZ2_compressBlock ( EState* s, Bool is_last_block ) cannam@89: { cannam@89: if (s->nblock > 0) { cannam@89: cannam@89: BZ_FINALISE_CRC ( s->blockCRC ); cannam@89: s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31); cannam@89: s->combinedCRC ^= s->blockCRC; cannam@89: if (s->blockNo > 1) s->numZ = 0; cannam@89: cannam@89: if (s->verbosity >= 2) cannam@89: VPrintf4( " block %d: crc = 0x%08x, " cannam@89: "combined CRC = 0x%08x, size = %d\n", cannam@89: s->blockNo, s->blockCRC, s->combinedCRC, s->nblock ); cannam@89: cannam@89: BZ2_blockSort ( s ); cannam@89: } cannam@89: cannam@89: s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]); cannam@89: cannam@89: /*-- If this is the first block, create the stream header. --*/ cannam@89: if (s->blockNo == 1) { cannam@89: BZ2_bsInitWrite ( s ); cannam@89: bsPutUChar ( s, BZ_HDR_B ); cannam@89: bsPutUChar ( s, BZ_HDR_Z ); cannam@89: bsPutUChar ( s, BZ_HDR_h ); cannam@89: bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) ); cannam@89: } cannam@89: cannam@89: if (s->nblock > 0) { cannam@89: cannam@89: bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 ); cannam@89: bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 ); cannam@89: bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 ); cannam@89: cannam@89: /*-- Now the block's CRC, so it is in a known place. --*/ cannam@89: bsPutUInt32 ( s, s->blockCRC ); cannam@89: cannam@89: /*-- cannam@89: Now a single bit indicating (non-)randomisation. cannam@89: As of version 0.9.5, we use a better sorting algorithm cannam@89: which makes randomisation unnecessary. So always set cannam@89: the randomised bit to 'no'. Of course, the decoder cannam@89: still needs to be able to handle randomised blocks cannam@89: so as to maintain backwards compatibility with cannam@89: older versions of bzip2. cannam@89: --*/ cannam@89: bsW(s,1,0); cannam@89: cannam@89: bsW ( s, 24, s->origPtr ); cannam@89: generateMTFValues ( s ); cannam@89: sendMTFValues ( s ); cannam@89: } cannam@89: cannam@89: cannam@89: /*-- If this is the last block, add the stream trailer. --*/ cannam@89: if (is_last_block) { cannam@89: cannam@89: bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 ); cannam@89: bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 ); cannam@89: bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 ); cannam@89: bsPutUInt32 ( s, s->combinedCRC ); cannam@89: if (s->verbosity >= 2) cannam@89: VPrintf1( " final combined CRC = 0x%08x\n ", s->combinedCRC ); cannam@89: bsFinishWrite ( s ); cannam@89: } cannam@89: } cannam@89: cannam@89: cannam@89: /*-------------------------------------------------------------*/ cannam@89: /*--- end compress.c ---*/ cannam@89: /*-------------------------------------------------------------*/