cannam@89
|
1
|
cannam@89
|
2 /*-------------------------------------------------------------*/
|
cannam@89
|
3 /*--- Compression machinery (not incl block sorting) ---*/
|
cannam@89
|
4 /*--- compress.c ---*/
|
cannam@89
|
5 /*-------------------------------------------------------------*/
|
cannam@89
|
6
|
cannam@89
|
7 /* ------------------------------------------------------------------
|
cannam@89
|
8 This file is part of bzip2/libbzip2, a program and library for
|
cannam@89
|
9 lossless, block-sorting data compression.
|
cannam@89
|
10
|
cannam@89
|
11 bzip2/libbzip2 version 1.0.6 of 6 September 2010
|
cannam@89
|
12 Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
|
cannam@89
|
13
|
cannam@89
|
14 Please read the WARNING, DISCLAIMER and PATENTS sections in the
|
cannam@89
|
15 README file.
|
cannam@89
|
16
|
cannam@89
|
17 This program is released under the terms of the license contained
|
cannam@89
|
18 in the file LICENSE.
|
cannam@89
|
19 ------------------------------------------------------------------ */
|
cannam@89
|
20
|
cannam@89
|
21
|
cannam@89
|
22 /* CHANGES
|
cannam@89
|
23 0.9.0 -- original version.
|
cannam@89
|
24 0.9.0a/b -- no changes in this file.
|
cannam@89
|
25 0.9.0c -- changed setting of nGroups in sendMTFValues()
|
cannam@89
|
26 so as to do a bit better on small files
|
cannam@89
|
27 */
|
cannam@89
|
28
|
cannam@89
|
29 #include "bzlib_private.h"
|
cannam@89
|
30
|
cannam@89
|
31
|
cannam@89
|
32 /*---------------------------------------------------*/
|
cannam@89
|
33 /*--- Bit stream I/O ---*/
|
cannam@89
|
34 /*---------------------------------------------------*/
|
cannam@89
|
35
|
cannam@89
|
36 /*---------------------------------------------------*/
|
cannam@89
|
37 void BZ2_bsInitWrite ( EState* s )
|
cannam@89
|
38 {
|
cannam@89
|
39 s->bsLive = 0;
|
cannam@89
|
40 s->bsBuff = 0;
|
cannam@89
|
41 }
|
cannam@89
|
42
|
cannam@89
|
43
|
cannam@89
|
44 /*---------------------------------------------------*/
|
cannam@89
|
45 static
|
cannam@89
|
46 void bsFinishWrite ( EState* s )
|
cannam@89
|
47 {
|
cannam@89
|
48 while (s->bsLive > 0) {
|
cannam@89
|
49 s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24);
|
cannam@89
|
50 s->numZ++;
|
cannam@89
|
51 s->bsBuff <<= 8;
|
cannam@89
|
52 s->bsLive -= 8;
|
cannam@89
|
53 }
|
cannam@89
|
54 }
|
cannam@89
|
55
|
cannam@89
|
56
|
cannam@89
|
57 /*---------------------------------------------------*/
|
cannam@89
|
/*--
   Move every complete byte held in the accumulator to the
   output buffer: while at least 8 bits are pending, the top
   byte of s->bsBuff is stored at s->zbits[s->numZ], numZ is
   advanced, and the accumulator is shifted up by 8.

   The macro operates on whatever `s' is in scope at the point
   of use.  The argument nz is not referenced; it is kept so
   existing call sites continue to compile.

   The body is wrapped in do { } while (0) so that the macro
   behaves as a single statement, e.g. between `if' and `else'.
--*/
#define bsNEEDW(nz)                           \
do {                                          \
   while (s->bsLive >= 8) {                   \
      s->zbits[s->numZ]                       \
         = (UChar)(s->bsBuff >> 24);          \
      s->numZ++;                              \
      s->bsBuff <<= 8;                        \
      s->bsLive -= 8;                         \
   }                                          \
} while (0)
|
cannam@89
|
68
|
cannam@89
|
69
|
cannam@89
|
70 /*---------------------------------------------------*/
|
cannam@89
|
71 static
|
cannam@89
|
72 __inline__
|
cannam@89
|
73 void bsW ( EState* s, Int32 n, UInt32 v )
|
cannam@89
|
74 {
|
cannam@89
|
75 bsNEEDW ( n );
|
cannam@89
|
76 s->bsBuff |= (v << (32 - s->bsLive - n));
|
cannam@89
|
77 s->bsLive += n;
|
cannam@89
|
78 }
|
cannam@89
|
79
|
cannam@89
|
80
|
cannam@89
|
81 /*---------------------------------------------------*/
|
cannam@89
|
82 static
|
cannam@89
|
83 void bsPutUInt32 ( EState* s, UInt32 u )
|
cannam@89
|
84 {
|
cannam@89
|
85 bsW ( s, 8, (u >> 24) & 0xffL );
|
cannam@89
|
86 bsW ( s, 8, (u >> 16) & 0xffL );
|
cannam@89
|
87 bsW ( s, 8, (u >> 8) & 0xffL );
|
cannam@89
|
88 bsW ( s, 8, u & 0xffL );
|
cannam@89
|
89 }
|
cannam@89
|
90
|
cannam@89
|
91
|
cannam@89
|
92 /*---------------------------------------------------*/
|
cannam@89
|
93 static
|
cannam@89
|
94 void bsPutUChar ( EState* s, UChar c )
|
cannam@89
|
95 {
|
cannam@89
|
96 bsW( s, 8, (UInt32)c );
|
cannam@89
|
97 }
|
cannam@89
|
98
|
cannam@89
|
99
|
cannam@89
|
100 /*---------------------------------------------------*/
|
cannam@89
|
101 /*--- The back end proper ---*/
|
cannam@89
|
102 /*---------------------------------------------------*/
|
cannam@89
|
103
|
cannam@89
|
104 /*---------------------------------------------------*/
|
cannam@89
|
105 static
|
cannam@89
|
106 void makeMaps_e ( EState* s )
|
cannam@89
|
107 {
|
cannam@89
|
108 Int32 i;
|
cannam@89
|
109 s->nInUse = 0;
|
cannam@89
|
110 for (i = 0; i < 256; i++)
|
cannam@89
|
111 if (s->inUse[i]) {
|
cannam@89
|
112 s->unseqToSeq[i] = s->nInUse;
|
cannam@89
|
113 s->nInUse++;
|
cannam@89
|
114 }
|
cannam@89
|
115 }
|
cannam@89
|
116
|
cannam@89
|
117
|
cannam@89
|
118 /*---------------------------------------------------*/
|
cannam@89
|
119 static
|
cannam@89
|
120 void generateMTFValues ( EState* s )
|
cannam@89
|
121 {
|
cannam@89
|
122 UChar yy[256];
|
cannam@89
|
123 Int32 i, j;
|
cannam@89
|
124 Int32 zPend;
|
cannam@89
|
125 Int32 wr;
|
cannam@89
|
126 Int32 EOB;
|
cannam@89
|
127
|
cannam@89
|
128 /*
|
cannam@89
|
129 After sorting (eg, here),
|
cannam@89
|
130 s->arr1 [ 0 .. s->nblock-1 ] holds sorted order,
|
cannam@89
|
131 and
|
cannam@89
|
132 ((UChar*)s->arr2) [ 0 .. s->nblock-1 ]
|
cannam@89
|
133 holds the original block data.
|
cannam@89
|
134
|
cannam@89
|
135 The first thing to do is generate the MTF values,
|
cannam@89
|
136 and put them in
|
cannam@89
|
137 ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ].
|
cannam@89
|
138 Because there are strictly fewer or equal MTF values
|
cannam@89
|
139 than block values, ptr values in this area are overwritten
|
cannam@89
|
140 with MTF values only when they are no longer needed.
|
cannam@89
|
141
|
cannam@89
|
142 The final compressed bitstream is generated into the
|
cannam@89
|
143 area starting at
|
cannam@89
|
144 (UChar*) (&((UChar*)s->arr2)[s->nblock])
|
cannam@89
|
145
|
cannam@89
|
146 These storage aliases are set up in bzCompressInit(),
|
cannam@89
|
147 except for the last one, which is arranged in
|
cannam@89
|
148 compressBlock().
|
cannam@89
|
149 */
|
cannam@89
|
150 UInt32* ptr = s->ptr;
|
cannam@89
|
151 UChar* block = s->block;
|
cannam@89
|
152 UInt16* mtfv = s->mtfv;
|
cannam@89
|
153
|
cannam@89
|
154 makeMaps_e ( s );
|
cannam@89
|
155 EOB = s->nInUse+1;
|
cannam@89
|
156
|
cannam@89
|
157 for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0;
|
cannam@89
|
158
|
cannam@89
|
159 wr = 0;
|
cannam@89
|
160 zPend = 0;
|
cannam@89
|
161 for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i;
|
cannam@89
|
162
|
cannam@89
|
163 for (i = 0; i < s->nblock; i++) {
|
cannam@89
|
164 UChar ll_i;
|
cannam@89
|
165 AssertD ( wr <= i, "generateMTFValues(1)" );
|
cannam@89
|
166 j = ptr[i]-1; if (j < 0) j += s->nblock;
|
cannam@89
|
167 ll_i = s->unseqToSeq[block[j]];
|
cannam@89
|
168 AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" );
|
cannam@89
|
169
|
cannam@89
|
170 if (yy[0] == ll_i) {
|
cannam@89
|
171 zPend++;
|
cannam@89
|
172 } else {
|
cannam@89
|
173
|
cannam@89
|
174 if (zPend > 0) {
|
cannam@89
|
175 zPend--;
|
cannam@89
|
176 while (True) {
|
cannam@89
|
177 if (zPend & 1) {
|
cannam@89
|
178 mtfv[wr] = BZ_RUNB; wr++;
|
cannam@89
|
179 s->mtfFreq[BZ_RUNB]++;
|
cannam@89
|
180 } else {
|
cannam@89
|
181 mtfv[wr] = BZ_RUNA; wr++;
|
cannam@89
|
182 s->mtfFreq[BZ_RUNA]++;
|
cannam@89
|
183 }
|
cannam@89
|
184 if (zPend < 2) break;
|
cannam@89
|
185 zPend = (zPend - 2) / 2;
|
cannam@89
|
186 };
|
cannam@89
|
187 zPend = 0;
|
cannam@89
|
188 }
|
cannam@89
|
189 {
|
cannam@89
|
190 register UChar rtmp;
|
cannam@89
|
191 register UChar* ryy_j;
|
cannam@89
|
192 register UChar rll_i;
|
cannam@89
|
193 rtmp = yy[1];
|
cannam@89
|
194 yy[1] = yy[0];
|
cannam@89
|
195 ryy_j = &(yy[1]);
|
cannam@89
|
196 rll_i = ll_i;
|
cannam@89
|
197 while ( rll_i != rtmp ) {
|
cannam@89
|
198 register UChar rtmp2;
|
cannam@89
|
199 ryy_j++;
|
cannam@89
|
200 rtmp2 = rtmp;
|
cannam@89
|
201 rtmp = *ryy_j;
|
cannam@89
|
202 *ryy_j = rtmp2;
|
cannam@89
|
203 };
|
cannam@89
|
204 yy[0] = rtmp;
|
cannam@89
|
205 j = ryy_j - &(yy[0]);
|
cannam@89
|
206 mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++;
|
cannam@89
|
207 }
|
cannam@89
|
208
|
cannam@89
|
209 }
|
cannam@89
|
210 }
|
cannam@89
|
211
|
cannam@89
|
212 if (zPend > 0) {
|
cannam@89
|
213 zPend--;
|
cannam@89
|
214 while (True) {
|
cannam@89
|
215 if (zPend & 1) {
|
cannam@89
|
216 mtfv[wr] = BZ_RUNB; wr++;
|
cannam@89
|
217 s->mtfFreq[BZ_RUNB]++;
|
cannam@89
|
218 } else {
|
cannam@89
|
219 mtfv[wr] = BZ_RUNA; wr++;
|
cannam@89
|
220 s->mtfFreq[BZ_RUNA]++;
|
cannam@89
|
221 }
|
cannam@89
|
222 if (zPend < 2) break;
|
cannam@89
|
223 zPend = (zPend - 2) / 2;
|
cannam@89
|
224 };
|
cannam@89
|
225 zPend = 0;
|
cannam@89
|
226 }
|
cannam@89
|
227
|
cannam@89
|
228 mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++;
|
cannam@89
|
229
|
cannam@89
|
230 s->nMTF = wr;
|
cannam@89
|
231 }
|
cannam@89
|
232
|
cannam@89
|
233
|
cannam@89
|
234 /*---------------------------------------------------*/
|
cannam@89
|
235 #define BZ_LESSER_ICOST 0
|
cannam@89
|
236 #define BZ_GREATER_ICOST 15
|
cannam@89
|
237
|
cannam@89
|
238 static
|
cannam@89
|
239 void sendMTFValues ( EState* s )
|
cannam@89
|
240 {
|
cannam@89
|
241 Int32 v, t, i, j, gs, ge, totc, bt, bc, iter;
|
cannam@89
|
242 Int32 nSelectors, alphaSize, minLen, maxLen, selCtr;
|
cannam@89
|
243 Int32 nGroups, nBytes;
|
cannam@89
|
244
|
cannam@89
|
245 /*--
|
cannam@89
|
246 UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
|
cannam@89
|
247 is a global since the decoder also needs it.
|
cannam@89
|
248
|
cannam@89
|
249 Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
|
cannam@89
|
250 Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
|
cannam@89
|
251 are also globals only used in this proc.
|
cannam@89
|
252 Made global to keep stack frame size small.
|
cannam@89
|
253 --*/
|
cannam@89
|
254
|
cannam@89
|
255
|
cannam@89
|
256 UInt16 cost[BZ_N_GROUPS];
|
cannam@89
|
257 Int32 fave[BZ_N_GROUPS];
|
cannam@89
|
258
|
cannam@89
|
259 UInt16* mtfv = s->mtfv;
|
cannam@89
|
260
|
cannam@89
|
261 if (s->verbosity >= 3)
|
cannam@89
|
262 VPrintf3( " %d in block, %d after MTF & 1-2 coding, "
|
cannam@89
|
263 "%d+2 syms in use\n",
|
cannam@89
|
264 s->nblock, s->nMTF, s->nInUse );
|
cannam@89
|
265
|
cannam@89
|
266 alphaSize = s->nInUse+2;
|
cannam@89
|
267 for (t = 0; t < BZ_N_GROUPS; t++)
|
cannam@89
|
268 for (v = 0; v < alphaSize; v++)
|
cannam@89
|
269 s->len[t][v] = BZ_GREATER_ICOST;
|
cannam@89
|
270
|
cannam@89
|
271 /*--- Decide how many coding tables to use ---*/
|
cannam@89
|
272 AssertH ( s->nMTF > 0, 3001 );
|
cannam@89
|
273 if (s->nMTF < 200) nGroups = 2; else
|
cannam@89
|
274 if (s->nMTF < 600) nGroups = 3; else
|
cannam@89
|
275 if (s->nMTF < 1200) nGroups = 4; else
|
cannam@89
|
276 if (s->nMTF < 2400) nGroups = 5; else
|
cannam@89
|
277 nGroups = 6;
|
cannam@89
|
278
|
cannam@89
|
279 /*--- Generate an initial set of coding tables ---*/
|
cannam@89
|
280 {
|
cannam@89
|
281 Int32 nPart, remF, tFreq, aFreq;
|
cannam@89
|
282
|
cannam@89
|
283 nPart = nGroups;
|
cannam@89
|
284 remF = s->nMTF;
|
cannam@89
|
285 gs = 0;
|
cannam@89
|
286 while (nPart > 0) {
|
cannam@89
|
287 tFreq = remF / nPart;
|
cannam@89
|
288 ge = gs-1;
|
cannam@89
|
289 aFreq = 0;
|
cannam@89
|
290 while (aFreq < tFreq && ge < alphaSize-1) {
|
cannam@89
|
291 ge++;
|
cannam@89
|
292 aFreq += s->mtfFreq[ge];
|
cannam@89
|
293 }
|
cannam@89
|
294
|
cannam@89
|
295 if (ge > gs
|
cannam@89
|
296 && nPart != nGroups && nPart != 1
|
cannam@89
|
297 && ((nGroups-nPart) % 2 == 1)) {
|
cannam@89
|
298 aFreq -= s->mtfFreq[ge];
|
cannam@89
|
299 ge--;
|
cannam@89
|
300 }
|
cannam@89
|
301
|
cannam@89
|
302 if (s->verbosity >= 3)
|
cannam@89
|
303 VPrintf5( " initial group %d, [%d .. %d], "
|
cannam@89
|
304 "has %d syms (%4.1f%%)\n",
|
cannam@89
|
305 nPart, gs, ge, aFreq,
|
cannam@89
|
306 (100.0 * (float)aFreq) / (float)(s->nMTF) );
|
cannam@89
|
307
|
cannam@89
|
308 for (v = 0; v < alphaSize; v++)
|
cannam@89
|
309 if (v >= gs && v <= ge)
|
cannam@89
|
310 s->len[nPart-1][v] = BZ_LESSER_ICOST; else
|
cannam@89
|
311 s->len[nPart-1][v] = BZ_GREATER_ICOST;
|
cannam@89
|
312
|
cannam@89
|
313 nPart--;
|
cannam@89
|
314 gs = ge+1;
|
cannam@89
|
315 remF -= aFreq;
|
cannam@89
|
316 }
|
cannam@89
|
317 }
|
cannam@89
|
318
|
cannam@89
|
319 /*---
|
cannam@89
|
320 Iterate up to BZ_N_ITERS times to improve the tables.
|
cannam@89
|
321 ---*/
|
cannam@89
|
322 for (iter = 0; iter < BZ_N_ITERS; iter++) {
|
cannam@89
|
323
|
cannam@89
|
324 for (t = 0; t < nGroups; t++) fave[t] = 0;
|
cannam@89
|
325
|
cannam@89
|
326 for (t = 0; t < nGroups; t++)
|
cannam@89
|
327 for (v = 0; v < alphaSize; v++)
|
cannam@89
|
328 s->rfreq[t][v] = 0;
|
cannam@89
|
329
|
cannam@89
|
330 /*---
|
cannam@89
|
331 Set up an auxiliary length table which is used to fast-track
|
cannam@89
|
332 the common case (nGroups == 6).
|
cannam@89
|
333 ---*/
|
cannam@89
|
334 if (nGroups == 6) {
|
cannam@89
|
335 for (v = 0; v < alphaSize; v++) {
|
cannam@89
|
336 s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v];
|
cannam@89
|
337 s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v];
|
cannam@89
|
338 s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v];
|
cannam@89
|
339 }
|
cannam@89
|
340 }
|
cannam@89
|
341
|
cannam@89
|
342 nSelectors = 0;
|
cannam@89
|
343 totc = 0;
|
cannam@89
|
344 gs = 0;
|
cannam@89
|
345 while (True) {
|
cannam@89
|
346
|
cannam@89
|
347 /*--- Set group start & end marks. --*/
|
cannam@89
|
348 if (gs >= s->nMTF) break;
|
cannam@89
|
349 ge = gs + BZ_G_SIZE - 1;
|
cannam@89
|
350 if (ge >= s->nMTF) ge = s->nMTF-1;
|
cannam@89
|
351
|
cannam@89
|
352 /*--
|
cannam@89
|
353 Calculate the cost of this group as coded
|
cannam@89
|
354 by each of the coding tables.
|
cannam@89
|
355 --*/
|
cannam@89
|
356 for (t = 0; t < nGroups; t++) cost[t] = 0;
|
cannam@89
|
357
|
cannam@89
|
358 if (nGroups == 6 && 50 == ge-gs+1) {
|
cannam@89
|
359 /*--- fast track the common case ---*/
|
cannam@89
|
360 register UInt32 cost01, cost23, cost45;
|
cannam@89
|
361 register UInt16 icv;
|
cannam@89
|
362 cost01 = cost23 = cost45 = 0;
|
cannam@89
|
363
|
cannam@89
|
364 # define BZ_ITER(nn) \
|
cannam@89
|
365 icv = mtfv[gs+(nn)]; \
|
cannam@89
|
366 cost01 += s->len_pack[icv][0]; \
|
cannam@89
|
367 cost23 += s->len_pack[icv][1]; \
|
cannam@89
|
368 cost45 += s->len_pack[icv][2]; \
|
cannam@89
|
369
|
cannam@89
|
370 BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4);
|
cannam@89
|
371 BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9);
|
cannam@89
|
372 BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
|
cannam@89
|
373 BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
|
cannam@89
|
374 BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
|
cannam@89
|
375 BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
|
cannam@89
|
376 BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
|
cannam@89
|
377 BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
|
cannam@89
|
378 BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
|
cannam@89
|
379 BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
|
cannam@89
|
380
|
cannam@89
|
381 # undef BZ_ITER
|
cannam@89
|
382
|
cannam@89
|
383 cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16;
|
cannam@89
|
384 cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16;
|
cannam@89
|
385 cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16;
|
cannam@89
|
386
|
cannam@89
|
387 } else {
|
cannam@89
|
388 /*--- slow version which correctly handles all situations ---*/
|
cannam@89
|
389 for (i = gs; i <= ge; i++) {
|
cannam@89
|
390 UInt16 icv = mtfv[i];
|
cannam@89
|
391 for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv];
|
cannam@89
|
392 }
|
cannam@89
|
393 }
|
cannam@89
|
394
|
cannam@89
|
395 /*--
|
cannam@89
|
396 Find the coding table which is best for this group,
|
cannam@89
|
397 and record its identity in the selector table.
|
cannam@89
|
398 --*/
|
cannam@89
|
399 bc = 999999999; bt = -1;
|
cannam@89
|
400 for (t = 0; t < nGroups; t++)
|
cannam@89
|
401 if (cost[t] < bc) { bc = cost[t]; bt = t; };
|
cannam@89
|
402 totc += bc;
|
cannam@89
|
403 fave[bt]++;
|
cannam@89
|
404 s->selector[nSelectors] = bt;
|
cannam@89
|
405 nSelectors++;
|
cannam@89
|
406
|
cannam@89
|
407 /*--
|
cannam@89
|
408 Increment the symbol frequencies for the selected table.
|
cannam@89
|
409 --*/
|
cannam@89
|
410 if (nGroups == 6 && 50 == ge-gs+1) {
|
cannam@89
|
411 /*--- fast track the common case ---*/
|
cannam@89
|
412
|
cannam@89
|
413 # define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++
|
cannam@89
|
414
|
cannam@89
|
415 BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4);
|
cannam@89
|
416 BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9);
|
cannam@89
|
417 BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
|
cannam@89
|
418 BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
|
cannam@89
|
419 BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
|
cannam@89
|
420 BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
|
cannam@89
|
421 BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
|
cannam@89
|
422 BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
|
cannam@89
|
423 BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
|
cannam@89
|
424 BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
|
cannam@89
|
425
|
cannam@89
|
426 # undef BZ_ITUR
|
cannam@89
|
427
|
cannam@89
|
428 } else {
|
cannam@89
|
429 /*--- slow version which correctly handles all situations ---*/
|
cannam@89
|
430 for (i = gs; i <= ge; i++)
|
cannam@89
|
431 s->rfreq[bt][ mtfv[i] ]++;
|
cannam@89
|
432 }
|
cannam@89
|
433
|
cannam@89
|
434 gs = ge+1;
|
cannam@89
|
435 }
|
cannam@89
|
436 if (s->verbosity >= 3) {
|
cannam@89
|
437 VPrintf2 ( " pass %d: size is %d, grp uses are ",
|
cannam@89
|
438 iter+1, totc/8 );
|
cannam@89
|
439 for (t = 0; t < nGroups; t++)
|
cannam@89
|
440 VPrintf1 ( "%d ", fave[t] );
|
cannam@89
|
441 VPrintf0 ( "\n" );
|
cannam@89
|
442 }
|
cannam@89
|
443
|
cannam@89
|
444 /*--
|
cannam@89
|
445 Recompute the tables based on the accumulated frequencies.
|
cannam@89
|
446 --*/
|
cannam@89
|
447 /* maxLen was changed from 20 to 17 in bzip2-1.0.3. See
|
cannam@89
|
448 comment in huffman.c for details. */
|
cannam@89
|
449 for (t = 0; t < nGroups; t++)
|
cannam@89
|
450 BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]),
|
cannam@89
|
451 alphaSize, 17 /*20*/ );
|
cannam@89
|
452 }
|
cannam@89
|
453
|
cannam@89
|
454
|
cannam@89
|
455 AssertH( nGroups < 8, 3002 );
|
cannam@89
|
456 AssertH( nSelectors < 32768 &&
|
cannam@89
|
457 nSelectors <= (2 + (900000 / BZ_G_SIZE)),
|
cannam@89
|
458 3003 );
|
cannam@89
|
459
|
cannam@89
|
460
|
cannam@89
|
461 /*--- Compute MTF values for the selectors. ---*/
|
cannam@89
|
462 {
|
cannam@89
|
463 UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp;
|
cannam@89
|
464 for (i = 0; i < nGroups; i++) pos[i] = i;
|
cannam@89
|
465 for (i = 0; i < nSelectors; i++) {
|
cannam@89
|
466 ll_i = s->selector[i];
|
cannam@89
|
467 j = 0;
|
cannam@89
|
468 tmp = pos[j];
|
cannam@89
|
469 while ( ll_i != tmp ) {
|
cannam@89
|
470 j++;
|
cannam@89
|
471 tmp2 = tmp;
|
cannam@89
|
472 tmp = pos[j];
|
cannam@89
|
473 pos[j] = tmp2;
|
cannam@89
|
474 };
|
cannam@89
|
475 pos[0] = tmp;
|
cannam@89
|
476 s->selectorMtf[i] = j;
|
cannam@89
|
477 }
|
cannam@89
|
478 };
|
cannam@89
|
479
|
cannam@89
|
480 /*--- Assign actual codes for the tables. --*/
|
cannam@89
|
481 for (t = 0; t < nGroups; t++) {
|
cannam@89
|
482 minLen = 32;
|
cannam@89
|
483 maxLen = 0;
|
cannam@89
|
484 for (i = 0; i < alphaSize; i++) {
|
cannam@89
|
485 if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
|
cannam@89
|
486 if (s->len[t][i] < minLen) minLen = s->len[t][i];
|
cannam@89
|
487 }
|
cannam@89
|
488 AssertH ( !(maxLen > 17 /*20*/ ), 3004 );
|
cannam@89
|
489 AssertH ( !(minLen < 1), 3005 );
|
cannam@89
|
490 BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]),
|
cannam@89
|
491 minLen, maxLen, alphaSize );
|
cannam@89
|
492 }
|
cannam@89
|
493
|
cannam@89
|
494 /*--- Transmit the mapping table. ---*/
|
cannam@89
|
495 {
|
cannam@89
|
496 Bool inUse16[16];
|
cannam@89
|
497 for (i = 0; i < 16; i++) {
|
cannam@89
|
498 inUse16[i] = False;
|
cannam@89
|
499 for (j = 0; j < 16; j++)
|
cannam@89
|
500 if (s->inUse[i * 16 + j]) inUse16[i] = True;
|
cannam@89
|
501 }
|
cannam@89
|
502
|
cannam@89
|
503 nBytes = s->numZ;
|
cannam@89
|
504 for (i = 0; i < 16; i++)
|
cannam@89
|
505 if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0);
|
cannam@89
|
506
|
cannam@89
|
507 for (i = 0; i < 16; i++)
|
cannam@89
|
508 if (inUse16[i])
|
cannam@89
|
509 for (j = 0; j < 16; j++) {
|
cannam@89
|
510 if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0);
|
cannam@89
|
511 }
|
cannam@89
|
512
|
cannam@89
|
513 if (s->verbosity >= 3)
|
cannam@89
|
514 VPrintf1( " bytes: mapping %d, ", s->numZ-nBytes );
|
cannam@89
|
515 }
|
cannam@89
|
516
|
cannam@89
|
517 /*--- Now the selectors. ---*/
|
cannam@89
|
518 nBytes = s->numZ;
|
cannam@89
|
519 bsW ( s, 3, nGroups );
|
cannam@89
|
520 bsW ( s, 15, nSelectors );
|
cannam@89
|
521 for (i = 0; i < nSelectors; i++) {
|
cannam@89
|
522 for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1);
|
cannam@89
|
523 bsW(s,1,0);
|
cannam@89
|
524 }
|
cannam@89
|
525 if (s->verbosity >= 3)
|
cannam@89
|
526 VPrintf1( "selectors %d, ", s->numZ-nBytes );
|
cannam@89
|
527
|
cannam@89
|
528 /*--- Now the coding tables. ---*/
|
cannam@89
|
529 nBytes = s->numZ;
|
cannam@89
|
530
|
cannam@89
|
531 for (t = 0; t < nGroups; t++) {
|
cannam@89
|
532 Int32 curr = s->len[t][0];
|
cannam@89
|
533 bsW ( s, 5, curr );
|
cannam@89
|
534 for (i = 0; i < alphaSize; i++) {
|
cannam@89
|
535 while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ };
|
cannam@89
|
536 while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ };
|
cannam@89
|
537 bsW ( s, 1, 0 );
|
cannam@89
|
538 }
|
cannam@89
|
539 }
|
cannam@89
|
540
|
cannam@89
|
541 if (s->verbosity >= 3)
|
cannam@89
|
542 VPrintf1 ( "code lengths %d, ", s->numZ-nBytes );
|
cannam@89
|
543
|
cannam@89
|
544 /*--- And finally, the block data proper ---*/
|
cannam@89
|
545 nBytes = s->numZ;
|
cannam@89
|
546 selCtr = 0;
|
cannam@89
|
547 gs = 0;
|
cannam@89
|
548 while (True) {
|
cannam@89
|
549 if (gs >= s->nMTF) break;
|
cannam@89
|
550 ge = gs + BZ_G_SIZE - 1;
|
cannam@89
|
551 if (ge >= s->nMTF) ge = s->nMTF-1;
|
cannam@89
|
552 AssertH ( s->selector[selCtr] < nGroups, 3006 );
|
cannam@89
|
553
|
cannam@89
|
554 if (nGroups == 6 && 50 == ge-gs+1) {
|
cannam@89
|
555 /*--- fast track the common case ---*/
|
cannam@89
|
556 UInt16 mtfv_i;
|
cannam@89
|
557 UChar* s_len_sel_selCtr
|
cannam@89
|
558 = &(s->len[s->selector[selCtr]][0]);
|
cannam@89
|
559 Int32* s_code_sel_selCtr
|
cannam@89
|
560 = &(s->code[s->selector[selCtr]][0]);
|
cannam@89
|
561
|
cannam@89
|
562 # define BZ_ITAH(nn) \
|
cannam@89
|
563 mtfv_i = mtfv[gs+(nn)]; \
|
cannam@89
|
564 bsW ( s, \
|
cannam@89
|
565 s_len_sel_selCtr[mtfv_i], \
|
cannam@89
|
566 s_code_sel_selCtr[mtfv_i] )
|
cannam@89
|
567
|
cannam@89
|
568 BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4);
|
cannam@89
|
569 BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9);
|
cannam@89
|
570 BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
|
cannam@89
|
571 BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
|
cannam@89
|
572 BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
|
cannam@89
|
573 BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
|
cannam@89
|
574 BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
|
cannam@89
|
575 BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
|
cannam@89
|
576 BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
|
cannam@89
|
577 BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
|
cannam@89
|
578
|
cannam@89
|
579 # undef BZ_ITAH
|
cannam@89
|
580
|
cannam@89
|
581 } else {
|
cannam@89
|
582 /*--- slow version which correctly handles all situations ---*/
|
cannam@89
|
583 for (i = gs; i <= ge; i++) {
|
cannam@89
|
584 bsW ( s,
|
cannam@89
|
585 s->len [s->selector[selCtr]] [mtfv[i]],
|
cannam@89
|
586 s->code [s->selector[selCtr]] [mtfv[i]] );
|
cannam@89
|
587 }
|
cannam@89
|
588 }
|
cannam@89
|
589
|
cannam@89
|
590
|
cannam@89
|
591 gs = ge+1;
|
cannam@89
|
592 selCtr++;
|
cannam@89
|
593 }
|
cannam@89
|
594 AssertH( selCtr == nSelectors, 3007 );
|
cannam@89
|
595
|
cannam@89
|
596 if (s->verbosity >= 3)
|
cannam@89
|
597 VPrintf1( "codes %d\n", s->numZ-nBytes );
|
cannam@89
|
598 }
|
cannam@89
|
599
|
cannam@89
|
600
|
cannam@89
|
601 /*---------------------------------------------------*/
|
cannam@89
|
602 void BZ2_compressBlock ( EState* s, Bool is_last_block )
|
cannam@89
|
603 {
|
cannam@89
|
604 if (s->nblock > 0) {
|
cannam@89
|
605
|
cannam@89
|
606 BZ_FINALISE_CRC ( s->blockCRC );
|
cannam@89
|
607 s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31);
|
cannam@89
|
608 s->combinedCRC ^= s->blockCRC;
|
cannam@89
|
609 if (s->blockNo > 1) s->numZ = 0;
|
cannam@89
|
610
|
cannam@89
|
611 if (s->verbosity >= 2)
|
cannam@89
|
612 VPrintf4( " block %d: crc = 0x%08x, "
|
cannam@89
|
613 "combined CRC = 0x%08x, size = %d\n",
|
cannam@89
|
614 s->blockNo, s->blockCRC, s->combinedCRC, s->nblock );
|
cannam@89
|
615
|
cannam@89
|
616 BZ2_blockSort ( s );
|
cannam@89
|
617 }
|
cannam@89
|
618
|
cannam@89
|
619 s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]);
|
cannam@89
|
620
|
cannam@89
|
621 /*-- If this is the first block, create the stream header. --*/
|
cannam@89
|
622 if (s->blockNo == 1) {
|
cannam@89
|
623 BZ2_bsInitWrite ( s );
|
cannam@89
|
624 bsPutUChar ( s, BZ_HDR_B );
|
cannam@89
|
625 bsPutUChar ( s, BZ_HDR_Z );
|
cannam@89
|
626 bsPutUChar ( s, BZ_HDR_h );
|
cannam@89
|
627 bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) );
|
cannam@89
|
628 }
|
cannam@89
|
629
|
cannam@89
|
630 if (s->nblock > 0) {
|
cannam@89
|
631
|
cannam@89
|
632 bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 );
|
cannam@89
|
633 bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 );
|
cannam@89
|
634 bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 );
|
cannam@89
|
635
|
cannam@89
|
636 /*-- Now the block's CRC, so it is in a known place. --*/
|
cannam@89
|
637 bsPutUInt32 ( s, s->blockCRC );
|
cannam@89
|
638
|
cannam@89
|
639 /*--
|
cannam@89
|
640 Now a single bit indicating (non-)randomisation.
|
cannam@89
|
641 As of version 0.9.5, we use a better sorting algorithm
|
cannam@89
|
642 which makes randomisation unnecessary. So always set
|
cannam@89
|
643 the randomised bit to 'no'. Of course, the decoder
|
cannam@89
|
644 still needs to be able to handle randomised blocks
|
cannam@89
|
645 so as to maintain backwards compatibility with
|
cannam@89
|
646 older versions of bzip2.
|
cannam@89
|
647 --*/
|
cannam@89
|
648 bsW(s,1,0);
|
cannam@89
|
649
|
cannam@89
|
650 bsW ( s, 24, s->origPtr );
|
cannam@89
|
651 generateMTFValues ( s );
|
cannam@89
|
652 sendMTFValues ( s );
|
cannam@89
|
653 }
|
cannam@89
|
654
|
cannam@89
|
655
|
cannam@89
|
656 /*-- If this is the last block, add the stream trailer. --*/
|
cannam@89
|
657 if (is_last_block) {
|
cannam@89
|
658
|
cannam@89
|
659 bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 );
|
cannam@89
|
660 bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 );
|
cannam@89
|
661 bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 );
|
cannam@89
|
662 bsPutUInt32 ( s, s->combinedCRC );
|
cannam@89
|
663 if (s->verbosity >= 2)
|
cannam@89
|
664 VPrintf1( " final combined CRC = 0x%08x\n ", s->combinedCRC );
|
cannam@89
|
665 bsFinishWrite ( s );
|
cannam@89
|
666 }
|
cannam@89
|
667 }
|
cannam@89
|
668
|
cannam@89
|
669
|
cannam@89
|
670 /*-------------------------------------------------------------*/
|
cannam@89
|
671 /*--- end compress.c ---*/
|
cannam@89
|
672 /*-------------------------------------------------------------*/
|