annotate src/zlib-1.2.7/deflate.h @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents e13257ea84a4
children
rev   line source
Chris@4 1 /* deflate.h -- internal compression state
Chris@4 2 * Copyright (C) 1995-2012 Jean-loup Gailly
Chris@4 3 * For conditions of distribution and use, see copyright notice in zlib.h
Chris@4 4 */
Chris@4 5
Chris@4 6 /* WARNING: this file should *not* be used by applications. It is
Chris@4 7 part of the implementation of the compression library and is
Chris@4 8 subject to change. Applications should only use zlib.h.
Chris@4 9 */
Chris@4 10
Chris@4 11 /* @(#) $Id$ */
Chris@4 12
Chris@4 13 #ifndef DEFLATE_H
Chris@4 14 #define DEFLATE_H
Chris@4 15
Chris@4 16 #include "zutil.h"
Chris@4 17
/* Gzip header and trailer creation by deflate() is enabled unless NO_GZIP is
 * defined when compiling. NO_GZIP would be used to avoid linking in the crc
 * code when it is not needed. For shared libraries, gzip encoding should be
 * left enabled.
 */
#ifndef NO_GZIP
#  define GZIP
#endif
Chris@4 25
Chris@4 26 /* ===========================================================================
Chris@4 27 * Internal compression state.
Chris@4 28 */
Chris@4 29
/* Number of length codes, not counting the special END_BLOCK code. */
#define LENGTH_CODES 29

/* Number of literal bytes 0..255. */
#define LITERALS  256

/* Number of Literal or Length codes, including the END_BLOCK code. */
#define L_CODES (LITERALS+1+LENGTH_CODES)

/* Number of distance codes. */
#define D_CODES   30

/* Number of codes used to transfer the bit lengths. */
#define BL_CODES  19

/* Maximum heap size. */
#define HEAP_SIZE (2*L_CODES+1)

/* All codes must not exceed MAX_BITS bits. */
#define MAX_BITS 15

/* Size of bit buffer in bi_buf. */
#define Buf_size 16

/* Stream status values. */
#define INIT_STATE    42
#define EXTRA_STATE   69
#define NAME_STATE    73
#define COMMENT_STATE 91
#define HCRC_STATE   103
#define BUSY_STATE   113
#define FINISH_STATE 666
Chris@4 62
Chris@4 63
Chris@4 64 /* Data structure describing a single value and its code string. */
Chris@4 65 typedef struct ct_data_s {
Chris@4 66 union {
Chris@4 67 ush freq; /* frequency count */
Chris@4 68 ush code; /* bit string */
Chris@4 69 } fc;
Chris@4 70 union {
Chris@4 71 ush dad; /* father node in Huffman tree */
Chris@4 72 ush len; /* length of bit string */
Chris@4 73 } dl;
Chris@4 74 } FAR ct_data;
Chris@4 75
/* Shorthand member names for the two unions inside ct_data, so that code can
 * write node.Freq instead of node.fc.freq, and so on.
 */
#define Freq fc.freq
#define Code fc.code
#define Dad  dl.dad
#define Len  dl.len
Chris@4 80
Chris@4 81 typedef struct static_tree_desc_s static_tree_desc;
Chris@4 82
Chris@4 83 typedef struct tree_desc_s {
Chris@4 84 ct_data *dyn_tree; /* the dynamic tree */
Chris@4 85 int max_code; /* largest code with non zero frequency */
Chris@4 86 static_tree_desc *stat_desc; /* the corresponding static tree */
Chris@4 87 } FAR tree_desc;
Chris@4 88
Chris@4 89 typedef ush Pos;
Chris@4 90 typedef Pos FAR Posf;
Chris@4 91 typedef unsigned IPos;
Chris@4 92
Chris@4 93 /* A Pos is an index in the character window. We use short instead of int to
Chris@4 94 * save space in the various tables. IPos is used only for parameter passing.
Chris@4 95 */
Chris@4 96
Chris@4 97 typedef struct internal_state {
Chris@4 98 z_streamp strm; /* pointer back to this zlib stream */
Chris@4 99 int status; /* as the name implies */
Chris@4 100 Bytef *pending_buf; /* output still pending */
Chris@4 101 ulg pending_buf_size; /* size of pending_buf */
Chris@4 102 Bytef *pending_out; /* next pending byte to output to the stream */
Chris@4 103 uInt pending; /* nb of bytes in the pending buffer */
Chris@4 104 int wrap; /* bit 0 true for zlib, bit 1 true for gzip */
Chris@4 105 gz_headerp gzhead; /* gzip header information to write */
Chris@4 106 uInt gzindex; /* where in extra, name, or comment */
Chris@4 107 Byte method; /* STORED (for zip only) or DEFLATED */
Chris@4 108 int last_flush; /* value of flush param for previous deflate call */
Chris@4 109
Chris@4 110 /* used by deflate.c: */
Chris@4 111
Chris@4 112 uInt w_size; /* LZ77 window size (32K by default) */
Chris@4 113 uInt w_bits; /* log2(w_size) (8..16) */
Chris@4 114 uInt w_mask; /* w_size - 1 */
Chris@4 115
Chris@4 116 Bytef *window;
Chris@4 117 /* Sliding window. Input bytes are read into the second half of the window,
Chris@4 118 * and move to the first half later to keep a dictionary of at least wSize
Chris@4 119 * bytes. With this organization, matches are limited to a distance of
Chris@4 120 * wSize-MAX_MATCH bytes, but this ensures that IO is always
Chris@4 121 * performed with a length multiple of the block size. Also, it limits
Chris@4 122 * the window size to 64K, which is quite useful on MSDOS.
Chris@4 123 * To do: use the user input buffer as sliding window.
Chris@4 124 */
Chris@4 125
Chris@4 126 ulg window_size;
Chris@4 127 /* Actual size of window: 2*wSize, except when the user input buffer
Chris@4 128 * is directly used as sliding window.
Chris@4 129 */
Chris@4 130
Chris@4 131 Posf *prev;
Chris@4 132 /* Link to older string with same hash index. To limit the size of this
Chris@4 133 * array to 64K, this link is maintained only for the last 32K strings.
Chris@4 134 * An index in this array is thus a window index modulo 32K.
Chris@4 135 */
Chris@4 136
Chris@4 137 Posf *head; /* Heads of the hash chains or NIL. */
Chris@4 138
Chris@4 139 uInt ins_h; /* hash index of string to be inserted */
Chris@4 140 uInt hash_size; /* number of elements in hash table */
Chris@4 141 uInt hash_bits; /* log2(hash_size) */
Chris@4 142 uInt hash_mask; /* hash_size-1 */
Chris@4 143
Chris@4 144 uInt hash_shift;
Chris@4 145 /* Number of bits by which ins_h must be shifted at each input
Chris@4 146 * step. It must be such that after MIN_MATCH steps, the oldest
Chris@4 147 * byte no longer takes part in the hash key, that is:
Chris@4 148 * hash_shift * MIN_MATCH >= hash_bits
Chris@4 149 */
Chris@4 150
Chris@4 151 long block_start;
Chris@4 152 /* Window position at the beginning of the current output block. Gets
Chris@4 153 * negative when the window is moved backwards.
Chris@4 154 */
Chris@4 155
Chris@4 156 uInt match_length; /* length of best match */
Chris@4 157 IPos prev_match; /* previous match */
Chris@4 158 int match_available; /* set if previous match exists */
Chris@4 159 uInt strstart; /* start of string to insert */
Chris@4 160 uInt match_start; /* start of matching string */
Chris@4 161 uInt lookahead; /* number of valid bytes ahead in window */
Chris@4 162
Chris@4 163 uInt prev_length;
Chris@4 164 /* Length of the best match at previous step. Matches not greater than this
Chris@4 165 * are discarded. This is used in the lazy match evaluation.
Chris@4 166 */
Chris@4 167
Chris@4 168 uInt max_chain_length;
Chris@4 169 /* To speed up deflation, hash chains are never searched beyond this
Chris@4 170 * length. A higher limit improves compression ratio but degrades the
Chris@4 171 * speed.
Chris@4 172 */
Chris@4 173
Chris@4 174 uInt max_lazy_match;
Chris@4 175 /* Attempt to find a better match only when the current match is strictly
Chris@4 176 * smaller than this value. This mechanism is used only for compression
Chris@4 177 * levels >= 4.
Chris@4 178 */
Chris@4 179 # define max_insert_length max_lazy_match
Chris@4 180 /* Insert new strings in the hash table only if the match length is not
Chris@4 181 * greater than this length. This saves time but degrades compression.
Chris@4 182 * max_insert_length is used only for compression levels <= 3.
Chris@4 183 */
Chris@4 184
Chris@4 185 int level; /* compression level (1..9) */
Chris@4 186 int strategy; /* favor or force Huffman coding*/
Chris@4 187
Chris@4 188 uInt good_match;
Chris@4 189 /* Use a faster search when the previous match is longer than this */
Chris@4 190
Chris@4 191 int nice_match; /* Stop searching when current match exceeds this */
Chris@4 192
Chris@4 193 /* used by trees.c: */
Chris@4 194 /* Didn't use ct_data typedef below to suppress compiler warning */
Chris@4 195 struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */
Chris@4 196 struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */
Chris@4 197 struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */
Chris@4 198
Chris@4 199 struct tree_desc_s l_desc; /* desc. for literal tree */
Chris@4 200 struct tree_desc_s d_desc; /* desc. for distance tree */
Chris@4 201 struct tree_desc_s bl_desc; /* desc. for bit length tree */
Chris@4 202
Chris@4 203 ush bl_count[MAX_BITS+1];
Chris@4 204 /* number of codes at each bit length for an optimal tree */
Chris@4 205
Chris@4 206 int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */
Chris@4 207 int heap_len; /* number of elements in the heap */
Chris@4 208 int heap_max; /* element of largest frequency */
Chris@4 209 /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
Chris@4 210 * The same heap array is used to build all trees.
Chris@4 211 */
Chris@4 212
Chris@4 213 uch depth[2*L_CODES+1];
Chris@4 214 /* Depth of each subtree used as tie breaker for trees of equal frequency
Chris@4 215 */
Chris@4 216
Chris@4 217 uchf *l_buf; /* buffer for literals or lengths */
Chris@4 218
Chris@4 219 uInt lit_bufsize;
Chris@4 220 /* Size of match buffer for literals/lengths. There are 4 reasons for
Chris@4 221 * limiting lit_bufsize to 64K:
Chris@4 222 * - frequencies can be kept in 16 bit counters
Chris@4 223 * - if compression is not successful for the first block, all input
Chris@4 224 * data is still in the window so we can still emit a stored block even
Chris@4 225 * when input comes from standard input. (This can also be done for
Chris@4 226 * all blocks if lit_bufsize is not greater than 32K.)
Chris@4 227 * - if compression is not successful for a file smaller than 64K, we can
Chris@4 228 * even emit a stored file instead of a stored block (saving 5 bytes).
Chris@4 229 * This is applicable only for zip (not gzip or zlib).
Chris@4 230 * - creating new Huffman trees less frequently may not provide fast
Chris@4 231 * adaptation to changes in the input data statistics. (Take for
Chris@4 232 * example a binary file with poorly compressible code followed by
Chris@4 233 * a highly compressible string table.) Smaller buffer sizes give
Chris@4 234 * fast adaptation but have of course the overhead of transmitting
Chris@4 235 * trees more frequently.
Chris@4 236 * - I can't count above 4
Chris@4 237 */
Chris@4 238
Chris@4 239 uInt last_lit; /* running index in l_buf */
Chris@4 240
Chris@4 241 ushf *d_buf;
Chris@4 242 /* Buffer for distances. To simplify the code, d_buf and l_buf have
Chris@4 243 * the same number of elements. To use different lengths, an extra flag
Chris@4 244 * array would be necessary.
Chris@4 245 */
Chris@4 246
Chris@4 247 ulg opt_len; /* bit length of current block with optimal trees */
Chris@4 248 ulg static_len; /* bit length of current block with static trees */
Chris@4 249 uInt matches; /* number of string matches in current block */
Chris@4 250 uInt insert; /* bytes at end of window left to insert */
Chris@4 251
Chris@4 252 #ifdef DEBUG
Chris@4 253 ulg compressed_len; /* total bit length of compressed file mod 2^32 */
Chris@4 254 ulg bits_sent; /* bit length of compressed data sent mod 2^32 */
Chris@4 255 #endif
Chris@4 256
Chris@4 257 ush bi_buf;
Chris@4 258 /* Output buffer. bits are inserted starting at the bottom (least
Chris@4 259 * significant bits).
Chris@4 260 */
Chris@4 261 int bi_valid;
Chris@4 262 /* Number of valid bits in bi_buf. All bits above the last valid bit
Chris@4 263 * are always zero.
Chris@4 264 */
Chris@4 265
Chris@4 266 ulg high_water;
Chris@4 267 /* High water mark offset in window for initialized bytes -- bytes above
Chris@4 268 * this are set to zero in order to avoid memory check warnings when
Chris@4 269 * longest match routines access bytes past the input. This is then
Chris@4 270 * updated to the new high water mark.
Chris@4 271 */
Chris@4 272
Chris@4 273 } FAR deflate_state;
Chris@4 274
/* Output a byte on the stream: store c at pending_buf[pending], then advance
 * pending by one.
 * IN assertion: there is enough room in pending_buf.
 * s is parenthesized so that any pointer expression (for example &state) can
 * be passed. s is expanded twice and therefore must not have side effects.
 */
#define put_byte(s, c) {(s)->pending_buf[(s)->pending++] = (c);}
Chris@4 279
Chris@4 280
/* Minimum amount of lookahead, except at the end of the input file.
 * See deflate.c for comments about the MIN_MATCH+1.
 */
#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)

/* In order to simplify the code, particularly on 16 bit machines, match
 * distances are limited to MAX_DIST instead of WSIZE.
 */
#define MAX_DIST(s)  ((s)->w_size-MIN_LOOKAHEAD)

/* Number of bytes after end of data in window to initialize in order to avoid
 * memory checker errors from longest match routines.
 */
#define WIN_INIT MAX_MATCH
Chris@4 294
Chris@4 295 /* in trees.c */
Chris@4 296 void ZLIB_INTERNAL _tr_init OF((deflate_state *s));
Chris@4 297 int ZLIB_INTERNAL _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc));
Chris@4 298 void ZLIB_INTERNAL _tr_flush_block OF((deflate_state *s, charf *buf,
Chris@4 299 ulg stored_len, int last));
Chris@4 300 void ZLIB_INTERNAL _tr_flush_bits OF((deflate_state *s));
Chris@4 301 void ZLIB_INTERNAL _tr_align OF((deflate_state *s));
Chris@4 302 void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf,
Chris@4 303 ulg stored_len, int last));
Chris@4 304
/* Mapping from a distance to a distance code. dist is the distance - 1 and
 * must not have side effects (it is expanded more than once). Values below
 * 256 index _dist_code directly; larger values use the upper part of the
 * table, indexed by 256 + (dist >> 7). _dist_code[256] and _dist_code[257]
 * are never used.
 */
#define d_code(dist) \
   ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])
Chris@4 311
Chris@4 312 #ifndef DEBUG
Chris@4 313 /* Inline versions of _tr_tally for speed: */
Chris@4 314
Chris@4 315 #if defined(GEN_TREES_H) || !defined(STDC)
Chris@4 316 extern uch ZLIB_INTERNAL _length_code[];
Chris@4 317 extern uch ZLIB_INTERNAL _dist_code[];
Chris@4 318 #else
Chris@4 319 extern const uch ZLIB_INTERNAL _length_code[];
Chris@4 320 extern const uch ZLIB_INTERNAL _dist_code[];
Chris@4 321 #endif
Chris@4 322
Chris@4 323 # define _tr_tally_lit(s, c, flush) \
Chris@4 324 { uch cc = (c); \
Chris@4 325 s->d_buf[s->last_lit] = 0; \
Chris@4 326 s->l_buf[s->last_lit++] = cc; \
Chris@4 327 s->dyn_ltree[cc].Freq++; \
Chris@4 328 flush = (s->last_lit == s->lit_bufsize-1); \
Chris@4 329 }
Chris@4 330 # define _tr_tally_dist(s, distance, length, flush) \
Chris@4 331 { uch len = (length); \
Chris@4 332 ush dist = (distance); \
Chris@4 333 s->d_buf[s->last_lit] = dist; \
Chris@4 334 s->l_buf[s->last_lit++] = len; \
Chris@4 335 dist--; \
Chris@4 336 s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
Chris@4 337 s->dyn_dtree[d_code(dist)].Freq++; \
Chris@4 338 flush = (s->last_lit == s->lit_bufsize-1); \
Chris@4 339 }
Chris@4 340 #else
Chris@4 341 # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
Chris@4 342 # define _tr_tally_dist(s, distance, length, flush) \
Chris@4 343 flush = _tr_tally(s, distance, length)
Chris@4 344 #endif
Chris@4 345
Chris@4 346 #endif /* DEFLATE_H */