annotate src/zlib-1.2.8/examples/gzjoin.c @ 169:223a55898ab9 tip default

Add null config files
author Chris Cannam <cannam@all-day-breakfast.com>
date Mon, 02 Mar 2020 14:03:47 +0000
parents 5b4145a0d408
children
rev   line source
cannam@128 1 /* gzjoin -- command to join gzip files into one gzip file
cannam@128 2
cannam@128 3 Copyright (C) 2004, 2005, 2012 Mark Adler, all rights reserved
cannam@128 4 version 1.2, 14 Aug 2012
cannam@128 5
cannam@128 6 This software is provided 'as-is', without any express or implied
cannam@128 7 warranty. In no event will the author be held liable for any damages
cannam@128 8 arising from the use of this software.
cannam@128 9
cannam@128 10 Permission is granted to anyone to use this software for any purpose,
cannam@128 11 including commercial applications, and to alter it and redistribute it
cannam@128 12 freely, subject to the following restrictions:
cannam@128 13
cannam@128 14 1. The origin of this software must not be misrepresented; you must not
cannam@128 15 claim that you wrote the original software. If you use this software
cannam@128 16 in a product, an acknowledgment in the product documentation would be
cannam@128 17 appreciated but is not required.
cannam@128 18 2. Altered source versions must be plainly marked as such, and must not be
cannam@128 19 misrepresented as being the original software.
cannam@128 20 3. This notice may not be removed or altered from any source distribution.
cannam@128 21
cannam@128 22 Mark Adler madler@alumni.caltech.edu
cannam@128 23 */
cannam@128 24
cannam@128 25 /*
cannam@128 26 * Change history:
cannam@128 27 *
cannam@128 28 * 1.0 11 Dec 2004 - First version
cannam@128 29 * 1.1 12 Jun 2005 - Changed ssize_t to long for portability
cannam@128 30 * 1.2 14 Aug 2012 - Clean up for z_const usage
cannam@128 31 */
cannam@128 32
cannam@128 33 /*
cannam@128 34 gzjoin takes one or more gzip files on the command line and writes out a
cannam@128 35 single gzip file that will uncompress to the concatenation of the
cannam@128 36 uncompressed data from the individual gzip files. gzjoin does this without
cannam@128 37 having to recompress any of the data and without having to calculate a new
cannam@128 38 crc32 for the concatenated uncompressed data. gzjoin does however have to
cannam@128 39 decompress all of the input data in order to find the bits in the compressed
cannam@128 40 data that need to be modified to concatenate the streams.
cannam@128 41
cannam@128 42 gzjoin does not do an integrity check on the input gzip files other than
cannam@128 43 checking the gzip header and decompressing the compressed data. They are
cannam@128 44 otherwise assumed to be complete and correct.
cannam@128 45
cannam@128 46 Each joint between gzip files removes at least 18 bytes of previous trailer
cannam@128 47 and subsequent header, and inserts an average of about three bytes to the
cannam@128 48 compressed data in order to connect the streams. The output gzip file
cannam@128 49 has a minimal ten-byte gzip header with no file name or modification time.
cannam@128 50
cannam@128 51 This program was written to illustrate the use of the Z_BLOCK option of
cannam@128 52 inflate() and the crc32_combine() function. gzjoin will not compile with
cannam@128 53 versions of zlib earlier than 1.2.3.
cannam@128 54 */
cannam@128 55
cannam@128 56 #include <stdio.h> /* fputs(), fprintf(), fwrite(), putc() */
cannam@128 57 #include <stdlib.h> /* exit(), malloc(), free() */
cannam@128 58 #include <fcntl.h> /* open() */
cannam@128 59 #include <unistd.h> /* close(), read(), lseek() */
cannam@128 60 #include "zlib.h"
cannam@128 61 /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */
cannam@128 62
cannam@128 63 #define local static
cannam@128 64
cannam@128 65 /* exit with an error (return a value to allow use in an expression) */
cannam@128 66 local int bail(char *why1, char *why2)
cannam@128 67 {
cannam@128 68 fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2);
cannam@128 69 exit(1);
cannam@128 70 return 0;
cannam@128 71 }
cannam@128 72
cannam@128 73 /* -- simple buffered file input with access to the buffer -- */
cannam@128 74
cannam@128 75 #define CHUNK 32768 /* must be a power of two and fit in unsigned */
cannam@128 76
/* bin buffered input file type: one open file descriptor together with a
   CHUNK-byte buffer and a cursor (next, left) over the buffered bytes that
   have not been consumed yet */
typedef struct {
    char *name;             /* name of file for error messages (bopen() stores
                               the caller's pointer; the string is not
                               copied) */
    int fd;                 /* file descriptor */
    unsigned left;          /* bytes remaining at next */
    unsigned char *next;    /* next byte to read */
    unsigned char *buf;     /* allocated buffer of length CHUNK */
} bin;
cannam@128 85
cannam@128 86 /* close a buffered file and free allocated memory */
cannam@128 87 local void bclose(bin *in)
cannam@128 88 {
cannam@128 89 if (in != NULL) {
cannam@128 90 if (in->fd != -1)
cannam@128 91 close(in->fd);
cannam@128 92 if (in->buf != NULL)
cannam@128 93 free(in->buf);
cannam@128 94 free(in);
cannam@128 95 }
cannam@128 96 }
cannam@128 97
cannam@128 98 /* open a buffered file for input, return a pointer to type bin, or NULL on
cannam@128 99 failure */
cannam@128 100 local bin *bopen(char *name)
cannam@128 101 {
cannam@128 102 bin *in;
cannam@128 103
cannam@128 104 in = malloc(sizeof(bin));
cannam@128 105 if (in == NULL)
cannam@128 106 return NULL;
cannam@128 107 in->buf = malloc(CHUNK);
cannam@128 108 in->fd = open(name, O_RDONLY, 0);
cannam@128 109 if (in->buf == NULL || in->fd == -1) {
cannam@128 110 bclose(in);
cannam@128 111 return NULL;
cannam@128 112 }
cannam@128 113 in->left = 0;
cannam@128 114 in->next = in->buf;
cannam@128 115 in->name = name;
cannam@128 116 return in;
cannam@128 117 }
cannam@128 118
cannam@128 119 /* load buffer from file, return -1 on read error, 0 or 1 on success, with
cannam@128 120 1 indicating that end-of-file was reached */
cannam@128 121 local int bload(bin *in)
cannam@128 122 {
cannam@128 123 long len;
cannam@128 124
cannam@128 125 if (in == NULL)
cannam@128 126 return -1;
cannam@128 127 if (in->left != 0)
cannam@128 128 return 0;
cannam@128 129 in->next = in->buf;
cannam@128 130 do {
cannam@128 131 len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left);
cannam@128 132 if (len < 0)
cannam@128 133 return -1;
cannam@128 134 in->left += (unsigned)len;
cannam@128 135 } while (len != 0 && in->left < CHUNK);
cannam@128 136 return len == 0 ? 1 : 0;
cannam@128 137 }
cannam@128 138
/* get a byte from the file, bail if end of file

   The value of the expression is the next byte as an int.  The buffer is
   reloaded with bload() when it is empty; the return value of bload() is
   discarded, so a read error is reported through the same "unexpected end of
   file" message as a genuine end of file.

   The argument is parenthesized in the expansion so that any pointer
   expression can be passed, but it is evaluated several times and therefore
   must not have side effects. */
#define bget(in) ((in)->left ? 0 : bload(in), \
                  (in)->left ? ((in)->left--, *((in)->next)++) : \
                      bail("unexpected end of file on ", (in)->name))
cannam@128 143
cannam@128 144 /* get a four-byte little-endian unsigned integer from file */
cannam@128 145 local unsigned long bget4(bin *in)
cannam@128 146 {
cannam@128 147 unsigned long val;
cannam@128 148
cannam@128 149 val = bget(in);
cannam@128 150 val += (unsigned long)(bget(in)) << 8;
cannam@128 151 val += (unsigned long)(bget(in)) << 16;
cannam@128 152 val += (unsigned long)(bget(in)) << 24;
cannam@128 153 return val;
cannam@128 154 }
cannam@128 155
cannam@128 156 /* skip bytes in file */
cannam@128 157 local void bskip(bin *in, unsigned skip)
cannam@128 158 {
cannam@128 159 /* check pointer */
cannam@128 160 if (in == NULL)
cannam@128 161 return;
cannam@128 162
cannam@128 163 /* easy case -- skip bytes in buffer */
cannam@128 164 if (skip <= in->left) {
cannam@128 165 in->left -= skip;
cannam@128 166 in->next += skip;
cannam@128 167 return;
cannam@128 168 }
cannam@128 169
cannam@128 170 /* skip what's in buffer, discard buffer contents */
cannam@128 171 skip -= in->left;
cannam@128 172 in->left = 0;
cannam@128 173
cannam@128 174 /* seek past multiples of CHUNK bytes */
cannam@128 175 if (skip > CHUNK) {
cannam@128 176 unsigned left;
cannam@128 177
cannam@128 178 left = skip & (CHUNK - 1);
cannam@128 179 if (left == 0) {
cannam@128 180 /* exact number of chunks: seek all the way minus one byte to check
cannam@128 181 for end-of-file with a read */
cannam@128 182 lseek(in->fd, skip - 1, SEEK_CUR);
cannam@128 183 if (read(in->fd, in->buf, 1) != 1)
cannam@128 184 bail("unexpected end of file on ", in->name);
cannam@128 185 return;
cannam@128 186 }
cannam@128 187
cannam@128 188 /* skip the integral chunks, update skip with remainder */
cannam@128 189 lseek(in->fd, skip - left, SEEK_CUR);
cannam@128 190 skip = left;
cannam@128 191 }
cannam@128 192
cannam@128 193 /* read more input and skip remainder */
cannam@128 194 bload(in);
cannam@128 195 if (skip > in->left)
cannam@128 196 bail("unexpected end of file on ", in->name);
cannam@128 197 in->left -= skip;
cannam@128 198 in->next += skip;
cannam@128 199 }
cannam@128 200
cannam@128 201 /* -- end of buffered input functions -- */
cannam@128 202
cannam@128 203 /* skip the gzip header from file in */
cannam@128 204 local void gzhead(bin *in)
cannam@128 205 {
cannam@128 206 int flags;
cannam@128 207
cannam@128 208 /* verify gzip magic header and compression method */
cannam@128 209 if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8)
cannam@128 210 bail(in->name, " is not a valid gzip file");
cannam@128 211
cannam@128 212 /* get and verify flags */
cannam@128 213 flags = bget(in);
cannam@128 214 if ((flags & 0xe0) != 0)
cannam@128 215 bail("unknown reserved bits set in ", in->name);
cannam@128 216
cannam@128 217 /* skip modification time, extra flags, and os */
cannam@128 218 bskip(in, 6);
cannam@128 219
cannam@128 220 /* skip extra field if present */
cannam@128 221 if (flags & 4) {
cannam@128 222 unsigned len;
cannam@128 223
cannam@128 224 len = bget(in);
cannam@128 225 len += (unsigned)(bget(in)) << 8;
cannam@128 226 bskip(in, len);
cannam@128 227 }
cannam@128 228
cannam@128 229 /* skip file name if present */
cannam@128 230 if (flags & 8)
cannam@128 231 while (bget(in) != 0)
cannam@128 232 ;
cannam@128 233
cannam@128 234 /* skip comment if present */
cannam@128 235 if (flags & 16)
cannam@128 236 while (bget(in) != 0)
cannam@128 237 ;
cannam@128 238
cannam@128 239 /* skip header crc if present */
cannam@128 240 if (flags & 2)
cannam@128 241 bskip(in, 2);
cannam@128 242 }
cannam@128 243
/* Write the low 32 bits of val to out as four bytes, least significant byte
   first.  Any higher bits of val are ignored. */
local void put4(unsigned long val, FILE *out)
{
    int shift;

    for (shift = 0; shift < 32; shift += 8)
        putc((int)((val >> shift) & 0xff), out);
}
cannam@128 252
cannam@128 253 /* Load up zlib stream from buffered input, bail if end of file */
cannam@128 254 local void zpull(z_streamp strm, bin *in)
cannam@128 255 {
cannam@128 256 if (in->left == 0)
cannam@128 257 bload(in);
cannam@128 258 if (in->left == 0)
cannam@128 259 bail("unexpected end of file on ", in->name);
cannam@128 260 strm->avail_in = in->left;
cannam@128 261 strm->next_in = in->next;
cannam@128 262 }
cannam@128 263
cannam@128 264 /* Write header for gzip file to out and initialize trailer. */
cannam@128 265 local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out)
cannam@128 266 {
cannam@128 267 fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out);
cannam@128 268 *crc = crc32(0L, Z_NULL, 0);
cannam@128 269 *tot = 0;
cannam@128 270 }
cannam@128 271
/* Copy the compressed data from name, zeroing the last block bit of the last
   block if clr is true, and adding empty blocks as needed to get to a byte
   boundary.  If clr is false, then the last block becomes the last block of
   the output, and the gzip trailer is written.  crc and tot maintain the
   crc and length (modulo 2^32) of the output for the trailer.  The resulting
   gzip file is written to out.  gzinit() must be called before the first call
   of gzcopy() to write the gzip header and to initialize crc and tot.

   The input is inflated only to locate the deflate block boundaries; the
   uncompressed data itself is discarded.  The compressed bytes are modified
   in place in the input buffer where needed and then written to out.

   Any failure (file cannot be opened, bad header, out of memory, invalid
   compressed data, premature end of file) terminates the program through
   bail().  Results of writes to out are not checked here. */
local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot,
                  FILE *out)
{
    int ret;                /* return value from zlib functions */
    int pos;                /* where the "last block" bit is in byte */
    int last;               /* true if processing the last block */
    bin *in;                /* buffered input file */
    unsigned char *start;   /* start of compressed data in buffer */
    unsigned char *junk;    /* buffer for uncompressed data -- discarded */
    z_off_t len;            /* length of uncompressed data (support > 4 GB) */
    z_stream strm;          /* zlib inflate stream */

    /* open gzip file and skip header */
    in = bopen(name);
    if (in == NULL)
        bail("could not open ", name);
    gzhead(in);

    /* allocate buffer for uncompressed data and initialize raw inflate
       stream (both results are checked together below) */
    junk = malloc(CHUNK);
    strm.zalloc = Z_NULL;
    strm.zfree = Z_NULL;
    strm.opaque = Z_NULL;
    strm.avail_in = 0;
    strm.next_in = Z_NULL;
    ret = inflateInit2(&strm, -15);
    if (junk == NULL || ret != Z_OK)
        bail("out of memory", "");

    /* inflate and copy compressed data, clear last-block bit if requested */
    len = 0;
    zpull(&strm, in);
    start = in->next;
    /* the last-block bit of the first block is the low bit of the first
       byte of compressed data */
    last = start[0] & 1;
    if (last && clr)
        start[0] &= ~1;
    /* zero avail_out so that the refill test at the top of the loop is
       false on the first pass, whatever avail_in is */
    strm.avail_out = 0;
    for (;;) {
        /* if input used and output done, write used input and get more */
        if (strm.avail_in == 0 && strm.avail_out != 0) {
            fwrite(start, 1, strm.next_in - start, out);
            start = in->buf;
            in->left = 0;       /* mark buffer consumed so zpull() reloads */
            zpull(&strm, in);
        }

        /* decompress -- return early when end-of-block reached */
        strm.avail_out = CHUNK;
        strm.next_out = junk;
        ret = inflate(&strm, Z_BLOCK);
        /* only these two failures are acted on; bail() does not return, so
           control never runs from one case into the next */
        switch (ret) {
        case Z_MEM_ERROR:
            bail("out of memory", "");
        case Z_DATA_ERROR:
            bail("invalid compressed data in ", in->name);
        }

        /* update length of uncompressed data */
        len += CHUNK - strm.avail_out;

        /* check for block boundary (only get this when block copied out) */
        if (strm.data_type & 128) {
            /* if that was the last block, then done */
            if (last)
                break;

            /* number of unused bits in last byte */
            pos = strm.data_type & 7;

            /* find the next last-block bit */
            if (pos != 0) {
                /* next last-block bit is in last used byte; turn the count
                   of unused bits into a mask for the lowest unused bit */
                pos = 0x100 >> pos;
                last = strm.next_in[-1] & pos;
                if (last && clr)
                    /* index through in->buf rather than strm.next_in to
                       modify the same byte through a writable pointer */
                    in->buf[strm.next_in - in->buf - 1] &= ~pos;
            }
            else {
                /* next last-block bit is in next unused byte */
                if (strm.avail_in == 0) {
                    /* don't have that byte yet -- get it */
                    fwrite(start, 1, strm.next_in - start, out);
                    start = in->buf;
                    in->left = 0;
                    zpull(&strm, in);
                }
                last = strm.next_in[0] & 1;
                if (last && clr)
                    in->buf[strm.next_in - in->buf] &= ~1;
            }
        }
    }

    /* update buffer with unused input */
    in->left = strm.avail_in;
    in->next = in->buf + (strm.next_in - in->buf);

    /* copy used input, write empty blocks to get to byte boundary; the final
       used byte is held back in last because its unused bits may need to be
       replaced */
    pos = strm.data_type & 7;
    fwrite(start, 1, in->next - start - 1, out);
    last = in->next[-1];
    if (pos == 0 || !clr)
        /* already at byte boundary, or last file: write last byte */
        putc(last, out);
    else {
        /* append empty blocks to last byte */
        last &= ((0x100 >> pos) - 1);       /* assure unused bits are zero */
        if (pos & 1) {
            /* odd -- append an empty stored block */
            putc(last, out);
            if (pos == 1)
                putc(0, out);           /* two more bits in block header */
            /* stored block length of zero and its one's complement */
            fwrite("\0\0\xff\xff", 1, 4, out);
        }
        else {
            /* even -- append 1, 2, or 3 empty fixed blocks; the cases fall
               through on purpose, each one completing the current byte and
               leaving the next to start from zero */
            switch (pos) {
            case 6:
                putc(last | 8, out);
                last = 0;
            case 4:
                putc(last | 0x20, out);
                last = 0;
            case 2:
                putc(last | 0x80, out);
                putc(0, out);
            }
        }
    }

    /* update crc and tot: the next four input bytes are taken as the crc of
       this file's uncompressed data and folded into the running crc; no
       further trailer bytes are read from the input */
    *crc = crc32_combine(*crc, bget4(in), len);
    *tot += (unsigned long)len;

    /* clean up */
    inflateEnd(&strm);
    free(junk);
    bclose(in);

    /* write trailer if this is the last gzip file */
    if (!clr) {
        put4(*crc, out);
        put4(*tot, out);
    }
}
cannam@128 425
/* Join the gzip files named on the command line and write the resulting
   single gzip stream to stdout.

   With no file arguments a usage message is printed on stderr and the exit
   status is 0.  Any error in processing terminates the program with status 1
   through bail().  That includes a failure to write the output: stdout is
   flushed and its error indicator is checked before returning, so that a full
   disk or closed pipe is not reported as success. */
int main(int argc, char **argv)
{
    unsigned long crc, tot;     /* running crc and total uncompressed length */

    /* skip command name */
    argc--;
    argv++;

    /* show usage if no arguments */
    if (argc == 0) {
        fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n",
              stderr);
        return 0;
    }

    /* join gzip files on command line and write to stdout; argc has already
       been decremented when it is passed, so it is the number of files still
       to come and is zero (clr false) only for the last file */
    gzinit(&crc, &tot, stdout);
    while (argc--)
        gzcopy(*argv++, argc, &crc, &tot, stdout);

    /* make sure everything written actually reached stdout */
    if (fflush(stdout) != 0 || ferror(stdout))
        bail("write error on ", "stdout");

    /* done */
    return 0;
}