annotate src/zlib-1.2.7/examples/gzjoin.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents e13257ea84a4
children
rev   line source
Chris@4 1 /* gzjoin -- command to join gzip files into one gzip file
Chris@4 2
Chris@4 3 Copyright (C) 2004 Mark Adler, all rights reserved
Chris@4 4 version 1.0, 11 Dec 2004
Chris@4 5
Chris@4 6 This software is provided 'as-is', without any express or implied
Chris@4 7 warranty. In no event will the author be held liable for any damages
Chris@4 8 arising from the use of this software.
Chris@4 9
Chris@4 10 Permission is granted to anyone to use this software for any purpose,
Chris@4 11 including commercial applications, and to alter it and redistribute it
Chris@4 12 freely, subject to the following restrictions:
Chris@4 13
Chris@4 14 1. The origin of this software must not be misrepresented; you must not
Chris@4 15 claim that you wrote the original software. If you use this software
Chris@4 16 in a product, an acknowledgment in the product documentation would be
Chris@4 17 appreciated but is not required.
Chris@4 18 2. Altered source versions must be plainly marked as such, and must not be
Chris@4 19 misrepresented as being the original software.
Chris@4 20 3. This notice may not be removed or altered from any source distribution.
Chris@4 21
Chris@4 22 Mark Adler madler@alumni.caltech.edu
Chris@4 23 */
Chris@4 24
Chris@4 25 /*
Chris@4 26 * Change history:
Chris@4 27 *
Chris@4 28 * 1.0 11 Dec 2004 - First version
Chris@4 29 * 1.1 12 Jun 2005 - Changed ssize_t to long for portability
Chris@4 30 */
Chris@4 31
Chris@4 32 /*
Chris@4 33 gzjoin takes one or more gzip files on the command line and writes out a
Chris@4 34 single gzip file that will uncompress to the concatenation of the
Chris@4 35 uncompressed data from the individual gzip files. gzjoin does this without
Chris@4 36 having to recompress any of the data and without having to calculate a new
Chris@4 37 crc32 for the concatenated uncompressed data. gzjoin does however have to
Chris@4 38 decompress all of the input data in order to find the bits in the compressed
Chris@4 39 data that need to be modified to concatenate the streams.
Chris@4 40
Chris@4 41 gzjoin does not do an integrity check on the input gzip files other than
Chris@4 42 checking the gzip header and decompressing the compressed data. They are
Chris@4 43 otherwise assumed to be complete and correct.
Chris@4 44
Chris@4 45 Each joint between gzip files removes at least 18 bytes of previous trailer
Chris@4 46 and subsequent header, and inserts an average of about three bytes to the
Chris@4 47 compressed data in order to connect the streams. The output gzip file
Chris@4 48 has a minimal ten-byte gzip header with no file name or modification time.
Chris@4 49
Chris@4 50 This program was written to illustrate the use of the Z_BLOCK option of
Chris@4 51 inflate() and the crc32_combine() function. gzjoin will not compile with
Chris@4 52 versions of zlib earlier than 1.2.3.
Chris@4 53 */
Chris@4 54
Chris@4 55 #include <stdio.h> /* fputs(), fprintf(), fwrite(), putc() */
Chris@4 56 #include <stdlib.h> /* exit(), malloc(), free() */
Chris@4 57 #include <fcntl.h> /* open() */
Chris@4 58 #include <unistd.h> /* close(), read(), lseek() */
Chris@4 59 #include "zlib.h"
Chris@4 60 /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */
Chris@4 61
Chris@4 62 #define local static
Chris@4 63
Chris@4 64 /* exit with an error (return a value to allow use in an expression) */
Chris@4 65 local int bail(char *why1, char *why2)
Chris@4 66 {
Chris@4 67 fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2);
Chris@4 68 exit(1);
Chris@4 69 return 0;
Chris@4 70 }
Chris@4 71
Chris@4 72 /* -- simple buffered file input with access to the buffer -- */
Chris@4 73
Chris@4 74 #define CHUNK 32768 /* must be a power of two and fit in unsigned */
Chris@4 75
/* bin -- state of one buffered input file */
typedef struct bin_s {
    /* file name, kept for use in error messages */
    char *name;
    /* descriptor of the file being read */
    int fd;
    /* count of unread bytes available starting at next */
    unsigned left;
    /* location of the next byte to be read */
    unsigned char *next;
    /* input buffer, allocated with CHUNK bytes */
    unsigned char *buf;
} bin;
Chris@4 84
Chris@4 85 /* close a buffered file and free allocated memory */
Chris@4 86 local void bclose(bin *in)
Chris@4 87 {
Chris@4 88 if (in != NULL) {
Chris@4 89 if (in->fd != -1)
Chris@4 90 close(in->fd);
Chris@4 91 if (in->buf != NULL)
Chris@4 92 free(in->buf);
Chris@4 93 free(in);
Chris@4 94 }
Chris@4 95 }
Chris@4 96
Chris@4 97 /* open a buffered file for input, return a pointer to type bin, or NULL on
Chris@4 98 failure */
Chris@4 99 local bin *bopen(char *name)
Chris@4 100 {
Chris@4 101 bin *in;
Chris@4 102
Chris@4 103 in = malloc(sizeof(bin));
Chris@4 104 if (in == NULL)
Chris@4 105 return NULL;
Chris@4 106 in->buf = malloc(CHUNK);
Chris@4 107 in->fd = open(name, O_RDONLY, 0);
Chris@4 108 if (in->buf == NULL || in->fd == -1) {
Chris@4 109 bclose(in);
Chris@4 110 return NULL;
Chris@4 111 }
Chris@4 112 in->left = 0;
Chris@4 113 in->next = in->buf;
Chris@4 114 in->name = name;
Chris@4 115 return in;
Chris@4 116 }
Chris@4 117
Chris@4 118 /* load buffer from file, return -1 on read error, 0 or 1 on success, with
Chris@4 119 1 indicating that end-of-file was reached */
Chris@4 120 local int bload(bin *in)
Chris@4 121 {
Chris@4 122 long len;
Chris@4 123
Chris@4 124 if (in == NULL)
Chris@4 125 return -1;
Chris@4 126 if (in->left != 0)
Chris@4 127 return 0;
Chris@4 128 in->next = in->buf;
Chris@4 129 do {
Chris@4 130 len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left);
Chris@4 131 if (len < 0)
Chris@4 132 return -1;
Chris@4 133 in->left += (unsigned)len;
Chris@4 134 } while (len != 0 && in->left < CHUNK);
Chris@4 135 return len == 0 ? 1 : 0;
Chris@4 136 }
Chris@4 137
/* Get the next byte from the buffered file in, loading the buffer first if it
   is empty.  If no byte is available after the load, bail() is called and the
   program exits.  The macro parameter is parenthesized everywhere so that any
   pointer-valued expression can be passed, but note that it is evaluated
   more than once: the argument must not have side effects. */
#define bget(in) ((in)->left ? 0 : bload(in), \
                  (in)->left ? ((in)->left--, *((in)->next)++) : \
                      bail("unexpected end of file on ", (in)->name))
Chris@4 142
Chris@4 143 /* get a four-byte little-endian unsigned integer from file */
Chris@4 144 local unsigned long bget4(bin *in)
Chris@4 145 {
Chris@4 146 unsigned long val;
Chris@4 147
Chris@4 148 val = bget(in);
Chris@4 149 val += (unsigned long)(bget(in)) << 8;
Chris@4 150 val += (unsigned long)(bget(in)) << 16;
Chris@4 151 val += (unsigned long)(bget(in)) << 24;
Chris@4 152 return val;
Chris@4 153 }
Chris@4 154
Chris@4 155 /* skip bytes in file */
Chris@4 156 local void bskip(bin *in, unsigned skip)
Chris@4 157 {
Chris@4 158 /* check pointer */
Chris@4 159 if (in == NULL)
Chris@4 160 return;
Chris@4 161
Chris@4 162 /* easy case -- skip bytes in buffer */
Chris@4 163 if (skip <= in->left) {
Chris@4 164 in->left -= skip;
Chris@4 165 in->next += skip;
Chris@4 166 return;
Chris@4 167 }
Chris@4 168
Chris@4 169 /* skip what's in buffer, discard buffer contents */
Chris@4 170 skip -= in->left;
Chris@4 171 in->left = 0;
Chris@4 172
Chris@4 173 /* seek past multiples of CHUNK bytes */
Chris@4 174 if (skip > CHUNK) {
Chris@4 175 unsigned left;
Chris@4 176
Chris@4 177 left = skip & (CHUNK - 1);
Chris@4 178 if (left == 0) {
Chris@4 179 /* exact number of chunks: seek all the way minus one byte to check
Chris@4 180 for end-of-file with a read */
Chris@4 181 lseek(in->fd, skip - 1, SEEK_CUR);
Chris@4 182 if (read(in->fd, in->buf, 1) != 1)
Chris@4 183 bail("unexpected end of file on ", in->name);
Chris@4 184 return;
Chris@4 185 }
Chris@4 186
Chris@4 187 /* skip the integral chunks, update skip with remainder */
Chris@4 188 lseek(in->fd, skip - left, SEEK_CUR);
Chris@4 189 skip = left;
Chris@4 190 }
Chris@4 191
Chris@4 192 /* read more input and skip remainder */
Chris@4 193 bload(in);
Chris@4 194 if (skip > in->left)
Chris@4 195 bail("unexpected end of file on ", in->name);
Chris@4 196 in->left -= skip;
Chris@4 197 in->next += skip;
Chris@4 198 }
Chris@4 199
Chris@4 200 /* -- end of buffered input functions -- */
Chris@4 201
Chris@4 202 /* skip the gzip header from file in */
Chris@4 203 local void gzhead(bin *in)
Chris@4 204 {
Chris@4 205 int flags;
Chris@4 206
Chris@4 207 /* verify gzip magic header and compression method */
Chris@4 208 if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8)
Chris@4 209 bail(in->name, " is not a valid gzip file");
Chris@4 210
Chris@4 211 /* get and verify flags */
Chris@4 212 flags = bget(in);
Chris@4 213 if ((flags & 0xe0) != 0)
Chris@4 214 bail("unknown reserved bits set in ", in->name);
Chris@4 215
Chris@4 216 /* skip modification time, extra flags, and os */
Chris@4 217 bskip(in, 6);
Chris@4 218
Chris@4 219 /* skip extra field if present */
Chris@4 220 if (flags & 4) {
Chris@4 221 unsigned len;
Chris@4 222
Chris@4 223 len = bget(in);
Chris@4 224 len += (unsigned)(bget(in)) << 8;
Chris@4 225 bskip(in, len);
Chris@4 226 }
Chris@4 227
Chris@4 228 /* skip file name if present */
Chris@4 229 if (flags & 8)
Chris@4 230 while (bget(in) != 0)
Chris@4 231 ;
Chris@4 232
Chris@4 233 /* skip comment if present */
Chris@4 234 if (flags & 16)
Chris@4 235 while (bget(in) != 0)
Chris@4 236 ;
Chris@4 237
Chris@4 238 /* skip header crc if present */
Chris@4 239 if (flags & 2)
Chris@4 240 bskip(in, 2);
Chris@4 241 }
Chris@4 242
Chris@4 243 /* write a four-byte little-endian unsigned integer to out */
Chris@4 244 local void put4(unsigned long val, FILE *out)
Chris@4 245 {
Chris@4 246 putc(val & 0xff, out);
Chris@4 247 putc((val >> 8) & 0xff, out);
Chris@4 248 putc((val >> 16) & 0xff, out);
Chris@4 249 putc((val >> 24) & 0xff, out);
Chris@4 250 }
Chris@4 251
Chris@4 252 /* Load up zlib stream from buffered input, bail if end of file */
Chris@4 253 local void zpull(z_streamp strm, bin *in)
Chris@4 254 {
Chris@4 255 if (in->left == 0)
Chris@4 256 bload(in);
Chris@4 257 if (in->left == 0)
Chris@4 258 bail("unexpected end of file on ", in->name);
Chris@4 259 strm->avail_in = in->left;
Chris@4 260 strm->next_in = in->next;
Chris@4 261 }
Chris@4 262
Chris@4 263 /* Write header for gzip file to out and initialize trailer. */
Chris@4 264 local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out)
Chris@4 265 {
Chris@4 266 fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out);
Chris@4 267 *crc = crc32(0L, Z_NULL, 0);
Chris@4 268 *tot = 0;
Chris@4 269 }
Chris@4 270
/* Copy the compressed data from the gzip file called name to out, zeroing the
   last-block bit of the last block if clr is true, and adding empty blocks as
   needed to get to a byte boundary.  If clr is false, then the last block
   becomes the last block of the output, and the gzip trailer is written.

   name  file to read; its gzip header is skipped, not copied
   clr   true for every file except the final one to be joined
   crc   running crc of the output's uncompressed data, updated here
   tot   running length (modulo 2^32) of that data, updated here
   out   stream that receives the resulting gzip data

   gzinit() must be called before the first call of gzcopy() to write the gzip
   header and to initialize *crc and *tot.  Any failure (file cannot be
   opened, bad header, out of memory, invalid or truncated compressed data)
   ends the program through bail().  The input is decompressed only to locate
   the block boundaries; the decompressed bytes themselves are discarded. */
local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot,
                  FILE *out)
{
    int ret;                /* return value from zlib functions */
    int pos;                /* where the "last block" bit is in byte */
    int last;               /* true if processing the last block */
    bin *in;                /* buffered input file */
    unsigned char *start;   /* start of compressed data in buffer */
    unsigned char *junk;    /* buffer for uncompressed data -- discarded */
    z_off_t len;            /* length of uncompressed data (support > 4 GB) */
    z_stream strm;          /* zlib inflate stream */

    /* open gzip file and skip header */
    in = bopen(name);
    if (in == NULL)
        bail("could not open ", name);
    gzhead(in);

    /* allocate buffer for uncompressed data and initialize raw inflate
       stream (both results are tested together after the calls) */
    junk = malloc(CHUNK);
    strm.zalloc = Z_NULL;
    strm.zfree = Z_NULL;
    strm.opaque = Z_NULL;
    strm.avail_in = 0;
    strm.next_in = Z_NULL;
    ret = inflateInit2(&strm, -15);
    if (junk == NULL || ret != Z_OK)
        bail("out of memory", "");

    /* inflate and copy compressed data, clear last-block bit if requested;
       the last-block bit of the first block is the low bit of the first
       byte of compressed data */
    len = 0;
    zpull(&strm, in);
    start = strm.next_in;
    last = start[0] & 1;
    if (last && clr)
        start[0] &= ~1;
    strm.avail_out = 0;     /* keeps the refill test below false on entry */
    for (;;) {
        /* if input used and output done, write used input and get more;
           the bytes are written from the buffer itself, so any last-block
           bit cleared in place above or below is what reaches out */
        if (strm.avail_in == 0 && strm.avail_out != 0) {
            fwrite(start, 1, strm.next_in - start, out);
            start = in->buf;
            in->left = 0;
            zpull(&strm, in);
        }

        /* decompress -- return early when end-of-block reached */
        strm.avail_out = CHUNK;
        strm.next_out = junk;
        ret = inflate(&strm, Z_BLOCK);
        switch (ret) {
        /* no break statements are needed: bail() does not return */
        case Z_MEM_ERROR:
            bail("out of memory", "");
        case Z_DATA_ERROR:
            bail("invalid compressed data in ", in->name);
        }

        /* update length of uncompressed data */
        len += CHUNK - strm.avail_out;

        /* check for block boundary (only get this when block copied out) */
        if (strm.data_type & 128) {
            /* if that was the last block, then done */
            if (last)
                break;

            /* number of unused bits in last byte */
            pos = strm.data_type & 7;

            /* find the next last-block bit */
            if (pos != 0) {
                /* next last-block bit is in last used byte; pos is turned
                   from a count of unused bits into a mask selecting the
                   lowest of those unused bits */
                pos = 0x100 >> pos;
                last = strm.next_in[-1] & pos;
                if (last && clr)
                    strm.next_in[-1] &= ~pos;
            }
            else {
                /* next last-block bit is in next unused byte */
                if (strm.avail_in == 0) {
                    /* don't have that byte yet -- get it */
                    fwrite(start, 1, strm.next_in - start, out);
                    start = in->buf;
                    in->left = 0;
                    zpull(&strm, in);
                }
                last = strm.next_in[0] & 1;
                if (last && clr)
                    strm.next_in[0] &= ~1;
            }
        }
    }

    /* update buffer with unused input, so that the trailer can be read with
       bget4() below */
    in->left = strm.avail_in;
    in->next = strm.next_in;

    /* copy used input, write empty blocks to get to byte boundary; the final
       used byte is held back in last because it may need bits added to it */
    pos = strm.data_type & 7;
    fwrite(start, 1, in->next - start - 1, out);
    last = in->next[-1];
    if (pos == 0 || !clr)
        /* already at byte boundary, or last file: write last byte */
        putc(last, out);
    else {
        /* append empty blocks to last byte */
        last &= ((0x100 >> pos) - 1);       /* assure unused bits are zero */
        if (pos & 1) {
            /* odd -- append an empty stored block */
            putc(last, out);
            if (pos == 1)
                putc(0, out);               /* two more bits in block header */
            /* stored block length of zero followed by its complement */
            fwrite("\0\0\xff\xff", 1, 4, out);
        }
        else {
            /* even -- append 1, 2, or 3 empty fixed blocks; each case
               deliberately falls through to the next, so pos == 6 writes
               three blocks, pos == 4 two, and pos == 2 one */
            switch (pos) {
            case 6:
                putc(last | 8, out);
                last = 0;
            case 4:
                putc(last | 0x20, out);
                last = 0;
            case 2:
                putc(last | 0x80, out);
                putc(0, out);
            }
        }
    }

    /* update crc and tot; the crc of this file's uncompressed data is taken
       from the first four bytes of its trailer rather than computed */
    *crc = crc32_combine(*crc, bget4(in), len);
    *tot += (unsigned long)len;

    /* clean up */
    inflateEnd(&strm);
    free(junk);
    bclose(in);

    /* write trailer if this is the last gzip file */
    if (!clr) {
        put4(*crc, out);
        put4(*tot, out);
    }
}
Chris@4 424
/* Join the gzip files named on the command line and write the resulting
   single gzip file to stdout.  With no file arguments a usage message is
   written to stderr and 0 is returned.  Returns 0 on success; any failure
   ends the program with exit status 1 through bail(), including a failure
   to write the output. */
int main(int argc, char **argv)
{
    unsigned long crc, tot;     /* running crc and total uncompressed length */

    /* skip command name */
    argc--;
    argv++;

    /* show usage if no arguments (argc can be zero on entry when the program
       is started with an empty argument vector, which leaves the count
       negative here -- that must not reach the loop below) */
    if (argc <= 0) {
        fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n",
              stderr);
        return 0;
    }

    /* join gzip files on command line and write to stdout; the remaining
       argument count doubles as the "not the last file" flag for gzcopy() */
    gzinit(&crc, &tot, stdout);
    while (argc--)
        gzcopy(*argv++, argc, &crc, &tot, stdout);

    /* none of the individual writes is checked, so detect output failure
       (for example a full disk or a closed pipe) here, after forcing out
       whatever is still buffered */
    if (fflush(stdout) == EOF || ferror(stdout))
        bail("write error on ", "standard output");

    /* done */
    return 0;
}