annotate src/zlib-1.2.7/examples/gzjoin.c @ 168:ceec0dd9ec9c

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam <cannam@all-day-breakfast.com>
date Fri, 07 Feb 2020 11:51:13 +0000
parents 8a15ff55d9af
children
rev   line source
cannam@89 1 /* gzjoin -- command to join gzip files into one gzip file
cannam@89 2
cannam@89 3 Copyright (C) 2004 Mark Adler, all rights reserved
cannam@89 4 version 1.0, 11 Dec 2004
cannam@89 5
cannam@89 6 This software is provided 'as-is', without any express or implied
cannam@89 7 warranty. In no event will the author be held liable for any damages
cannam@89 8 arising from the use of this software.
cannam@89 9
cannam@89 10 Permission is granted to anyone to use this software for any purpose,
cannam@89 11 including commercial applications, and to alter it and redistribute it
cannam@89 12 freely, subject to the following restrictions:
cannam@89 13
cannam@89 14 1. The origin of this software must not be misrepresented; you must not
cannam@89 15 claim that you wrote the original software. If you use this software
cannam@89 16 in a product, an acknowledgment in the product documentation would be
cannam@89 17 appreciated but is not required.
cannam@89 18 2. Altered source versions must be plainly marked as such, and must not be
cannam@89 19 misrepresented as being the original software.
cannam@89 20 3. This notice may not be removed or altered from any source distribution.
cannam@89 21
cannam@89 22 Mark Adler madler@alumni.caltech.edu
cannam@89 23 */
cannam@89 24
cannam@89 25 /*
cannam@89 26 * Change history:
cannam@89 27 *
cannam@89 28 * 1.0 11 Dec 2004 - First version
cannam@89 29 * 1.1 12 Jun 2005 - Changed ssize_t to long for portability
cannam@89 30 */
cannam@89 31
cannam@89 32 /*
cannam@89 33 gzjoin takes one or more gzip files on the command line and writes out a
cannam@89 34 single gzip file that will uncompress to the concatenation of the
cannam@89 35 uncompressed data from the individual gzip files. gzjoin does this without
cannam@89 36 having to recompress any of the data and without having to calculate a new
cannam@89 37 crc32 for the concatenated uncompressed data. gzjoin does however have to
cannam@89 38 decompress all of the input data in order to find the bits in the compressed
cannam@89 39 data that need to be modified to concatenate the streams.
cannam@89 40
cannam@89 41 gzjoin does not do an integrity check on the input gzip files other than
cannam@89 42 checking the gzip header and decompressing the compressed data. They are
cannam@89 43 otherwise assumed to be complete and correct.
cannam@89 44
cannam@89 45 Each joint between gzip files removes at least 18 bytes of previous trailer
cannam@89 46 and subsequent header, and inserts an average of about three bytes to the
cannam@89 47 compressed data in order to connect the streams. The output gzip file
cannam@89 48 has a minimal ten-byte gzip header with no file name or modification time.
cannam@89 49
cannam@89 50 This program was written to illustrate the use of the Z_BLOCK option of
cannam@89 51 inflate() and the crc32_combine() function. gzjoin will not compile with
cannam@89 52 versions of zlib earlier than 1.2.3.
cannam@89 53 */
cannam@89 54
cannam@89 55 #include <stdio.h> /* fputs(), fprintf(), fwrite(), putc() */
cannam@89 56 #include <stdlib.h> /* exit(), malloc(), free() */
cannam@89 57 #include <fcntl.h> /* open() */
cannam@89 58 #include <unistd.h> /* close(), read(), lseek() */
cannam@89 59 #include "zlib.h"
cannam@89 60 /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */
cannam@89 61
cannam@89 62 #define local static
cannam@89 63
/* Print a fatal error message on stderr and terminate with exit status 1.
   why1 and why2 are printed back to back inside the fixed "gzjoin error:"
   message.  Control never comes back to the caller; the int return type is
   there only so that a call can be used as an operand of an expression, as
   the bget() macro does. */
local int bail(char *why1, char *why2)
{
    fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2);
    exit(1);
    /* not reached -- present only to satisfy the declared return type */
    return 0;
}
cannam@89 71
cannam@89 72 /* -- simple buffered file input with access to the buffer -- */
cannam@89 73
/* size in bytes of each input buffer; bskip() masks with CHUNK - 1, so the
   value must be a power of two, and it must fit in an unsigned */
#define CHUNK 32768

/* state of one buffered input file */
typedef struct {
    char *name;             /* file name, kept only for error messages */
    int fd;                 /* descriptor returned by open(), or -1 */
    unsigned left;          /* number of unread bytes starting at next */
    unsigned char *next;    /* first unread byte within buf */
    unsigned char *buf;     /* malloc()ed buffer holding CHUNK bytes */
} bin;
cannam@89 84
cannam@89 85 /* close a buffered file and free allocated memory */
cannam@89 86 local void bclose(bin *in)
cannam@89 87 {
cannam@89 88 if (in != NULL) {
cannam@89 89 if (in->fd != -1)
cannam@89 90 close(in->fd);
cannam@89 91 if (in->buf != NULL)
cannam@89 92 free(in->buf);
cannam@89 93 free(in);
cannam@89 94 }
cannam@89 95 }
cannam@89 96
cannam@89 97 /* open a buffered file for input, return a pointer to type bin, or NULL on
cannam@89 98 failure */
cannam@89 99 local bin *bopen(char *name)
cannam@89 100 {
cannam@89 101 bin *in;
cannam@89 102
cannam@89 103 in = malloc(sizeof(bin));
cannam@89 104 if (in == NULL)
cannam@89 105 return NULL;
cannam@89 106 in->buf = malloc(CHUNK);
cannam@89 107 in->fd = open(name, O_RDONLY, 0);
cannam@89 108 if (in->buf == NULL || in->fd == -1) {
cannam@89 109 bclose(in);
cannam@89 110 return NULL;
cannam@89 111 }
cannam@89 112 in->left = 0;
cannam@89 113 in->next = in->buf;
cannam@89 114 in->name = name;
cannam@89 115 return in;
cannam@89 116 }
cannam@89 117
cannam@89 118 /* load buffer from file, return -1 on read error, 0 or 1 on success, with
cannam@89 119 1 indicating that end-of-file was reached */
cannam@89 120 local int bload(bin *in)
cannam@89 121 {
cannam@89 122 long len;
cannam@89 123
cannam@89 124 if (in == NULL)
cannam@89 125 return -1;
cannam@89 126 if (in->left != 0)
cannam@89 127 return 0;
cannam@89 128 in->next = in->buf;
cannam@89 129 do {
cannam@89 130 len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left);
cannam@89 131 if (len < 0)
cannam@89 132 return -1;
cannam@89 133 in->left += (unsigned)len;
cannam@89 134 } while (len != 0 && in->left < CHUNK);
cannam@89 135 return len == 0 ? 1 : 0;
cannam@89 136 }
cannam@89 137
/* Get the next byte from the buffered file in, as an int.  When the buffer is
   empty bload() is called first; its return value is discarded and only
   in->left is examined afterwards, so an empty buffer after the refill
   attempt -- whether from end of file or a failed read -- ends the program
   through bail().  The argument is parenthesized in the expansion so that any
   pointer expression may be passed, but it is evaluated several times and so
   must not have side effects. */
#define bget(in) ((in)->left ? 0 : bload(in), \
                  (in)->left ? ((in)->left--, *((in)->next)++) : \
                      bail("unexpected end of file on ", (in)->name))
cannam@89 142
cannam@89 143 /* get a four-byte little-endian unsigned integer from file */
cannam@89 144 local unsigned long bget4(bin *in)
cannam@89 145 {
cannam@89 146 unsigned long val;
cannam@89 147
cannam@89 148 val = bget(in);
cannam@89 149 val += (unsigned long)(bget(in)) << 8;
cannam@89 150 val += (unsigned long)(bget(in)) << 16;
cannam@89 151 val += (unsigned long)(bget(in)) << 24;
cannam@89 152 return val;
cannam@89 153 }
cannam@89 154
cannam@89 155 /* skip bytes in file */
cannam@89 156 local void bskip(bin *in, unsigned skip)
cannam@89 157 {
cannam@89 158 /* check pointer */
cannam@89 159 if (in == NULL)
cannam@89 160 return;
cannam@89 161
cannam@89 162 /* easy case -- skip bytes in buffer */
cannam@89 163 if (skip <= in->left) {
cannam@89 164 in->left -= skip;
cannam@89 165 in->next += skip;
cannam@89 166 return;
cannam@89 167 }
cannam@89 168
cannam@89 169 /* skip what's in buffer, discard buffer contents */
cannam@89 170 skip -= in->left;
cannam@89 171 in->left = 0;
cannam@89 172
cannam@89 173 /* seek past multiples of CHUNK bytes */
cannam@89 174 if (skip > CHUNK) {
cannam@89 175 unsigned left;
cannam@89 176
cannam@89 177 left = skip & (CHUNK - 1);
cannam@89 178 if (left == 0) {
cannam@89 179 /* exact number of chunks: seek all the way minus one byte to check
cannam@89 180 for end-of-file with a read */
cannam@89 181 lseek(in->fd, skip - 1, SEEK_CUR);
cannam@89 182 if (read(in->fd, in->buf, 1) != 1)
cannam@89 183 bail("unexpected end of file on ", in->name);
cannam@89 184 return;
cannam@89 185 }
cannam@89 186
cannam@89 187 /* skip the integral chunks, update skip with remainder */
cannam@89 188 lseek(in->fd, skip - left, SEEK_CUR);
cannam@89 189 skip = left;
cannam@89 190 }
cannam@89 191
cannam@89 192 /* read more input and skip remainder */
cannam@89 193 bload(in);
cannam@89 194 if (skip > in->left)
cannam@89 195 bail("unexpected end of file on ", in->name);
cannam@89 196 in->left -= skip;
cannam@89 197 in->next += skip;
cannam@89 198 }
cannam@89 199
cannam@89 200 /* -- end of buffered input functions -- */
cannam@89 201
cannam@89 202 /* skip the gzip header from file in */
cannam@89 203 local void gzhead(bin *in)
cannam@89 204 {
cannam@89 205 int flags;
cannam@89 206
cannam@89 207 /* verify gzip magic header and compression method */
cannam@89 208 if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8)
cannam@89 209 bail(in->name, " is not a valid gzip file");
cannam@89 210
cannam@89 211 /* get and verify flags */
cannam@89 212 flags = bget(in);
cannam@89 213 if ((flags & 0xe0) != 0)
cannam@89 214 bail("unknown reserved bits set in ", in->name);
cannam@89 215
cannam@89 216 /* skip modification time, extra flags, and os */
cannam@89 217 bskip(in, 6);
cannam@89 218
cannam@89 219 /* skip extra field if present */
cannam@89 220 if (flags & 4) {
cannam@89 221 unsigned len;
cannam@89 222
cannam@89 223 len = bget(in);
cannam@89 224 len += (unsigned)(bget(in)) << 8;
cannam@89 225 bskip(in, len);
cannam@89 226 }
cannam@89 227
cannam@89 228 /* skip file name if present */
cannam@89 229 if (flags & 8)
cannam@89 230 while (bget(in) != 0)
cannam@89 231 ;
cannam@89 232
cannam@89 233 /* skip comment if present */
cannam@89 234 if (flags & 16)
cannam@89 235 while (bget(in) != 0)
cannam@89 236 ;
cannam@89 237
cannam@89 238 /* skip header crc if present */
cannam@89 239 if (flags & 2)
cannam@89 240 bskip(in, 2);
cannam@89 241 }
cannam@89 242
/* Write the low 32 bits of val to out as four bytes, least significant byte
   first.  Any higher bits of val are not written. */
local void put4(unsigned long val, FILE *out)
{
    int shift;

    for (shift = 0; shift < 32; shift += 8)
        putc((val >> shift) & 0xff, out);
}
cannam@89 251
cannam@89 252 /* Load up zlib stream from buffered input, bail if end of file */
cannam@89 253 local void zpull(z_streamp strm, bin *in)
cannam@89 254 {
cannam@89 255 if (in->left == 0)
cannam@89 256 bload(in);
cannam@89 257 if (in->left == 0)
cannam@89 258 bail("unexpected end of file on ", in->name);
cannam@89 259 strm->avail_in = in->left;
cannam@89 260 strm->next_in = in->next;
cannam@89 261 }
cannam@89 262
cannam@89 263 /* Write header for gzip file to out and initialize trailer. */
cannam@89 264 local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out)
cannam@89 265 {
cannam@89 266 fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out);
cannam@89 267 *crc = crc32(0L, Z_NULL, 0);
cannam@89 268 *tot = 0;
cannam@89 269 }
cannam@89 270
cannam@89 271 /* Copy the compressed data from name, zeroing the last block bit of the last
cannam@89 272 block if clr is true, and adding empty blocks as needed to get to a byte
cannam@89 273 boundary. If clr is false, then the last block becomes the last block of
cannam@89 274 the output, and the gzip trailer is written. crc and tot maintains the
cannam@89 275 crc and length (modulo 2^32) of the output for the trailer. The resulting
cannam@89 276 gzip file is written to out. gzinit() must be called before the first call
cannam@89 277 of gzcopy() to write the gzip header and to initialize crc and tot. */
cannam@89 278 local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot,
cannam@89 279 FILE *out)
cannam@89 280 {
cannam@89 281 int ret; /* return value from zlib functions */
cannam@89 282 int pos; /* where the "last block" bit is in byte */
cannam@89 283 int last; /* true if processing the last block */
cannam@89 284 bin *in; /* buffered input file */
cannam@89 285 unsigned char *start; /* start of compressed data in buffer */
cannam@89 286 unsigned char *junk; /* buffer for uncompressed data -- discarded */
cannam@89 287 z_off_t len; /* length of uncompressed data (support > 4 GB) */
cannam@89 288 z_stream strm; /* zlib inflate stream */
cannam@89 289
cannam@89 290 /* open gzip file and skip header */
cannam@89 291 in = bopen(name);
cannam@89 292 if (in == NULL)
cannam@89 293 bail("could not open ", name);
cannam@89 294 gzhead(in);
cannam@89 295
cannam@89 296 /* allocate buffer for uncompressed data and initialize raw inflate
cannam@89 297 stream */
cannam@89 298 junk = malloc(CHUNK);
cannam@89 299 strm.zalloc = Z_NULL;
cannam@89 300 strm.zfree = Z_NULL;
cannam@89 301 strm.opaque = Z_NULL;
cannam@89 302 strm.avail_in = 0;
cannam@89 303 strm.next_in = Z_NULL;
cannam@89 304 ret = inflateInit2(&strm, -15);
cannam@89 305 if (junk == NULL || ret != Z_OK)
cannam@89 306 bail("out of memory", "");
cannam@89 307
cannam@89 308 /* inflate and copy compressed data, clear last-block bit if requested */
cannam@89 309 len = 0;
cannam@89 310 zpull(&strm, in);
cannam@89 311 start = strm.next_in;
cannam@89 312 last = start[0] & 1;
cannam@89 313 if (last && clr)
cannam@89 314 start[0] &= ~1;
cannam@89 315 strm.avail_out = 0;
cannam@89 316 for (;;) {
cannam@89 317 /* if input used and output done, write used input and get more */
cannam@89 318 if (strm.avail_in == 0 && strm.avail_out != 0) {
cannam@89 319 fwrite(start, 1, strm.next_in - start, out);
cannam@89 320 start = in->buf;
cannam@89 321 in->left = 0;
cannam@89 322 zpull(&strm, in);
cannam@89 323 }
cannam@89 324
cannam@89 325 /* decompress -- return early when end-of-block reached */
cannam@89 326 strm.avail_out = CHUNK;
cannam@89 327 strm.next_out = junk;
cannam@89 328 ret = inflate(&strm, Z_BLOCK);
cannam@89 329 switch (ret) {
cannam@89 330 case Z_MEM_ERROR:
cannam@89 331 bail("out of memory", "");
cannam@89 332 case Z_DATA_ERROR:
cannam@89 333 bail("invalid compressed data in ", in->name);
cannam@89 334 }
cannam@89 335
cannam@89 336 /* update length of uncompressed data */
cannam@89 337 len += CHUNK - strm.avail_out;
cannam@89 338
cannam@89 339 /* check for block boundary (only get this when block copied out) */
cannam@89 340 if (strm.data_type & 128) {
cannam@89 341 /* if that was the last block, then done */
cannam@89 342 if (last)
cannam@89 343 break;
cannam@89 344
cannam@89 345 /* number of unused bits in last byte */
cannam@89 346 pos = strm.data_type & 7;
cannam@89 347
cannam@89 348 /* find the next last-block bit */
cannam@89 349 if (pos != 0) {
cannam@89 350 /* next last-block bit is in last used byte */
cannam@89 351 pos = 0x100 >> pos;
cannam@89 352 last = strm.next_in[-1] & pos;
cannam@89 353 if (last && clr)
cannam@89 354 strm.next_in[-1] &= ~pos;
cannam@89 355 }
cannam@89 356 else {
cannam@89 357 /* next last-block bit is in next unused byte */
cannam@89 358 if (strm.avail_in == 0) {
cannam@89 359 /* don't have that byte yet -- get it */
cannam@89 360 fwrite(start, 1, strm.next_in - start, out);
cannam@89 361 start = in->buf;
cannam@89 362 in->left = 0;
cannam@89 363 zpull(&strm, in);
cannam@89 364 }
cannam@89 365 last = strm.next_in[0] & 1;
cannam@89 366 if (last && clr)
cannam@89 367 strm.next_in[0] &= ~1;
cannam@89 368 }
cannam@89 369 }
cannam@89 370 }
cannam@89 371
cannam@89 372 /* update buffer with unused input */
cannam@89 373 in->left = strm.avail_in;
cannam@89 374 in->next = strm.next_in;
cannam@89 375
cannam@89 376 /* copy used input, write empty blocks to get to byte boundary */
cannam@89 377 pos = strm.data_type & 7;
cannam@89 378 fwrite(start, 1, in->next - start - 1, out);
cannam@89 379 last = in->next[-1];
cannam@89 380 if (pos == 0 || !clr)
cannam@89 381 /* already at byte boundary, or last file: write last byte */
cannam@89 382 putc(last, out);
cannam@89 383 else {
cannam@89 384 /* append empty blocks to last byte */
cannam@89 385 last &= ((0x100 >> pos) - 1); /* assure unused bits are zero */
cannam@89 386 if (pos & 1) {
cannam@89 387 /* odd -- append an empty stored block */
cannam@89 388 putc(last, out);
cannam@89 389 if (pos == 1)
cannam@89 390 putc(0, out); /* two more bits in block header */
cannam@89 391 fwrite("\0\0\xff\xff", 1, 4, out);
cannam@89 392 }
cannam@89 393 else {
cannam@89 394 /* even -- append 1, 2, or 3 empty fixed blocks */
cannam@89 395 switch (pos) {
cannam@89 396 case 6:
cannam@89 397 putc(last | 8, out);
cannam@89 398 last = 0;
cannam@89 399 case 4:
cannam@89 400 putc(last | 0x20, out);
cannam@89 401 last = 0;
cannam@89 402 case 2:
cannam@89 403 putc(last | 0x80, out);
cannam@89 404 putc(0, out);
cannam@89 405 }
cannam@89 406 }
cannam@89 407 }
cannam@89 408
cannam@89 409 /* update crc and tot */
cannam@89 410 *crc = crc32_combine(*crc, bget4(in), len);
cannam@89 411 *tot += (unsigned long)len;
cannam@89 412
cannam@89 413 /* clean up */
cannam@89 414 inflateEnd(&strm);
cannam@89 415 free(junk);
cannam@89 416 bclose(in);
cannam@89 417
cannam@89 418 /* write trailer if this is the last gzip file */
cannam@89 419 if (!clr) {
cannam@89 420 put4(*crc, out);
cannam@89 421 put4(*tot, out);
cannam@89 422 }
cannam@89 423 }
cannam@89 424
/* Join the gzip files named on the command line and write the resulting
   single gzip file to stdout.  With no arguments a usage message is printed
   on stderr and the exit status is 0.  Any failure while processing ends the
   program through bail() with exit status 1; that includes a failure to
   flush the joined data to stdout, which would otherwise go unnoticed. */
int main(int argc, char **argv)
{
    unsigned long crc, tot;     /* running crc and total uncompressed length */

    /* skip command name */
    argc--;
    argv++;

    /* show usage if no arguments */
    if (argc == 0) {
        fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n",
              stderr);
        return 0;
    }

    /* join gzip files on command line and write to stdout; argc has already
       been decremented when it is passed as clr, so it is zero (write the
       trailer) only for the last file */
    gzinit(&crc, &tot, stdout);
    while (argc--)
        gzcopy(*argv++, argc, &crc, &tot, stdout);

    /* push out buffered data now so that a write failure is reported instead
       of being lost when the stream is closed at exit */
    if (fflush(stdout) != 0 || ferror(stdout))
        bail("could not write ", "standard output");

    /* done */
    return 0;
}