Mercurial > hg > sv-dependency-builds
comparison src/zlib-1.2.8/examples/gzjoin.c @ 43:5ea0608b923f
Current zlib source
author | Chris Cannam |
---|---|
date | Tue, 18 Oct 2016 14:33:52 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
42:2cd0e3b3e1fd | 43:5ea0608b923f |
---|---|
1 /* gzjoin -- command to join gzip files into one gzip file | |
2 | |
3 Copyright (C) 2004, 2005, 2012 Mark Adler, all rights reserved | |
4 version 1.2, 14 Aug 2012 | |
5 | |
6 This software is provided 'as-is', without any express or implied | |
7 warranty. In no event will the author be held liable for any damages | |
8 arising from the use of this software. | |
9 | |
10 Permission is granted to anyone to use this software for any purpose, | |
11 including commercial applications, and to alter it and redistribute it | |
12 freely, subject to the following restrictions: | |
13 | |
14 1. The origin of this software must not be misrepresented; you must not | |
15 claim that you wrote the original software. If you use this software | |
16 in a product, an acknowledgment in the product documentation would be | |
17 appreciated but is not required. | |
18 2. Altered source versions must be plainly marked as such, and must not be | |
19 misrepresented as being the original software. | |
20 3. This notice may not be removed or altered from any source distribution. | |
21 | |
22 Mark Adler madler@alumni.caltech.edu | |
23 */ | |
24 | |
25 /* | |
26 * Change history: | |
27 * | |
28 * 1.0 11 Dec 2004 - First version | |
29 * 1.1 12 Jun 2005 - Changed ssize_t to long for portability | |
30 * 1.2 14 Aug 2012 - Clean up for z_const usage | |
31 */ | |
32 | |
33 /* | |
34 gzjoin takes one or more gzip files on the command line and writes out a | |
35 single gzip file that will uncompress to the concatenation of the | |
36 uncompressed data from the individual gzip files. gzjoin does this without | |
37 having to recompress any of the data and without having to calculate a new | |
38 crc32 for the concatenated uncompressed data. gzjoin does however have to | |
39 decompress all of the input data in order to find the bits in the compressed | |
40 data that need to be modified to concatenate the streams. | |
41 | |
42 gzjoin does not do an integrity check on the input gzip files other than | |
43 checking the gzip header and decompressing the compressed data. They are | |
44 otherwise assumed to be complete and correct. | |
45 | |
46 Each joint between gzip files removes at least 18 bytes of previous trailer | |
47 and subsequent header, and inserts an average of about three bytes to the | |
48 compressed data in order to connect the streams. The output gzip file | |
49 has a minimal ten-byte gzip header with no file name or modification time. | |
50 | |
51 This program was written to illustrate the use of the Z_BLOCK option of | |
52 inflate() and the crc32_combine() function. gzjoin will not compile with | |
53 versions of zlib earlier than 1.2.3. | |
54 */ | |
55 | |
56 #include <stdio.h> /* fputs(), fprintf(), fwrite(), putc() */ | |
57 #include <stdlib.h> /* exit(), malloc(), free() */ | |
58 #include <fcntl.h> /* open() */ | |
59 #include <unistd.h> /* close(), read(), lseek() */ | |
60 #include "zlib.h" | |
61 /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */ | |
62 | |
/* zlib convention: "local" marks definitions private to this source file */
#define local static
64 | |
65 /* exit with an error (return a value to allow use in an expression) */ | |
66 local int bail(char *why1, char *why2) | |
67 { | |
68 fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2); | |
69 exit(1); | |
70 return 0; | |
71 } | |
72 | |
73 /* -- simple buffered file input with access to the buffer -- */ | |
74 | |
/* Size in bytes of the input buffer and of the scratch output buffer.  It
   must be a power of two (bskip() masks with CHUNK - 1) and must fit in an
   unsigned. */
#define CHUNK 32768
76 | |
/* bin -- state of one buffered input file.  The buffer itself is visible to
   callers so that its contents can be handed directly to inflate(). */
typedef struct {
    char *name;             /* file name, kept for error messages */
    int fd;                 /* descriptor returned by open(), -1 if none */
    unsigned left;          /* number of unread bytes starting at next */
    unsigned char *next;    /* first unread byte within buf */
    unsigned char *buf;     /* allocated buffer, CHUNK bytes long */
} bin;
85 | |
86 /* close a buffered file and free allocated memory */ | |
87 local void bclose(bin *in) | |
88 { | |
89 if (in != NULL) { | |
90 if (in->fd != -1) | |
91 close(in->fd); | |
92 if (in->buf != NULL) | |
93 free(in->buf); | |
94 free(in); | |
95 } | |
96 } | |
97 | |
98 /* open a buffered file for input, return a pointer to type bin, or NULL on | |
99 failure */ | |
100 local bin *bopen(char *name) | |
101 { | |
102 bin *in; | |
103 | |
104 in = malloc(sizeof(bin)); | |
105 if (in == NULL) | |
106 return NULL; | |
107 in->buf = malloc(CHUNK); | |
108 in->fd = open(name, O_RDONLY, 0); | |
109 if (in->buf == NULL || in->fd == -1) { | |
110 bclose(in); | |
111 return NULL; | |
112 } | |
113 in->left = 0; | |
114 in->next = in->buf; | |
115 in->name = name; | |
116 return in; | |
117 } | |
118 | |
119 /* load buffer from file, return -1 on read error, 0 or 1 on success, with | |
120 1 indicating that end-of-file was reached */ | |
121 local int bload(bin *in) | |
122 { | |
123 long len; | |
124 | |
125 if (in == NULL) | |
126 return -1; | |
127 if (in->left != 0) | |
128 return 0; | |
129 in->next = in->buf; | |
130 do { | |
131 len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left); | |
132 if (len < 0) | |
133 return -1; | |
134 in->left += (unsigned)len; | |
135 } while (len != 0 && in->left < CHUNK); | |
136 return len == 0 ? 1 : 0; | |
137 } | |
138 | |
139 /* get a byte from the file, bail if end of file */ | |
140 #define bget(in) (in->left ? 0 : bload(in), \ | |
141 in->left ? (in->left--, *(in->next)++) : \ | |
142 bail("unexpected end of file on ", in->name)) | |
143 | |
144 /* get a four-byte little-endian unsigned integer from file */ | |
145 local unsigned long bget4(bin *in) | |
146 { | |
147 unsigned long val; | |
148 | |
149 val = bget(in); | |
150 val += (unsigned long)(bget(in)) << 8; | |
151 val += (unsigned long)(bget(in)) << 16; | |
152 val += (unsigned long)(bget(in)) << 24; | |
153 return val; | |
154 } | |
155 | |
156 /* skip bytes in file */ | |
157 local void bskip(bin *in, unsigned skip) | |
158 { | |
159 /* check pointer */ | |
160 if (in == NULL) | |
161 return; | |
162 | |
163 /* easy case -- skip bytes in buffer */ | |
164 if (skip <= in->left) { | |
165 in->left -= skip; | |
166 in->next += skip; | |
167 return; | |
168 } | |
169 | |
170 /* skip what's in buffer, discard buffer contents */ | |
171 skip -= in->left; | |
172 in->left = 0; | |
173 | |
174 /* seek past multiples of CHUNK bytes */ | |
175 if (skip > CHUNK) { | |
176 unsigned left; | |
177 | |
178 left = skip & (CHUNK - 1); | |
179 if (left == 0) { | |
180 /* exact number of chunks: seek all the way minus one byte to check | |
181 for end-of-file with a read */ | |
182 lseek(in->fd, skip - 1, SEEK_CUR); | |
183 if (read(in->fd, in->buf, 1) != 1) | |
184 bail("unexpected end of file on ", in->name); | |
185 return; | |
186 } | |
187 | |
188 /* skip the integral chunks, update skip with remainder */ | |
189 lseek(in->fd, skip - left, SEEK_CUR); | |
190 skip = left; | |
191 } | |
192 | |
193 /* read more input and skip remainder */ | |
194 bload(in); | |
195 if (skip > in->left) | |
196 bail("unexpected end of file on ", in->name); | |
197 in->left -= skip; | |
198 in->next += skip; | |
199 } | |
200 | |
201 /* -- end of buffered input functions -- */ | |
202 | |
203 /* skip the gzip header from file in */ | |
204 local void gzhead(bin *in) | |
205 { | |
206 int flags; | |
207 | |
208 /* verify gzip magic header and compression method */ | |
209 if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8) | |
210 bail(in->name, " is not a valid gzip file"); | |
211 | |
212 /* get and verify flags */ | |
213 flags = bget(in); | |
214 if ((flags & 0xe0) != 0) | |
215 bail("unknown reserved bits set in ", in->name); | |
216 | |
217 /* skip modification time, extra flags, and os */ | |
218 bskip(in, 6); | |
219 | |
220 /* skip extra field if present */ | |
221 if (flags & 4) { | |
222 unsigned len; | |
223 | |
224 len = bget(in); | |
225 len += (unsigned)(bget(in)) << 8; | |
226 bskip(in, len); | |
227 } | |
228 | |
229 /* skip file name if present */ | |
230 if (flags & 8) | |
231 while (bget(in) != 0) | |
232 ; | |
233 | |
234 /* skip comment if present */ | |
235 if (flags & 16) | |
236 while (bget(in) != 0) | |
237 ; | |
238 | |
239 /* skip header crc if present */ | |
240 if (flags & 2) | |
241 bskip(in, 2); | |
242 } | |
243 | |
244 /* write a four-byte little-endian unsigned integer to out */ | |
245 local void put4(unsigned long val, FILE *out) | |
246 { | |
247 putc(val & 0xff, out); | |
248 putc((val >> 8) & 0xff, out); | |
249 putc((val >> 16) & 0xff, out); | |
250 putc((val >> 24) & 0xff, out); | |
251 } | |
252 | |
253 /* Load up zlib stream from buffered input, bail if end of file */ | |
254 local void zpull(z_streamp strm, bin *in) | |
255 { | |
256 if (in->left == 0) | |
257 bload(in); | |
258 if (in->left == 0) | |
259 bail("unexpected end of file on ", in->name); | |
260 strm->avail_in = in->left; | |
261 strm->next_in = in->next; | |
262 } | |
263 | |
264 /* Write header for gzip file to out and initialize trailer. */ | |
265 local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out) | |
266 { | |
267 fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out); | |
268 *crc = crc32(0L, Z_NULL, 0); | |
269 *tot = 0; | |
270 } | |
271 | |
272 /* Copy the compressed data from name, zeroing the last block bit of the last | |
273 block if clr is true, and adding empty blocks as needed to get to a byte | |
274 boundary. If clr is false, then the last block becomes the last block of | |
275 the output, and the gzip trailer is written. crc and tot maintains the | |
276 crc and length (modulo 2^32) of the output for the trailer. The resulting | |
277 gzip file is written to out. gzinit() must be called before the first call | |
278 of gzcopy() to write the gzip header and to initialize crc and tot. */ | |
279 local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot, | |
280 FILE *out) | |
281 { | |
282 int ret; /* return value from zlib functions */ | |
283 int pos; /* where the "last block" bit is in byte */ | |
284 int last; /* true if processing the last block */ | |
285 bin *in; /* buffered input file */ | |
286 unsigned char *start; /* start of compressed data in buffer */ | |
287 unsigned char *junk; /* buffer for uncompressed data -- discarded */ | |
288 z_off_t len; /* length of uncompressed data (support > 4 GB) */ | |
289 z_stream strm; /* zlib inflate stream */ | |
290 | |
291 /* open gzip file and skip header */ | |
292 in = bopen(name); | |
293 if (in == NULL) | |
294 bail("could not open ", name); | |
295 gzhead(in); | |
296 | |
297 /* allocate buffer for uncompressed data and initialize raw inflate | |
298 stream */ | |
299 junk = malloc(CHUNK); | |
300 strm.zalloc = Z_NULL; | |
301 strm.zfree = Z_NULL; | |
302 strm.opaque = Z_NULL; | |
303 strm.avail_in = 0; | |
304 strm.next_in = Z_NULL; | |
305 ret = inflateInit2(&strm, -15); | |
306 if (junk == NULL || ret != Z_OK) | |
307 bail("out of memory", ""); | |
308 | |
309 /* inflate and copy compressed data, clear last-block bit if requested */ | |
310 len = 0; | |
311 zpull(&strm, in); | |
312 start = in->next; | |
313 last = start[0] & 1; | |
314 if (last && clr) | |
315 start[0] &= ~1; | |
316 strm.avail_out = 0; | |
317 for (;;) { | |
318 /* if input used and output done, write used input and get more */ | |
319 if (strm.avail_in == 0 && strm.avail_out != 0) { | |
320 fwrite(start, 1, strm.next_in - start, out); | |
321 start = in->buf; | |
322 in->left = 0; | |
323 zpull(&strm, in); | |
324 } | |
325 | |
326 /* decompress -- return early when end-of-block reached */ | |
327 strm.avail_out = CHUNK; | |
328 strm.next_out = junk; | |
329 ret = inflate(&strm, Z_BLOCK); | |
330 switch (ret) { | |
331 case Z_MEM_ERROR: | |
332 bail("out of memory", ""); | |
333 case Z_DATA_ERROR: | |
334 bail("invalid compressed data in ", in->name); | |
335 } | |
336 | |
337 /* update length of uncompressed data */ | |
338 len += CHUNK - strm.avail_out; | |
339 | |
340 /* check for block boundary (only get this when block copied out) */ | |
341 if (strm.data_type & 128) { | |
342 /* if that was the last block, then done */ | |
343 if (last) | |
344 break; | |
345 | |
346 /* number of unused bits in last byte */ | |
347 pos = strm.data_type & 7; | |
348 | |
349 /* find the next last-block bit */ | |
350 if (pos != 0) { | |
351 /* next last-block bit is in last used byte */ | |
352 pos = 0x100 >> pos; | |
353 last = strm.next_in[-1] & pos; | |
354 if (last && clr) | |
355 in->buf[strm.next_in - in->buf - 1] &= ~pos; | |
356 } | |
357 else { | |
358 /* next last-block bit is in next unused byte */ | |
359 if (strm.avail_in == 0) { | |
360 /* don't have that byte yet -- get it */ | |
361 fwrite(start, 1, strm.next_in - start, out); | |
362 start = in->buf; | |
363 in->left = 0; | |
364 zpull(&strm, in); | |
365 } | |
366 last = strm.next_in[0] & 1; | |
367 if (last && clr) | |
368 in->buf[strm.next_in - in->buf] &= ~1; | |
369 } | |
370 } | |
371 } | |
372 | |
373 /* update buffer with unused input */ | |
374 in->left = strm.avail_in; | |
375 in->next = in->buf + (strm.next_in - in->buf); | |
376 | |
377 /* copy used input, write empty blocks to get to byte boundary */ | |
378 pos = strm.data_type & 7; | |
379 fwrite(start, 1, in->next - start - 1, out); | |
380 last = in->next[-1]; | |
381 if (pos == 0 || !clr) | |
382 /* already at byte boundary, or last file: write last byte */ | |
383 putc(last, out); | |
384 else { | |
385 /* append empty blocks to last byte */ | |
386 last &= ((0x100 >> pos) - 1); /* assure unused bits are zero */ | |
387 if (pos & 1) { | |
388 /* odd -- append an empty stored block */ | |
389 putc(last, out); | |
390 if (pos == 1) | |
391 putc(0, out); /* two more bits in block header */ | |
392 fwrite("\0\0\xff\xff", 1, 4, out); | |
393 } | |
394 else { | |
395 /* even -- append 1, 2, or 3 empty fixed blocks */ | |
396 switch (pos) { | |
397 case 6: | |
398 putc(last | 8, out); | |
399 last = 0; | |
400 case 4: | |
401 putc(last | 0x20, out); | |
402 last = 0; | |
403 case 2: | |
404 putc(last | 0x80, out); | |
405 putc(0, out); | |
406 } | |
407 } | |
408 } | |
409 | |
410 /* update crc and tot */ | |
411 *crc = crc32_combine(*crc, bget4(in), len); | |
412 *tot += (unsigned long)len; | |
413 | |
414 /* clean up */ | |
415 inflateEnd(&strm); | |
416 free(junk); | |
417 bclose(in); | |
418 | |
419 /* write trailer if this is the last gzip file */ | |
420 if (!clr) { | |
421 put4(*crc, out); | |
422 put4(*tot, out); | |
423 } | |
424 } | |
425 | |
/* Join the gzip files named on the command line and write the resulting
   single gzip file to stdout.  With no arguments, print a usage message to
   stderr and return 0.  Any error, including a failure to write the output,
   is reported through bail(), which exits with status 1. */
int main(int argc, char **argv)
{
    unsigned long crc, tot;     /* running crc and total uncompressed length */

    /* skip command name */
    argc--;
    argv++;

    /* show usage if no arguments */
    if (argc == 0) {
        fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n",
              stderr);
        return 0;
    }

    /* join gzip files on command line and write to stdout; argc has already
       been decremented when it is passed as clr, so clr is zero (false) only
       for the last file */
    gzinit(&crc, &tot, stdout);
    while (argc--)
        gzcopy(*argv++, argc, &crc, &tot, stdout);

    /* none of the individual writes are checked, so make sure here that
       everything reached stdout instead of exiting successfully with
       truncated output (e.g. on a full disk or a closed pipe) */
    if (fflush(stdout) != 0 || ferror(stdout))
        bail("write error on ", "standard output");

    /* done */
    return 0;
}