Mercurial > hg > sv-dependency-builds
comparison src/zlib-1.2.8/examples/zran.c @ 43:5ea0608b923f
Current zlib source
author | Chris Cannam |
---|---|
date | Tue, 18 Oct 2016 14:33:52 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
42:2cd0e3b3e1fd | 43:5ea0608b923f |
---|---|
1 /* zran.c -- example of zlib/gzip stream indexing and random access | |
2 * Copyright (C) 2005, 2012 Mark Adler | |
3 * For conditions of distribution and use, see copyright notice in zlib.h | |
4 Version 1.1 29 Sep 2012 Mark Adler */ | |
5 | |
6 /* Version History: | |
7 1.0 29 May 2005 First version | |
8 1.1 29 Sep 2012 Fix memory reallocation error | |
9 */ | |
10 | |
11 /* Illustrate the use of Z_BLOCK, inflatePrime(), and inflateSetDictionary() | |
12 for random access of a compressed file. A file containing a zlib or gzip | |
13 stream is provided on the command line. The compressed stream is decoded in | |
14 its entirety, and an index built with access points about every SPAN bytes | |
15 in the uncompressed output. The compressed file is left open, and can then | |
16 be read randomly, having to decompress on the average SPAN/2 uncompressed | |
17 bytes before getting to the desired block of data. | |
18 | |
19 An access point can be created at the start of any deflate block, by saving | |
20 the starting file offset and bit of that block, and the 32K bytes of | |
21 uncompressed data that precede that block. Also the uncompressed offset of | |
22 that block is saved to provide a referece for locating a desired starting | |
23 point in the uncompressed stream. build_index() works by decompressing the | |
24 input zlib or gzip stream a block at a time, and at the end of each block | |
25 deciding if enough uncompressed data has gone by to justify the creation of | |
26 a new access point. If so, that point is saved in a data structure that | |
27 grows as needed to accommodate the points. | |
28 | |
29 To use the index, an offset in the uncompressed data is provided, for which | |
30 the latest accees point at or preceding that offset is located in the index. | |
31 The input file is positioned to the specified location in the index, and if | |
32 necessary the first few bits of the compressed data is read from the file. | |
33 inflate is initialized with those bits and the 32K of uncompressed data, and | |
34 the decompression then proceeds until the desired offset in the file is | |
35 reached. Then the decompression continues to read the desired uncompressed | |
36 data from the file. | |
37 | |
38 Another approach would be to generate the index on demand. In that case, | |
39 requests for random access reads from the compressed data would try to use | |
40 the index, but if a read far enough past the end of the index is required, | |
41 then further index entries would be generated and added. | |
42 | |
43 There is some fair bit of overhead to starting inflation for the random | |
44 access, mainly copying the 32K byte dictionary. So if small pieces of the | |
45 file are being accessed, it would make sense to implement a cache to hold | |
46 some lookahead and avoid many calls to extract() for small lengths. | |
47 | |
48 Another way to build an index would be to use inflateCopy(). That would | |
49 not be constrained to have access points at block boundaries, but requires | |
50 more memory per access point, and also cannot be saved to file due to the | |
51 use of pointers in the state. The approach here allows for storage of the | |
52 index in a file. | |
53 */ | |
54 | |
55 #include <stdio.h> | |
56 #include <stdlib.h> | |
57 #include <string.h> | |
58 #include "zlib.h" | |
59 | |
60 #define local static | |
61 | |
62 #define SPAN 1048576L /* desired distance between access points */ | |
63 #define WINSIZE 32768U /* sliding window size */ | |
64 #define CHUNK 16384 /* file input buffer size */ | |
65 | |
66 /* access point entry */ | |
67 struct point { | |
68 off_t out; /* corresponding offset in uncompressed data */ | |
69 off_t in; /* offset in input file of first full byte */ | |
70 int bits; /* number of bits (1-7) from byte at in - 1, or 0 */ | |
71 unsigned char window[WINSIZE]; /* preceding 32K of uncompressed data */ | |
72 }; | |
73 | |
74 /* access point list */ | |
75 struct access { | |
76 int have; /* number of list entries filled in */ | |
77 int size; /* number of list entries allocated */ | |
78 struct point *list; /* allocated list */ | |
79 }; | |
80 | |
81 /* Deallocate an index built by build_index() */ | |
82 local void free_index(struct access *index) | |
83 { | |
84 if (index != NULL) { | |
85 free(index->list); | |
86 free(index); | |
87 } | |
88 } | |
89 | |
90 /* Add an entry to the access point list. If out of memory, deallocate the | |
91 existing list and return NULL. */ | |
92 local struct access *addpoint(struct access *index, int bits, | |
93 off_t in, off_t out, unsigned left, unsigned char *window) | |
94 { | |
95 struct point *next; | |
96 | |
97 /* if list is empty, create it (start with eight points) */ | |
98 if (index == NULL) { | |
99 index = malloc(sizeof(struct access)); | |
100 if (index == NULL) return NULL; | |
101 index->list = malloc(sizeof(struct point) << 3); | |
102 if (index->list == NULL) { | |
103 free(index); | |
104 return NULL; | |
105 } | |
106 index->size = 8; | |
107 index->have = 0; | |
108 } | |
109 | |
110 /* if list is full, make it bigger */ | |
111 else if (index->have == index->size) { | |
112 index->size <<= 1; | |
113 next = realloc(index->list, sizeof(struct point) * index->size); | |
114 if (next == NULL) { | |
115 free_index(index); | |
116 return NULL; | |
117 } | |
118 index->list = next; | |
119 } | |
120 | |
121 /* fill in entry and increment how many we have */ | |
122 next = index->list + index->have; | |
123 next->bits = bits; | |
124 next->in = in; | |
125 next->out = out; | |
126 if (left) | |
127 memcpy(next->window, window + WINSIZE - left, left); | |
128 if (left < WINSIZE) | |
129 memcpy(next->window + left, window, WINSIZE - left); | |
130 index->have++; | |
131 | |
132 /* return list, possibly reallocated */ | |
133 return index; | |
134 } | |
135 | |
136 /* Make one entire pass through the compressed stream and build an index, with | |
137 access points about every span bytes of uncompressed output -- span is | |
138 chosen to balance the speed of random access against the memory requirements | |
139 of the list, about 32K bytes per access point. Note that data after the end | |
140 of the first zlib or gzip stream in the file is ignored. build_index() | |
141 returns the number of access points on success (>= 1), Z_MEM_ERROR for out | |
142 of memory, Z_DATA_ERROR for an error in the input file, or Z_ERRNO for a | |
143 file read error. On success, *built points to the resulting index. */ | |
144 local int build_index(FILE *in, off_t span, struct access **built) | |
145 { | |
146 int ret; | |
147 off_t totin, totout; /* our own total counters to avoid 4GB limit */ | |
148 off_t last; /* totout value of last access point */ | |
149 struct access *index; /* access points being generated */ | |
150 z_stream strm; | |
151 unsigned char input[CHUNK]; | |
152 unsigned char window[WINSIZE]; | |
153 | |
154 /* initialize inflate */ | |
155 strm.zalloc = Z_NULL; | |
156 strm.zfree = Z_NULL; | |
157 strm.opaque = Z_NULL; | |
158 strm.avail_in = 0; | |
159 strm.next_in = Z_NULL; | |
160 ret = inflateInit2(&strm, 47); /* automatic zlib or gzip decoding */ | |
161 if (ret != Z_OK) | |
162 return ret; | |
163 | |
164 /* inflate the input, maintain a sliding window, and build an index -- this | |
165 also validates the integrity of the compressed data using the check | |
166 information at the end of the gzip or zlib stream */ | |
167 totin = totout = last = 0; | |
168 index = NULL; /* will be allocated by first addpoint() */ | |
169 strm.avail_out = 0; | |
170 do { | |
171 /* get some compressed data from input file */ | |
172 strm.avail_in = fread(input, 1, CHUNK, in); | |
173 if (ferror(in)) { | |
174 ret = Z_ERRNO; | |
175 goto build_index_error; | |
176 } | |
177 if (strm.avail_in == 0) { | |
178 ret = Z_DATA_ERROR; | |
179 goto build_index_error; | |
180 } | |
181 strm.next_in = input; | |
182 | |
183 /* process all of that, or until end of stream */ | |
184 do { | |
185 /* reset sliding window if necessary */ | |
186 if (strm.avail_out == 0) { | |
187 strm.avail_out = WINSIZE; | |
188 strm.next_out = window; | |
189 } | |
190 | |
191 /* inflate until out of input, output, or at end of block -- | |
192 update the total input and output counters */ | |
193 totin += strm.avail_in; | |
194 totout += strm.avail_out; | |
195 ret = inflate(&strm, Z_BLOCK); /* return at end of block */ | |
196 totin -= strm.avail_in; | |
197 totout -= strm.avail_out; | |
198 if (ret == Z_NEED_DICT) | |
199 ret = Z_DATA_ERROR; | |
200 if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR) | |
201 goto build_index_error; | |
202 if (ret == Z_STREAM_END) | |
203 break; | |
204 | |
205 /* if at end of block, consider adding an index entry (note that if | |
206 data_type indicates an end-of-block, then all of the | |
207 uncompressed data from that block has been delivered, and none | |
208 of the compressed data after that block has been consumed, | |
209 except for up to seven bits) -- the totout == 0 provides an | |
210 entry point after the zlib or gzip header, and assures that the | |
211 index always has at least one access point; we avoid creating an | |
212 access point after the last block by checking bit 6 of data_type | |
213 */ | |
214 if ((strm.data_type & 128) && !(strm.data_type & 64) && | |
215 (totout == 0 || totout - last > span)) { | |
216 index = addpoint(index, strm.data_type & 7, totin, | |
217 totout, strm.avail_out, window); | |
218 if (index == NULL) { | |
219 ret = Z_MEM_ERROR; | |
220 goto build_index_error; | |
221 } | |
222 last = totout; | |
223 } | |
224 } while (strm.avail_in != 0); | |
225 } while (ret != Z_STREAM_END); | |
226 | |
227 /* clean up and return index (release unused entries in list) */ | |
228 (void)inflateEnd(&strm); | |
229 index->list = realloc(index->list, sizeof(struct point) * index->have); | |
230 index->size = index->have; | |
231 *built = index; | |
232 return index->size; | |
233 | |
234 /* return error */ | |
235 build_index_error: | |
236 (void)inflateEnd(&strm); | |
237 if (index != NULL) | |
238 free_index(index); | |
239 return ret; | |
240 } | |
241 | |
242 /* Use the index to read len bytes from offset into buf, return bytes read or | |
243 negative for error (Z_DATA_ERROR or Z_MEM_ERROR). If data is requested past | |
244 the end of the uncompressed data, then extract() will return a value less | |
245 than len, indicating how much as actually read into buf. This function | |
246 should not return a data error unless the file was modified since the index | |
247 was generated. extract() may also return Z_ERRNO if there is an error on | |
248 reading or seeking the input file. */ | |
249 local int extract(FILE *in, struct access *index, off_t offset, | |
250 unsigned char *buf, int len) | |
251 { | |
252 int ret, skip; | |
253 z_stream strm; | |
254 struct point *here; | |
255 unsigned char input[CHUNK]; | |
256 unsigned char discard[WINSIZE]; | |
257 | |
258 /* proceed only if something reasonable to do */ | |
259 if (len < 0) | |
260 return 0; | |
261 | |
262 /* find where in stream to start */ | |
263 here = index->list; | |
264 ret = index->have; | |
265 while (--ret && here[1].out <= offset) | |
266 here++; | |
267 | |
268 /* initialize file and inflate state to start there */ | |
269 strm.zalloc = Z_NULL; | |
270 strm.zfree = Z_NULL; | |
271 strm.opaque = Z_NULL; | |
272 strm.avail_in = 0; | |
273 strm.next_in = Z_NULL; | |
274 ret = inflateInit2(&strm, -15); /* raw inflate */ | |
275 if (ret != Z_OK) | |
276 return ret; | |
277 ret = fseeko(in, here->in - (here->bits ? 1 : 0), SEEK_SET); | |
278 if (ret == -1) | |
279 goto extract_ret; | |
280 if (here->bits) { | |
281 ret = getc(in); | |
282 if (ret == -1) { | |
283 ret = ferror(in) ? Z_ERRNO : Z_DATA_ERROR; | |
284 goto extract_ret; | |
285 } | |
286 (void)inflatePrime(&strm, here->bits, ret >> (8 - here->bits)); | |
287 } | |
288 (void)inflateSetDictionary(&strm, here->window, WINSIZE); | |
289 | |
290 /* skip uncompressed bytes until offset reached, then satisfy request */ | |
291 offset -= here->out; | |
292 strm.avail_in = 0; | |
293 skip = 1; /* while skipping to offset */ | |
294 do { | |
295 /* define where to put uncompressed data, and how much */ | |
296 if (offset == 0 && skip) { /* at offset now */ | |
297 strm.avail_out = len; | |
298 strm.next_out = buf; | |
299 skip = 0; /* only do this once */ | |
300 } | |
301 if (offset > WINSIZE) { /* skip WINSIZE bytes */ | |
302 strm.avail_out = WINSIZE; | |
303 strm.next_out = discard; | |
304 offset -= WINSIZE; | |
305 } | |
306 else if (offset != 0) { /* last skip */ | |
307 strm.avail_out = (unsigned)offset; | |
308 strm.next_out = discard; | |
309 offset = 0; | |
310 } | |
311 | |
312 /* uncompress until avail_out filled, or end of stream */ | |
313 do { | |
314 if (strm.avail_in == 0) { | |
315 strm.avail_in = fread(input, 1, CHUNK, in); | |
316 if (ferror(in)) { | |
317 ret = Z_ERRNO; | |
318 goto extract_ret; | |
319 } | |
320 if (strm.avail_in == 0) { | |
321 ret = Z_DATA_ERROR; | |
322 goto extract_ret; | |
323 } | |
324 strm.next_in = input; | |
325 } | |
326 ret = inflate(&strm, Z_NO_FLUSH); /* normal inflate */ | |
327 if (ret == Z_NEED_DICT) | |
328 ret = Z_DATA_ERROR; | |
329 if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR) | |
330 goto extract_ret; | |
331 if (ret == Z_STREAM_END) | |
332 break; | |
333 } while (strm.avail_out != 0); | |
334 | |
335 /* if reach end of stream, then don't keep trying to get more */ | |
336 if (ret == Z_STREAM_END) | |
337 break; | |
338 | |
339 /* do until offset reached and requested data read, or stream ends */ | |
340 } while (skip); | |
341 | |
342 /* compute number of uncompressed bytes read after offset */ | |
343 ret = skip ? 0 : len - strm.avail_out; | |
344 | |
345 /* clean up and return bytes read or error */ | |
346 extract_ret: | |
347 (void)inflateEnd(&strm); | |
348 return ret; | |
349 } | |
350 | |
351 /* Demonstrate the use of build_index() and extract() by processing the file | |
352 provided on the command line, and the extracting 16K from about 2/3rds of | |
353 the way through the uncompressed output, and writing that to stdout. */ | |
354 int main(int argc, char **argv) | |
355 { | |
356 int len; | |
357 off_t offset; | |
358 FILE *in; | |
359 struct access *index = NULL; | |
360 unsigned char buf[CHUNK]; | |
361 | |
362 /* open input file */ | |
363 if (argc != 2) { | |
364 fprintf(stderr, "usage: zran file.gz\n"); | |
365 return 1; | |
366 } | |
367 in = fopen(argv[1], "rb"); | |
368 if (in == NULL) { | |
369 fprintf(stderr, "zran: could not open %s for reading\n", argv[1]); | |
370 return 1; | |
371 } | |
372 | |
373 /* build index */ | |
374 len = build_index(in, SPAN, &index); | |
375 if (len < 0) { | |
376 fclose(in); | |
377 switch (len) { | |
378 case Z_MEM_ERROR: | |
379 fprintf(stderr, "zran: out of memory\n"); | |
380 break; | |
381 case Z_DATA_ERROR: | |
382 fprintf(stderr, "zran: compressed data error in %s\n", argv[1]); | |
383 break; | |
384 case Z_ERRNO: | |
385 fprintf(stderr, "zran: read error on %s\n", argv[1]); | |
386 break; | |
387 default: | |
388 fprintf(stderr, "zran: error %d while building index\n", len); | |
389 } | |
390 return 1; | |
391 } | |
392 fprintf(stderr, "zran: built index with %d access points\n", len); | |
393 | |
394 /* use index by reading some bytes from an arbitrary offset */ | |
395 offset = (index->list[index->have - 1].out << 1) / 3; | |
396 len = extract(in, index, offset, buf, CHUNK); | |
397 if (len < 0) | |
398 fprintf(stderr, "zran: extraction failed: %s error\n", | |
399 len == Z_MEM_ERROR ? "out of memory" : "input corrupted"); | |
400 else { | |
401 fwrite(buf, 1, len, stdout); | |
402 fprintf(stderr, "zran: extracted %d bytes at %llu\n", len, offset); | |
403 } | |
404 | |
405 /* clean up and exit */ | |
406 free_index(index); | |
407 fclose(in); | |
408 return 0; | |
409 } |