cannam@128: /* fitblk.c: example of fitting compressed output to a specified size cannam@128: Not copyrighted -- provided to the public domain cannam@128: Version 1.1 25 November 2004 Mark Adler */ cannam@128: cannam@128: /* Version history: cannam@128: 1.0 24 Nov 2004 First version cannam@128: 1.1 25 Nov 2004 Change deflateInit2() to deflateInit() cannam@128: Use fixed-size, stack-allocated raw buffers cannam@128: Simplify code moving compression to subroutines cannam@128: Use assert() for internal errors cannam@128: Add detailed description of approach cannam@128: */ cannam@128: cannam@128: /* Approach to just fitting a requested compressed size: cannam@128: cannam@128: fitblk performs three compression passes on a portion of the input cannam@128: data in order to determine how much of that input will compress to cannam@128: nearly the requested output block size. The first pass generates cannam@128: enough deflate blocks to produce output to fill the requested cannam@128: output size plus a specfied excess amount (see the EXCESS define cannam@128: below). The last deflate block may go quite a bit past that, but cannam@128: is discarded. The second pass decompresses and recompresses just cannam@128: the compressed data that fit in the requested plus excess sized cannam@128: buffer. The deflate process is terminated after that amount of cannam@128: input, which is less than the amount consumed on the first pass. cannam@128: The last deflate block of the result will be of a comparable size cannam@128: to the final product, so that the header for that deflate block and cannam@128: the compression ratio for that block will be about the same as in cannam@128: the final product. The third compression pass decompresses the cannam@128: result of the second step, but only the compressed data up to the cannam@128: requested size minus an amount to allow the compressed stream to cannam@128: complete (see the MARGIN define below). That will result in a cannam@128: final compressed stream whose length is less than or equal to the cannam@128: requested size. Assuming sufficient input and a requested size cannam@128: greater than a few hundred bytes, the shortfall will typically be cannam@128: less than ten bytes. cannam@128: cannam@128: If the input is short enough that the first compression completes cannam@128: before filling the requested output size, then that compressed cannam@128: stream is return with no recompression. cannam@128: cannam@128: EXCESS is chosen to be just greater than the shortfall seen in a cannam@128: two pass approach similar to the above. That shortfall is due to cannam@128: the last deflate block compressing more efficiently with a smaller cannam@128: header on the second pass. EXCESS is set to be large enough so cannam@128: that there is enough uncompressed data for the second pass to fill cannam@128: out the requested size, and small enough so that the final deflate cannam@128: block of the second pass will be close in size to the final deflate cannam@128: block of the third and final pass. MARGIN is chosen to be just cannam@128: large enough to assure that the final compression has enough room cannam@128: to complete in all cases. cannam@128: */ cannam@128: cannam@128: #include cannam@128: #include cannam@128: #include cannam@128: #include "zlib.h" cannam@128: cannam@128: #define local static cannam@128: cannam@128: /* print nastygram and leave */ cannam@128: local void quit(char *why) cannam@128: { cannam@128: fprintf(stderr, "fitblk abort: %s\n", why); cannam@128: exit(1); cannam@128: } cannam@128: cannam@128: #define RAWLEN 4096 /* intermediate uncompressed buffer size */ cannam@128: cannam@128: /* compress from file to def until provided buffer is full or end of cannam@128: input reached; return last deflate() return value, or Z_ERRNO if cannam@128: there was read error on the file */ cannam@128: local int partcompress(FILE *in, z_streamp def) cannam@128: { cannam@128: int ret, flush; cannam@128: unsigned char raw[RAWLEN]; cannam@128: cannam@128: flush = Z_NO_FLUSH; cannam@128: do { cannam@128: def->avail_in = fread(raw, 1, RAWLEN, in); cannam@128: if (ferror(in)) cannam@128: return Z_ERRNO; cannam@128: def->next_in = raw; cannam@128: if (feof(in)) cannam@128: flush = Z_FINISH; cannam@128: ret = deflate(def, flush); cannam@128: assert(ret != Z_STREAM_ERROR); cannam@128: } while (def->avail_out != 0 && flush == Z_NO_FLUSH); cannam@128: return ret; cannam@128: } cannam@128: cannam@128: /* recompress from inf's input to def's output; the input for inf and cannam@128: the output for def are set in those structures before calling; cannam@128: return last deflate() return value, or Z_MEM_ERROR if inflate() cannam@128: was not able to allocate enough memory when it needed to */ cannam@128: local int recompress(z_streamp inf, z_streamp def) cannam@128: { cannam@128: int ret, flush; cannam@128: unsigned char raw[RAWLEN]; cannam@128: cannam@128: flush = Z_NO_FLUSH; cannam@128: do { cannam@128: /* decompress */ cannam@128: inf->avail_out = RAWLEN; cannam@128: inf->next_out = raw; cannam@128: ret = inflate(inf, Z_NO_FLUSH); cannam@128: assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR && cannam@128: ret != Z_NEED_DICT); cannam@128: if (ret == Z_MEM_ERROR) cannam@128: return ret; cannam@128: cannam@128: /* compress what was decompresed until done or no room */ cannam@128: def->avail_in = RAWLEN - inf->avail_out; cannam@128: def->next_in = raw; cannam@128: if (inf->avail_out != 0) cannam@128: flush = Z_FINISH; cannam@128: ret = deflate(def, flush); cannam@128: assert(ret != Z_STREAM_ERROR); cannam@128: } while (ret != Z_STREAM_END && def->avail_out != 0); cannam@128: return ret; cannam@128: } cannam@128: cannam@128: #define EXCESS 256 /* empirically determined stream overage */ cannam@128: #define MARGIN 8 /* amount to back off for completion */ cannam@128: cannam@128: /* compress from stdin to fixed-size block on stdout */ cannam@128: int main(int argc, char **argv) cannam@128: { cannam@128: int ret; /* return code */ cannam@128: unsigned size; /* requested fixed output block size */ cannam@128: unsigned have; /* bytes written by deflate() call */ cannam@128: unsigned char *blk; /* intermediate and final stream */ cannam@128: unsigned char *tmp; /* close to desired size stream */ cannam@128: z_stream def, inf; /* zlib deflate and inflate states */ cannam@128: cannam@128: /* get requested output size */ cannam@128: if (argc != 2) cannam@128: quit("need one argument: size of output block"); cannam@128: ret = strtol(argv[1], argv + 1, 10); cannam@128: if (argv[1][0] != 0) cannam@128: quit("argument must be a number"); cannam@128: if (ret < 8) /* 8 is minimum zlib stream size */ cannam@128: quit("need positive size of 8 or greater"); cannam@128: size = (unsigned)ret; cannam@128: cannam@128: /* allocate memory for buffers and compression engine */ cannam@128: blk = malloc(size + EXCESS); cannam@128: def.zalloc = Z_NULL; cannam@128: def.zfree = Z_NULL; cannam@128: def.opaque = Z_NULL; cannam@128: ret = deflateInit(&def, Z_DEFAULT_COMPRESSION); cannam@128: if (ret != Z_OK || blk == NULL) cannam@128: quit("out of memory"); cannam@128: cannam@128: /* compress from stdin until output full, or no more input */ cannam@128: def.avail_out = size + EXCESS; cannam@128: def.next_out = blk; cannam@128: ret = partcompress(stdin, &def); cannam@128: if (ret == Z_ERRNO) cannam@128: quit("error reading input"); cannam@128: cannam@128: /* if it all fit, then size was undersubscribed -- done! */ cannam@128: if (ret == Z_STREAM_END && def.avail_out >= EXCESS) { cannam@128: /* write block to stdout */ cannam@128: have = size + EXCESS - def.avail_out; cannam@128: if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) cannam@128: quit("error writing output"); cannam@128: cannam@128: /* clean up and print results to stderr */ cannam@128: ret = deflateEnd(&def); cannam@128: assert(ret != Z_STREAM_ERROR); cannam@128: free(blk); cannam@128: fprintf(stderr, cannam@128: "%u bytes unused out of %u requested (all input)\n", cannam@128: size - have, size); cannam@128: return 0; cannam@128: } cannam@128: cannam@128: /* it didn't all fit -- set up for recompression */ cannam@128: inf.zalloc = Z_NULL; cannam@128: inf.zfree = Z_NULL; cannam@128: inf.opaque = Z_NULL; cannam@128: inf.avail_in = 0; cannam@128: inf.next_in = Z_NULL; cannam@128: ret = inflateInit(&inf); cannam@128: tmp = malloc(size + EXCESS); cannam@128: if (ret != Z_OK || tmp == NULL) cannam@128: quit("out of memory"); cannam@128: ret = deflateReset(&def); cannam@128: assert(ret != Z_STREAM_ERROR); cannam@128: cannam@128: /* do first recompression close to the right amount */ cannam@128: inf.avail_in = size + EXCESS; cannam@128: inf.next_in = blk; cannam@128: def.avail_out = size + EXCESS; cannam@128: def.next_out = tmp; cannam@128: ret = recompress(&inf, &def); cannam@128: if (ret == Z_MEM_ERROR) cannam@128: quit("out of memory"); cannam@128: cannam@128: /* set up for next reocmpression */ cannam@128: ret = inflateReset(&inf); cannam@128: assert(ret != Z_STREAM_ERROR); cannam@128: ret = deflateReset(&def); cannam@128: assert(ret != Z_STREAM_ERROR); cannam@128: cannam@128: /* do second and final recompression (third compression) */ cannam@128: inf.avail_in = size - MARGIN; /* assure stream will complete */ cannam@128: inf.next_in = tmp; cannam@128: def.avail_out = size; cannam@128: def.next_out = blk; cannam@128: ret = recompress(&inf, &def); cannam@128: if (ret == Z_MEM_ERROR) cannam@128: quit("out of memory"); cannam@128: assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */ cannam@128: cannam@128: /* done -- write block to stdout */ cannam@128: have = size - def.avail_out; cannam@128: if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) cannam@128: quit("error writing output"); cannam@128: cannam@128: /* clean up and print results to stderr */ cannam@128: free(tmp); cannam@128: ret = inflateEnd(&inf); cannam@128: assert(ret != Z_STREAM_ERROR); cannam@128: ret = deflateEnd(&def); cannam@128: assert(ret != Z_STREAM_ERROR); cannam@128: free(blk); cannam@128: fprintf(stderr, cannam@128: "%u bytes unused out of %u requested (%lu input)\n", cannam@128: size - have, size, def.total_in); cannam@128: return 0; cannam@128: }