cannam@89: /* gzread.c -- zlib functions for reading gzip files cannam@89: * Copyright (C) 2004, 2005, 2010, 2011, 2012 Mark Adler cannam@89: * For conditions of distribution and use, see copyright notice in zlib.h cannam@89: */ cannam@89: cannam@89: #include "gzguts.h" cannam@89: cannam@89: /* Local functions */ cannam@89: local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *)); cannam@89: local int gz_avail OF((gz_statep)); cannam@89: local int gz_look OF((gz_statep)); cannam@89: local int gz_decomp OF((gz_statep)); cannam@89: local int gz_fetch OF((gz_statep)); cannam@89: local int gz_skip OF((gz_statep, z_off64_t)); cannam@89: cannam@89: /* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from cannam@89: state->fd, and update state->eof, state->err, and state->msg as appropriate. cannam@89: This function needs to loop on read(), since read() is not guaranteed to cannam@89: read the number of bytes requested, depending on the type of descriptor. */ cannam@89: local int gz_load(state, buf, len, have) cannam@89: gz_statep state; cannam@89: unsigned char *buf; cannam@89: unsigned len; cannam@89: unsigned *have; cannam@89: { cannam@89: int ret; cannam@89: cannam@89: *have = 0; cannam@89: do { cannam@89: ret = read(state->fd, buf + *have, len - *have); cannam@89: if (ret <= 0) cannam@89: break; cannam@89: *have += ret; cannam@89: } while (*have < len); cannam@89: if (ret < 0) { cannam@89: gz_error(state, Z_ERRNO, zstrerror()); cannam@89: return -1; cannam@89: } cannam@89: if (ret == 0) cannam@89: state->eof = 1; cannam@89: return 0; cannam@89: } cannam@89: cannam@89: /* Load up input buffer and set eof flag if last data loaded -- return -1 on cannam@89: error, 0 otherwise. Note that the eof flag is set when the end of the input cannam@89: file is reached, even though there may be unused data in the buffer. Once cannam@89: that data has been used, no more attempts will be made to read the file. cannam@89: If strm->avail_in != 0, then the current data is moved to the beginning of cannam@89: the input buffer, and then the remainder of the buffer is loaded with the cannam@89: available data from the input file. */ cannam@89: local int gz_avail(state) cannam@89: gz_statep state; cannam@89: { cannam@89: unsigned got; cannam@89: z_streamp strm = &(state->strm); cannam@89: cannam@89: if (state->err != Z_OK && state->err != Z_BUF_ERROR) cannam@89: return -1; cannam@89: if (state->eof == 0) { cannam@89: if (strm->avail_in) { /* copy what's there to the start */ cannam@89: unsigned char *p = state->in, *q = strm->next_in; cannam@89: unsigned n = strm->avail_in; cannam@89: do { cannam@89: *p++ = *q++; cannam@89: } while (--n); cannam@89: } cannam@89: if (gz_load(state, state->in + strm->avail_in, cannam@89: state->size - strm->avail_in, &got) == -1) cannam@89: return -1; cannam@89: strm->avail_in += got; cannam@89: strm->next_in = state->in; cannam@89: } cannam@89: return 0; cannam@89: } cannam@89: cannam@89: /* Look for gzip header, set up for inflate or copy. state->x.have must be 0. cannam@89: If this is the first time in, allocate required memory. state->how will be cannam@89: left unchanged if there is no more input data available, will be set to COPY cannam@89: if there is no gzip header and direct copying will be performed, or it will cannam@89: be set to GZIP for decompression. If direct copying, then leftover input cannam@89: data from the input buffer will be copied to the output buffer. In that cannam@89: case, all further file reads will be directly to either the output buffer or cannam@89: a user buffer. If decompressing, the inflate state will be initialized. cannam@89: gz_look() will return 0 on success or -1 on failure. */ cannam@89: local int gz_look(state) cannam@89: gz_statep state; cannam@89: { cannam@89: z_streamp strm = &(state->strm); cannam@89: cannam@89: /* allocate read buffers and inflate memory */ cannam@89: if (state->size == 0) { cannam@89: /* allocate buffers */ cannam@89: state->in = malloc(state->want); cannam@89: state->out = malloc(state->want << 1); cannam@89: if (state->in == NULL || state->out == NULL) { cannam@89: if (state->out != NULL) cannam@89: free(state->out); cannam@89: if (state->in != NULL) cannam@89: free(state->in); cannam@89: gz_error(state, Z_MEM_ERROR, "out of memory"); cannam@89: return -1; cannam@89: } cannam@89: state->size = state->want; cannam@89: cannam@89: /* allocate inflate memory */ cannam@89: state->strm.zalloc = Z_NULL; cannam@89: state->strm.zfree = Z_NULL; cannam@89: state->strm.opaque = Z_NULL; cannam@89: state->strm.avail_in = 0; cannam@89: state->strm.next_in = Z_NULL; cannam@89: if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) { /* gunzip */ cannam@89: free(state->out); cannam@89: free(state->in); cannam@89: state->size = 0; cannam@89: gz_error(state, Z_MEM_ERROR, "out of memory"); cannam@89: return -1; cannam@89: } cannam@89: } cannam@89: cannam@89: /* get at least the magic bytes in the input buffer */ cannam@89: if (strm->avail_in < 2) { cannam@89: if (gz_avail(state) == -1) cannam@89: return -1; cannam@89: if (strm->avail_in == 0) cannam@89: return 0; cannam@89: } cannam@89: cannam@89: /* look for gzip magic bytes -- if there, do gzip decoding (note: there is cannam@89: a logical dilemma here when considering the case of a partially written cannam@89: gzip file, to wit, if a single 31 byte is written, then we cannot tell cannam@89: whether this is a single-byte file, or just a partially written gzip cannam@89: file -- for here we assume that if a gzip file is being written, then cannam@89: the header will be written in a single operation, so that reading a cannam@89: single byte is sufficient indication that it is not a gzip file) */ cannam@89: if (strm->avail_in > 1 && cannam@89: strm->next_in[0] == 31 && strm->next_in[1] == 139) { cannam@89: inflateReset(strm); cannam@89: state->how = GZIP; cannam@89: state->direct = 0; cannam@89: return 0; cannam@89: } cannam@89: cannam@89: /* no gzip header -- if we were decoding gzip before, then this is trailing cannam@89: garbage. Ignore the trailing garbage and finish. */ cannam@89: if (state->direct == 0) { cannam@89: strm->avail_in = 0; cannam@89: state->eof = 1; cannam@89: state->x.have = 0; cannam@89: return 0; cannam@89: } cannam@89: cannam@89: /* doing raw i/o, copy any leftover input to output -- this assumes that cannam@89: the output buffer is larger than the input buffer, which also assures cannam@89: space for gzungetc() */ cannam@89: state->x.next = state->out; cannam@89: if (strm->avail_in) { cannam@89: memcpy(state->x.next, strm->next_in, strm->avail_in); cannam@89: state->x.have = strm->avail_in; cannam@89: strm->avail_in = 0; cannam@89: } cannam@89: state->how = COPY; cannam@89: state->direct = 1; cannam@89: return 0; cannam@89: } cannam@89: cannam@89: /* Decompress from input to the provided next_out and avail_out in the state. cannam@89: On return, state->x.have and state->x.next point to the just decompressed cannam@89: data. If the gzip stream completes, state->how is reset to LOOK to look for cannam@89: the next gzip stream or raw data, once state->x.have is depleted. Returns 0 cannam@89: on success, -1 on failure. */ cannam@89: local int gz_decomp(state) cannam@89: gz_statep state; cannam@89: { cannam@89: int ret = Z_OK; cannam@89: unsigned had; cannam@89: z_streamp strm = &(state->strm); cannam@89: cannam@89: /* fill output buffer up to end of deflate stream */ cannam@89: had = strm->avail_out; cannam@89: do { cannam@89: /* get more input for inflate() */ cannam@89: if (strm->avail_in == 0 && gz_avail(state) == -1) cannam@89: return -1; cannam@89: if (strm->avail_in == 0) { cannam@89: gz_error(state, Z_BUF_ERROR, "unexpected end of file"); cannam@89: break; cannam@89: } cannam@89: cannam@89: /* decompress and handle errors */ cannam@89: ret = inflate(strm, Z_NO_FLUSH); cannam@89: if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) { cannam@89: gz_error(state, Z_STREAM_ERROR, cannam@89: "internal error: inflate stream corrupt"); cannam@89: return -1; cannam@89: } cannam@89: if (ret == Z_MEM_ERROR) { cannam@89: gz_error(state, Z_MEM_ERROR, "out of memory"); cannam@89: return -1; cannam@89: } cannam@89: if (ret == Z_DATA_ERROR) { /* deflate stream invalid */ cannam@89: gz_error(state, Z_DATA_ERROR, cannam@89: strm->msg == NULL ? "compressed data error" : strm->msg); cannam@89: return -1; cannam@89: } cannam@89: } while (strm->avail_out && ret != Z_STREAM_END); cannam@89: cannam@89: /* update available output */ cannam@89: state->x.have = had - strm->avail_out; cannam@89: state->x.next = strm->next_out - state->x.have; cannam@89: cannam@89: /* if the gzip stream completed successfully, look for another */ cannam@89: if (ret == Z_STREAM_END) cannam@89: state->how = LOOK; cannam@89: cannam@89: /* good decompression */ cannam@89: return 0; cannam@89: } cannam@89: cannam@89: /* Fetch data and put it in the output buffer. Assumes state->x.have is 0. cannam@89: Data is either copied from the input file or decompressed from the input cannam@89: file depending on state->how. If state->how is LOOK, then a gzip header is cannam@89: looked for to determine whether to copy or decompress. Returns -1 on error, cannam@89: otherwise 0. gz_fetch() will leave state->how as COPY or GZIP unless the cannam@89: end of the input file has been reached and all data has been processed. */ cannam@89: local int gz_fetch(state) cannam@89: gz_statep state; cannam@89: { cannam@89: z_streamp strm = &(state->strm); cannam@89: cannam@89: do { cannam@89: switch(state->how) { cannam@89: case LOOK: /* -> LOOK, COPY (only if never GZIP), or GZIP */ cannam@89: if (gz_look(state) == -1) cannam@89: return -1; cannam@89: if (state->how == LOOK) cannam@89: return 0; cannam@89: break; cannam@89: case COPY: /* -> COPY */ cannam@89: if (gz_load(state, state->out, state->size << 1, &(state->x.have)) cannam@89: == -1) cannam@89: return -1; cannam@89: state->x.next = state->out; cannam@89: return 0; cannam@89: case GZIP: /* -> GZIP or LOOK (if end of gzip stream) */ cannam@89: strm->avail_out = state->size << 1; cannam@89: strm->next_out = state->out; cannam@89: if (gz_decomp(state) == -1) cannam@89: return -1; cannam@89: } cannam@89: } while (state->x.have == 0 && (!state->eof || strm->avail_in)); cannam@89: return 0; cannam@89: } cannam@89: cannam@89: /* Skip len uncompressed bytes of output. Return -1 on error, 0 on success. */ cannam@89: local int gz_skip(state, len) cannam@89: gz_statep state; cannam@89: z_off64_t len; cannam@89: { cannam@89: unsigned n; cannam@89: cannam@89: /* skip over len bytes or reach end-of-file, whichever comes first */ cannam@89: while (len) cannam@89: /* skip over whatever is in output buffer */ cannam@89: if (state->x.have) { cannam@89: n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ? cannam@89: (unsigned)len : state->x.have; cannam@89: state->x.have -= n; cannam@89: state->x.next += n; cannam@89: state->x.pos += n; cannam@89: len -= n; cannam@89: } cannam@89: cannam@89: /* output buffer empty -- return if we're at the end of the input */ cannam@89: else if (state->eof && state->strm.avail_in == 0) cannam@89: break; cannam@89: cannam@89: /* need more data to skip -- load up output buffer */ cannam@89: else { cannam@89: /* get more output, looking for header if required */ cannam@89: if (gz_fetch(state) == -1) cannam@89: return -1; cannam@89: } cannam@89: return 0; cannam@89: } cannam@89: cannam@89: /* -- see zlib.h -- */ cannam@89: int ZEXPORT gzread(file, buf, len) cannam@89: gzFile file; cannam@89: voidp buf; cannam@89: unsigned len; cannam@89: { cannam@89: unsigned got, n; cannam@89: gz_statep state; cannam@89: z_streamp strm; cannam@89: cannam@89: /* get internal structure */ cannam@89: if (file == NULL) cannam@89: return -1; cannam@89: state = (gz_statep)file; cannam@89: strm = &(state->strm); cannam@89: cannam@89: /* check that we're reading and that there's no (serious) error */ cannam@89: if (state->mode != GZ_READ || cannam@89: (state->err != Z_OK && state->err != Z_BUF_ERROR)) cannam@89: return -1; cannam@89: cannam@89: /* since an int is returned, make sure len fits in one, otherwise return cannam@89: with an error (this avoids the flaw in the interface) */ cannam@89: if ((int)len < 0) { cannam@89: gz_error(state, Z_DATA_ERROR, "requested length does not fit in int"); cannam@89: return -1; cannam@89: } cannam@89: cannam@89: /* if len is zero, avoid unnecessary operations */ cannam@89: if (len == 0) cannam@89: return 0; cannam@89: cannam@89: /* process a skip request */ cannam@89: if (state->seek) { cannam@89: state->seek = 0; cannam@89: if (gz_skip(state, state->skip) == -1) cannam@89: return -1; cannam@89: } cannam@89: cannam@89: /* get len bytes to buf, or less than len if at the end */ cannam@89: got = 0; cannam@89: do { cannam@89: /* first just try copying data from the output buffer */ cannam@89: if (state->x.have) { cannam@89: n = state->x.have > len ? len : state->x.have; cannam@89: memcpy(buf, state->x.next, n); cannam@89: state->x.next += n; cannam@89: state->x.have -= n; cannam@89: } cannam@89: cannam@89: /* output buffer empty -- return if we're at the end of the input */ cannam@89: else if (state->eof && strm->avail_in == 0) { cannam@89: state->past = 1; /* tried to read past end */ cannam@89: break; cannam@89: } cannam@89: cannam@89: /* need output data -- for small len or new stream load up our output cannam@89: buffer */ cannam@89: else if (state->how == LOOK || len < (state->size << 1)) { cannam@89: /* get more output, looking for header if required */ cannam@89: if (gz_fetch(state) == -1) cannam@89: return -1; cannam@89: continue; /* no progress yet -- go back to copy above */ cannam@89: /* the copy above assures that we will leave with space in the cannam@89: output buffer, allowing at least one gzungetc() to succeed */ cannam@89: } cannam@89: cannam@89: /* large len -- read directly into user buffer */ cannam@89: else if (state->how == COPY) { /* read directly */ cannam@89: if (gz_load(state, buf, len, &n) == -1) cannam@89: return -1; cannam@89: } cannam@89: cannam@89: /* large len -- decompress directly into user buffer */ cannam@89: else { /* state->how == GZIP */ cannam@89: strm->avail_out = len; cannam@89: strm->next_out = buf; cannam@89: if (gz_decomp(state) == -1) cannam@89: return -1; cannam@89: n = state->x.have; cannam@89: state->x.have = 0; cannam@89: } cannam@89: cannam@89: /* update progress */ cannam@89: len -= n; cannam@89: buf = (char *)buf + n; cannam@89: got += n; cannam@89: state->x.pos += n; cannam@89: } while (len); cannam@89: cannam@89: /* return number of bytes read into user buffer (will fit in int) */ cannam@89: return (int)got; cannam@89: } cannam@89: cannam@89: /* -- see zlib.h -- */ cannam@89: #undef gzgetc cannam@89: int ZEXPORT gzgetc(file) cannam@89: gzFile file; cannam@89: { cannam@89: int ret; cannam@89: unsigned char buf[1]; cannam@89: gz_statep state; cannam@89: cannam@89: /* get internal structure */ cannam@89: if (file == NULL) cannam@89: return -1; cannam@89: state = (gz_statep)file; cannam@89: cannam@89: /* check that we're reading and that there's no (serious) error */ cannam@89: if (state->mode != GZ_READ || cannam@89: (state->err != Z_OK && state->err != Z_BUF_ERROR)) cannam@89: return -1; cannam@89: cannam@89: /* try output buffer (no need to check for skip request) */ cannam@89: if (state->x.have) { cannam@89: state->x.have--; cannam@89: state->x.pos++; cannam@89: return *(state->x.next)++; cannam@89: } cannam@89: cannam@89: /* nothing there -- try gzread() */ cannam@89: ret = gzread(file, buf, 1); cannam@89: return ret < 1 ? -1 : buf[0]; cannam@89: } cannam@89: cannam@89: int ZEXPORT gzgetc_(file) cannam@89: gzFile file; cannam@89: { cannam@89: return gzgetc(file); cannam@89: } cannam@89: cannam@89: /* -- see zlib.h -- */ cannam@89: int ZEXPORT gzungetc(c, file) cannam@89: int c; cannam@89: gzFile file; cannam@89: { cannam@89: gz_statep state; cannam@89: cannam@89: /* get internal structure */ cannam@89: if (file == NULL) cannam@89: return -1; cannam@89: state = (gz_statep)file; cannam@89: cannam@89: /* check that we're reading and that there's no (serious) error */ cannam@89: if (state->mode != GZ_READ || cannam@89: (state->err != Z_OK && state->err != Z_BUF_ERROR)) cannam@89: return -1; cannam@89: cannam@89: /* process a skip request */ cannam@89: if (state->seek) { cannam@89: state->seek = 0; cannam@89: if (gz_skip(state, state->skip) == -1) cannam@89: return -1; cannam@89: } cannam@89: cannam@89: /* can't push EOF */ cannam@89: if (c < 0) cannam@89: return -1; cannam@89: cannam@89: /* if output buffer empty, put byte at end (allows more pushing) */ cannam@89: if (state->x.have == 0) { cannam@89: state->x.have = 1; cannam@89: state->x.next = state->out + (state->size << 1) - 1; cannam@89: state->x.next[0] = c; cannam@89: state->x.pos--; cannam@89: state->past = 0; cannam@89: return c; cannam@89: } cannam@89: cannam@89: /* if no room, give up (must have already done a gzungetc()) */ cannam@89: if (state->x.have == (state->size << 1)) { cannam@89: gz_error(state, Z_DATA_ERROR, "out of room to push characters"); cannam@89: return -1; cannam@89: } cannam@89: cannam@89: /* slide output data if needed and insert byte before existing data */ cannam@89: if (state->x.next == state->out) { cannam@89: unsigned char *src = state->out + state->x.have; cannam@89: unsigned char *dest = state->out + (state->size << 1); cannam@89: while (src > state->out) cannam@89: *--dest = *--src; cannam@89: state->x.next = dest; cannam@89: } cannam@89: state->x.have++; cannam@89: state->x.next--; cannam@89: state->x.next[0] = c; cannam@89: state->x.pos--; cannam@89: state->past = 0; cannam@89: return c; cannam@89: } cannam@89: cannam@89: /* -- see zlib.h -- */ cannam@89: char * ZEXPORT gzgets(file, buf, len) cannam@89: gzFile file; cannam@89: char *buf; cannam@89: int len; cannam@89: { cannam@89: unsigned left, n; cannam@89: char *str; cannam@89: unsigned char *eol; cannam@89: gz_statep state; cannam@89: cannam@89: /* check parameters and get internal structure */ cannam@89: if (file == NULL || buf == NULL || len < 1) cannam@89: return NULL; cannam@89: state = (gz_statep)file; cannam@89: cannam@89: /* check that we're reading and that there's no (serious) error */ cannam@89: if (state->mode != GZ_READ || cannam@89: (state->err != Z_OK && state->err != Z_BUF_ERROR)) cannam@89: return NULL; cannam@89: cannam@89: /* process a skip request */ cannam@89: if (state->seek) { cannam@89: state->seek = 0; cannam@89: if (gz_skip(state, state->skip) == -1) cannam@89: return NULL; cannam@89: } cannam@89: cannam@89: /* copy output bytes up to new line or len - 1, whichever comes first -- cannam@89: append a terminating zero to the string (we don't check for a zero in cannam@89: the contents, let the user worry about that) */ cannam@89: str = buf; cannam@89: left = (unsigned)len - 1; cannam@89: if (left) do { cannam@89: /* assure that something is in the output buffer */ cannam@89: if (state->x.have == 0 && gz_fetch(state) == -1) cannam@89: return NULL; /* error */ cannam@89: if (state->x.have == 0) { /* end of file */ cannam@89: state->past = 1; /* read past end */ cannam@89: break; /* return what we have */ cannam@89: } cannam@89: cannam@89: /* look for end-of-line in current output buffer */ cannam@89: n = state->x.have > left ? left : state->x.have; cannam@89: eol = memchr(state->x.next, '\n', n); cannam@89: if (eol != NULL) cannam@89: n = (unsigned)(eol - state->x.next) + 1; cannam@89: cannam@89: /* copy through end-of-line, or remainder if not found */ cannam@89: memcpy(buf, state->x.next, n); cannam@89: state->x.have -= n; cannam@89: state->x.next += n; cannam@89: state->x.pos += n; cannam@89: left -= n; cannam@89: buf += n; cannam@89: } while (left && eol == NULL); cannam@89: cannam@89: /* return terminated string, or if nothing, end of file */ cannam@89: if (buf == str) cannam@89: return NULL; cannam@89: buf[0] = 0; cannam@89: return str; cannam@89: } cannam@89: cannam@89: /* -- see zlib.h -- */ cannam@89: int ZEXPORT gzdirect(file) cannam@89: gzFile file; cannam@89: { cannam@89: gz_statep state; cannam@89: cannam@89: /* get internal structure */ cannam@89: if (file == NULL) cannam@89: return 0; cannam@89: state = (gz_statep)file; cannam@89: cannam@89: /* if the state is not known, but we can find out, then do so (this is cannam@89: mainly for right after a gzopen() or gzdopen()) */ cannam@89: if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0) cannam@89: (void)gz_look(state); cannam@89: cannam@89: /* return 1 if transparent, 0 if processing a gzip stream */ cannam@89: return state->direct; cannam@89: } cannam@89: cannam@89: /* -- see zlib.h -- */ cannam@89: int ZEXPORT gzclose_r(file) cannam@89: gzFile file; cannam@89: { cannam@89: int ret, err; cannam@89: gz_statep state; cannam@89: cannam@89: /* get internal structure */ cannam@89: if (file == NULL) cannam@89: return Z_STREAM_ERROR; cannam@89: state = (gz_statep)file; cannam@89: cannam@89: /* check that we're reading */ cannam@89: if (state->mode != GZ_READ) cannam@89: return Z_STREAM_ERROR; cannam@89: cannam@89: /* free memory and close file */ cannam@89: if (state->size) { cannam@89: inflateEnd(&(state->strm)); cannam@89: free(state->out); cannam@89: free(state->in); cannam@89: } cannam@89: err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK; cannam@89: gz_error(state, Z_OK, NULL); cannam@89: free(state->path); cannam@89: ret = close(state->fd); cannam@89: free(state); cannam@89: return ret ? Z_ERRNO : err; cannam@89: }