/*-----------------------------------------------------------*/
/*--- Block recoverer program for bzip2                   ---*/
/*---                                      bzip2recover.c ---*/
/*-----------------------------------------------------------*/

/* ------------------------------------------------------------------
   This file is part of bzip2/libbzip2, a program and library for
   lossless, block-sorting data compression.

   bzip2/libbzip2 version 1.0.6 of 6 September 2010
   Copyright (C) 1996-2010 Julian Seward

   Please read the WARNING, DISCLAIMER and PATENTS sections in the
   README file.

   This program is released under the terms of the license contained
   in the file LICENSE.
   ------------------------------------------------------------------ */

/* This program is a complete hack and should be rewritten properly.
   It isn't very complicated. */

#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>


/* This program records bit locations in the file to be recovered.
   That means that if 64-bit ints are not supported, we will not
   be able to recover .bz2 files over 512MB (2^32 bits) long.
   On GNU supported platforms, we take advantage of the 64-bit
   int support to circumvent this problem.  Ditto MSVC.

   This change occurred in version 1.0.2; all prior versions have
   the 512MB limitation.
*/
#ifdef __GNUC__
   typedef  unsigned long long int  MaybeUInt64;
   /* "ll" is the standard length modifier for (unsigned) long long;
      "L" is only defined for floating-point conversions. */
#  define MaybeUInt64_FMT "%llu"
#else
#ifdef _MSC_VER
   typedef  unsigned __int64  MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
   typedef  unsigned int   MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif
#endif

typedef  unsigned int   UInt32;
typedef  int            Int32;
typedef  unsigned char  UChar;
typedef  char           Char;
typedef  unsigned char  Bool;
#define True    ((Bool)1)
#define False   ((Bool)0)


/* Capacity, in bytes, of each of the file/program name buffers below. */
#define BZ_MAX_FILENAME 2000

Char inFileName[BZ_MAX_FILENAME];
Char outFileName[BZ_MAX_FILENAME];
Char progName[BZ_MAX_FILENAME];

MaybeUInt64 bytesOut = 0;
MaybeUInt64 bytesIn  = 0;


/*---------------------------------------------------*/
/*--- Header bytes                                ---*/
/*---------------------------------------------------*/

#define BZ_HDR_B 0x42   /* 'B' */
#define BZ_HDR_Z 0x5a   /* 'Z' */
#define BZ_HDR_h 0x68   /* 'h' */
#define BZ_HDR_0 0x30   /* '0' */


/*---------------------------------------------------*/
/*--- I/O errors                                  ---*/
/*---------------------------------------------------*/

/*---------------------------------------------*/
/*--
   Report a failure while reading the input file and terminate.
   Prints the program name and inFileName, then the system error
   text via perror(), then a warning that output may be incomplete.
   Does not return: exits with status 1.
--*/
static void readError ( void )
{
   fprintf ( stderr,
             "%s: I/O error reading `%s', possible reason follows.\n",
            progName, inFileName );
   perror ( progName );
   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
             progName );
   exit ( 1 );
}


/*---------------------------------------------*/ cannam@89: static void writeError ( void ) cannam@89: { cannam@89: fprintf ( stderr, cannam@89: "%s: I/O error reading `%s', possible reason follows.\n", cannam@89: progName, inFileName ); cannam@89: perror ( progName ); cannam@89: fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n", cannam@89: progName ); cannam@89: exit ( 1 ); cannam@89: } cannam@89: cannam@89: cannam@89: /*---------------------------------------------*/ cannam@89: static void mallocFail ( Int32 n ) cannam@89: { cannam@89: fprintf ( stderr, cannam@89: "%s: malloc failed on request for %d bytes.\n", cannam@89: progName, n ); cannam@89: fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n", cannam@89: progName ); cannam@89: exit ( 1 ); cannam@89: } cannam@89: cannam@89: cannam@89: /*---------------------------------------------*/ cannam@89: static void tooManyBlocks ( Int32 max_handled_blocks ) cannam@89: { cannam@89: fprintf ( stderr, cannam@89: "%s: `%s' appears to contain more than %d blocks\n", cannam@89: progName, inFileName, max_handled_blocks ); cannam@89: fprintf ( stderr, cannam@89: "%s: and cannot be handled. 
To fix, increase\n", cannam@89: progName ); cannam@89: fprintf ( stderr, cannam@89: "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n", cannam@89: progName ); cannam@89: exit ( 1 ); cannam@89: } cannam@89: cannam@89: cannam@89: cannam@89: /*---------------------------------------------------*/ cannam@89: /*--- Bit stream I/O ---*/ cannam@89: /*---------------------------------------------------*/ cannam@89: cannam@89: typedef cannam@89: struct { cannam@89: FILE* handle; cannam@89: Int32 buffer; cannam@89: Int32 buffLive; cannam@89: Char mode; cannam@89: } cannam@89: BitStream; cannam@89: cannam@89: cannam@89: /*---------------------------------------------*/ cannam@89: static BitStream* bsOpenReadStream ( FILE* stream ) cannam@89: { cannam@89: BitStream *bs = malloc ( sizeof(BitStream) ); cannam@89: if (bs == NULL) mallocFail ( sizeof(BitStream) ); cannam@89: bs->handle = stream; cannam@89: bs->buffer = 0; cannam@89: bs->buffLive = 0; cannam@89: bs->mode = 'r'; cannam@89: return bs; cannam@89: } cannam@89: cannam@89: cannam@89: /*---------------------------------------------*/ cannam@89: static BitStream* bsOpenWriteStream ( FILE* stream ) cannam@89: { cannam@89: BitStream *bs = malloc ( sizeof(BitStream) ); cannam@89: if (bs == NULL) mallocFail ( sizeof(BitStream) ); cannam@89: bs->handle = stream; cannam@89: bs->buffer = 0; cannam@89: bs->buffLive = 0; cannam@89: bs->mode = 'w'; cannam@89: return bs; cannam@89: } cannam@89: cannam@89: cannam@89: /*---------------------------------------------*/ cannam@89: static void bsPutBit ( BitStream* bs, Int32 bit ) cannam@89: { cannam@89: if (bs->buffLive == 8) { cannam@89: Int32 retVal = putc ( (UChar) bs->buffer, bs->handle ); cannam@89: if (retVal == EOF) writeError(); cannam@89: bytesOut++; cannam@89: bs->buffLive = 1; cannam@89: bs->buffer = bit & 0x1; cannam@89: } else { cannam@89: bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) ); cannam@89: bs->buffLive++; cannam@89: }; cannam@89: } cannam@89: cannam@89: 
cannam@89: /*---------------------------------------------*/ cannam@89: /*-- cannam@89: Returns 0 or 1, or 2 to indicate EOF. cannam@89: --*/ cannam@89: static Int32 bsGetBit ( BitStream* bs ) cannam@89: { cannam@89: if (bs->buffLive > 0) { cannam@89: bs->buffLive --; cannam@89: return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 ); cannam@89: } else { cannam@89: Int32 retVal = getc ( bs->handle ); cannam@89: if ( retVal == EOF ) { cannam@89: if (errno != 0) readError(); cannam@89: return 2; cannam@89: } cannam@89: bs->buffLive = 7; cannam@89: bs->buffer = retVal; cannam@89: return ( ((bs->buffer) >> 7) & 0x1 ); cannam@89: } cannam@89: } cannam@89: cannam@89: cannam@89: /*---------------------------------------------*/ cannam@89: static void bsClose ( BitStream* bs ) cannam@89: { cannam@89: Int32 retVal; cannam@89: cannam@89: if ( bs->mode == 'w' ) { cannam@89: while ( bs->buffLive < 8 ) { cannam@89: bs->buffLive++; cannam@89: bs->buffer <<= 1; cannam@89: }; cannam@89: retVal = putc ( (UChar) (bs->buffer), bs->handle ); cannam@89: if (retVal == EOF) writeError(); cannam@89: bytesOut++; cannam@89: retVal = fflush ( bs->handle ); cannam@89: if (retVal == EOF) writeError(); cannam@89: } cannam@89: retVal = fclose ( bs->handle ); cannam@89: if (retVal == EOF) { cannam@89: if (bs->mode == 'w') writeError(); else readError(); cannam@89: } cannam@89: free ( bs ); cannam@89: } cannam@89: cannam@89: cannam@89: /*---------------------------------------------*/ cannam@89: static void bsPutUChar ( BitStream* bs, UChar c ) cannam@89: { cannam@89: Int32 i; cannam@89: for (i = 7; i >= 0; i--) cannam@89: bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 ); cannam@89: } cannam@89: cannam@89: cannam@89: /*---------------------------------------------*/ cannam@89: static void bsPutUInt32 ( BitStream* bs, UInt32 c ) cannam@89: { cannam@89: Int32 i; cannam@89: cannam@89: for (i = 31; i >= 0; i--) cannam@89: bsPutBit ( bs, (c >> i) & 0x1 ); cannam@89: } cannam@89: cannam@89: cannam@89: 
/*---------------------------------------------*/ cannam@89: static Bool endsInBz2 ( Char* name ) cannam@89: { cannam@89: Int32 n = strlen ( name ); cannam@89: if (n <= 4) return False; cannam@89: return cannam@89: (name[n-4] == '.' && cannam@89: name[n-3] == 'b' && cannam@89: name[n-2] == 'z' && cannam@89: name[n-1] == '2'); cannam@89: } cannam@89: cannam@89: cannam@89: /*---------------------------------------------------*/ cannam@89: /*--- ---*/ cannam@89: /*---------------------------------------------------*/ cannam@89: cannam@89: /* This logic isn't really right when it comes to Cygwin. */ cannam@89: #ifdef _WIN32 cannam@89: # define BZ_SPLIT_SYM '\\' /* path splitter on Windows platform */ cannam@89: #else cannam@89: # define BZ_SPLIT_SYM '/' /* path splitter on Unix platform */ cannam@89: #endif cannam@89: cannam@89: #define BLOCK_HEADER_HI 0x00003141UL cannam@89: #define BLOCK_HEADER_LO 0x59265359UL cannam@89: cannam@89: #define BLOCK_ENDMARK_HI 0x00001772UL cannam@89: #define BLOCK_ENDMARK_LO 0x45385090UL cannam@89: cannam@89: /* Increase if necessary. However, a .bz2 file with > 50000 blocks cannam@89: would have an uncompressed size of at least 40GB, so the chances cannam@89: are low you'll need to up this. 
cannam@89: */ cannam@89: #define BZ_MAX_HANDLED_BLOCKS 50000 cannam@89: cannam@89: MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS]; cannam@89: MaybeUInt64 bEnd [BZ_MAX_HANDLED_BLOCKS]; cannam@89: MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS]; cannam@89: MaybeUInt64 rbEnd [BZ_MAX_HANDLED_BLOCKS]; cannam@89: cannam@89: Int32 main ( Int32 argc, Char** argv ) cannam@89: { cannam@89: FILE* inFile; cannam@89: FILE* outFile; cannam@89: BitStream* bsIn, *bsWr; cannam@89: Int32 b, wrBlock, currBlock, rbCtr; cannam@89: MaybeUInt64 bitsRead; cannam@89: cannam@89: UInt32 buffHi, buffLo, blockCRC; cannam@89: Char* p; cannam@89: cannam@89: strcpy ( progName, argv[0] ); cannam@89: inFileName[0] = outFileName[0] = 0; cannam@89: cannam@89: fprintf ( stderr, cannam@89: "bzip2recover 1.0.6: extracts blocks from damaged .bz2 files.\n" ); cannam@89: cannam@89: if (argc != 2) { cannam@89: fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n", cannam@89: progName, progName ); cannam@89: switch (sizeof(MaybeUInt64)) { cannam@89: case 8: cannam@89: fprintf(stderr, cannam@89: "\trestrictions on size of recovered file: None\n"); cannam@89: break; cannam@89: case 4: cannam@89: fprintf(stderr, cannam@89: "\trestrictions on size of recovered file: 512 MB\n"); cannam@89: fprintf(stderr, cannam@89: "\tto circumvent, recompile with MaybeUInt64 as an\n" cannam@89: "\tunsigned 64-bit int.\n"); cannam@89: break; cannam@89: default: cannam@89: fprintf(stderr, cannam@89: "\tsizeof(MaybeUInt64) is not 4 or 8 -- " cannam@89: "configuration error.\n"); cannam@89: break; cannam@89: } cannam@89: exit(1); cannam@89: } cannam@89: cannam@89: if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) { cannam@89: fprintf ( stderr, cannam@89: "%s: supplied filename is suspiciously (>= %d chars) long. 
Bye!\n", cannam@89: progName, (int)strlen(argv[1]) ); cannam@89: exit(1); cannam@89: } cannam@89: cannam@89: strcpy ( inFileName, argv[1] ); cannam@89: cannam@89: inFile = fopen ( inFileName, "rb" ); cannam@89: if (inFile == NULL) { cannam@89: fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName ); cannam@89: exit(1); cannam@89: } cannam@89: cannam@89: bsIn = bsOpenReadStream ( inFile ); cannam@89: fprintf ( stderr, "%s: searching for block boundaries ...\n", progName ); cannam@89: cannam@89: bitsRead = 0; cannam@89: buffHi = buffLo = 0; cannam@89: currBlock = 0; cannam@89: bStart[currBlock] = 0; cannam@89: cannam@89: rbCtr = 0; cannam@89: cannam@89: while (True) { cannam@89: b = bsGetBit ( bsIn ); cannam@89: bitsRead++; cannam@89: if (b == 2) { cannam@89: if (bitsRead >= bStart[currBlock] && cannam@89: (bitsRead - bStart[currBlock]) >= 40) { cannam@89: bEnd[currBlock] = bitsRead-1; cannam@89: if (currBlock > 0) cannam@89: fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT cannam@89: " to " MaybeUInt64_FMT " (incomplete)\n", cannam@89: currBlock, bStart[currBlock], bEnd[currBlock] ); cannam@89: } else cannam@89: currBlock--; cannam@89: break; cannam@89: } cannam@89: buffHi = (buffHi << 1) | (buffLo >> 31); cannam@89: buffLo = (buffLo << 1) | (b & 1); cannam@89: if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI cannam@89: && buffLo == BLOCK_HEADER_LO) cannam@89: || cannam@89: ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI cannam@89: && buffLo == BLOCK_ENDMARK_LO) cannam@89: ) { cannam@89: if (bitsRead > 49) { cannam@89: bEnd[currBlock] = bitsRead-49; cannam@89: } else { cannam@89: bEnd[currBlock] = 0; cannam@89: } cannam@89: if (currBlock > 0 && cannam@89: (bEnd[currBlock] - bStart[currBlock]) >= 130) { cannam@89: fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT cannam@89: " to " MaybeUInt64_FMT "\n", cannam@89: rbCtr+1, bStart[currBlock], bEnd[currBlock] ); cannam@89: rbStart[rbCtr] = bStart[currBlock]; cannam@89: rbEnd[rbCtr] = bEnd[currBlock]; 
cannam@89: rbCtr++; cannam@89: } cannam@89: if (currBlock >= BZ_MAX_HANDLED_BLOCKS) cannam@89: tooManyBlocks(BZ_MAX_HANDLED_BLOCKS); cannam@89: currBlock++; cannam@89: cannam@89: bStart[currBlock] = bitsRead; cannam@89: } cannam@89: } cannam@89: cannam@89: bsClose ( bsIn ); cannam@89: cannam@89: /*-- identified blocks run from 1 to rbCtr inclusive. --*/ cannam@89: cannam@89: if (rbCtr < 1) { cannam@89: fprintf ( stderr, cannam@89: "%s: sorry, I couldn't find any block boundaries.\n", cannam@89: progName ); cannam@89: exit(1); cannam@89: }; cannam@89: cannam@89: fprintf ( stderr, "%s: splitting into blocks\n", progName ); cannam@89: cannam@89: inFile = fopen ( inFileName, "rb" ); cannam@89: if (inFile == NULL) { cannam@89: fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName ); cannam@89: exit(1); cannam@89: } cannam@89: bsIn = bsOpenReadStream ( inFile ); cannam@89: cannam@89: /*-- placate gcc's dataflow analyser --*/ cannam@89: blockCRC = 0; bsWr = 0; cannam@89: cannam@89: bitsRead = 0; cannam@89: outFile = NULL; cannam@89: wrBlock = 0; cannam@89: while (True) { cannam@89: b = bsGetBit(bsIn); cannam@89: if (b == 2) break; cannam@89: buffHi = (buffHi << 1) | (buffLo >> 31); cannam@89: buffLo = (buffLo << 1) | (b & 1); cannam@89: if (bitsRead == 47+rbStart[wrBlock]) cannam@89: blockCRC = (buffHi << 16) | (buffLo >> 16); cannam@89: cannam@89: if (outFile != NULL && bitsRead >= rbStart[wrBlock] cannam@89: && bitsRead <= rbEnd[wrBlock]) { cannam@89: bsPutBit ( bsWr, b ); cannam@89: } cannam@89: cannam@89: bitsRead++; cannam@89: cannam@89: if (bitsRead == rbEnd[wrBlock]+1) { cannam@89: if (outFile != NULL) { cannam@89: bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 ); cannam@89: bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 ); cannam@89: bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 ); cannam@89: bsPutUInt32 ( bsWr, blockCRC ); cannam@89: bsClose ( bsWr ); cannam@89: } cannam@89: if (wrBlock >= rbCtr) break; cannam@89: wrBlock++; cannam@89: } 
else cannam@89: if (bitsRead == rbStart[wrBlock]) { cannam@89: /* Create the output file name, correctly handling leading paths. cannam@89: (31.10.2001 by Sergey E. Kusikov) */ cannam@89: Char* split; cannam@89: Int32 ofs, k; cannam@89: for (k = 0; k < BZ_MAX_FILENAME; k++) cannam@89: outFileName[k] = 0; cannam@89: strcpy (outFileName, inFileName); cannam@89: split = strrchr (outFileName, BZ_SPLIT_SYM); cannam@89: if (split == NULL) { cannam@89: split = outFileName; cannam@89: } else { cannam@89: ++split; cannam@89: } cannam@89: /* Now split points to the start of the basename. */ cannam@89: ofs = split - outFileName; cannam@89: sprintf (split, "rec%5d", wrBlock+1); cannam@89: for (p = split; *p != 0; p++) if (*p == ' ') *p = '0'; cannam@89: strcat (outFileName, inFileName + ofs); cannam@89: cannam@89: if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" ); cannam@89: cannam@89: fprintf ( stderr, " writing block %d to `%s' ...\n", cannam@89: wrBlock+1, outFileName ); cannam@89: cannam@89: outFile = fopen ( outFileName, "wb" ); cannam@89: if (outFile == NULL) { cannam@89: fprintf ( stderr, "%s: can't write `%s'\n", cannam@89: progName, outFileName ); cannam@89: exit(1); cannam@89: } cannam@89: bsWr = bsOpenWriteStream ( outFile ); cannam@89: bsPutUChar ( bsWr, BZ_HDR_B ); cannam@89: bsPutUChar ( bsWr, BZ_HDR_Z ); cannam@89: bsPutUChar ( bsWr, BZ_HDR_h ); cannam@89: bsPutUChar ( bsWr, BZ_HDR_0 + 9 ); cannam@89: bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 ); cannam@89: bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 ); cannam@89: bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 ); cannam@89: } cannam@89: } cannam@89: cannam@89: fprintf ( stderr, "%s: finished\n", progName ); cannam@89: return 0; cannam@89: } cannam@89: cannam@89: cannam@89: cannam@89: /*-----------------------------------------------------------*/ cannam@89: /*--- end bzip2recover.c ---*/ cannam@89: /*-----------------------------------------------------------*/