/*-----------------------------------------------------------*/
/*--- Block recoverer program for bzip2                   ---*/
/*---                                      bzip2recover.c ---*/
/*-----------------------------------------------------------*/

/* ------------------------------------------------------------------
   This file is part of bzip2/libbzip2, a program and library for
   lossless, block-sorting data compression.

   bzip2/libbzip2 version 1.0.6 of 6 September 2010
   Copyright (C) 1996-2010 Julian Seward

   Please read the WARNING, DISCLAIMER and PATENTS sections in the
   README file.

   This program is released under the terms of the license contained
   in the file LICENSE.
   ------------------------------------------------------------------ */

/* This program is a complete hack and should be rewritten properly.
   It isn't very complicated. */

#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>


/* This program records bit locations in the file to be recovered.
   That means that if 64-bit ints are not supported, we will not
   be able to recover .bz2 files over 512MB (2^32 bits) long.
   On GNU supported platforms, we take advantage of the 64-bit
   int support to circumvent this problem.  Ditto MSVC.

   This change occurred in version 1.0.2; all prior versions have
   the 512MB limitation.
*/
#ifdef __GNUC__
   typedef  unsigned long long int  MaybeUInt64;
   /* "ll" is the standard length modifier for (unsigned) long long;
      "L" is only defined for long double conversions. */
#  define MaybeUInt64_FMT "%llu"
#else
#ifdef _MSC_VER
   typedef  unsigned __int64  MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
   typedef  unsigned int   MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif
#endif

typedef  unsigned int   UInt32;
typedef  int            Int32;
typedef  unsigned char  UChar;
typedef  char           Char;
typedef  unsigned char  Bool;
#define True    ((Bool)1)
#define False   ((Bool)0)


/* Capacity, in bytes, of each of the file-name buffers below. */
#define BZ_MAX_FILENAME 2000

Char inFileName[BZ_MAX_FILENAME];    /* name of the damaged input file        */
Char outFileName[BZ_MAX_FILENAME];   /* name of the output file being written */
Char progName[BZ_MAX_FILENAME];      /* program name used to prefix messages  */

MaybeUInt64 bytesOut = 0;            /* count of bytes emitted by the writers */
MaybeUInt64 bytesIn  = 0;


/*---------------------------------------------------*/
/*--- Header bytes                                ---*/
/*---------------------------------------------------*/

#define BZ_HDR_B 0x42                         /* 'B' */
#define BZ_HDR_Z 0x5a                         /* 'Z' */
#define BZ_HDR_h 0x68                         /* 'h' */
#define BZ_HDR_0 0x30                         /* '0' */


/*---------------------------------------------------*/
/*--- I/O errors                                  ---*/
/*---------------------------------------------------*/

/*---------------------------------------------*/
/*--
   Report a failure while reading inFileName (followed by the
   perror() text for the current errno) and terminate the
   program with exit status 1.  Does not return.
--*/
static void readError ( void )
{
   fprintf ( stderr,
             "%s: I/O error reading `%s', possible reason follows.\n",
            progName, inFileName );
   perror ( progName );
   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
             progName );
   exit ( 1 );
}


/*---------------------------------------------*/
/*--
   Report a failure while writing outFileName (followed by the
   perror() text for the current errno) and terminate the
   program with exit status 1.  Does not return.

   The message names the output file and says "writing"; it used
   to be a verbatim copy of the read-error message.
--*/
static void writeError ( void )
{
   fprintf ( stderr,
             "%s: I/O error writing `%s', possible reason follows.\n",
            progName, outFileName );
   perror ( progName );
   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
             progName );
   exit ( 1 );
}


/*---------------------------------------------*/
/*--
   Report that an allocation of n bytes failed and terminate the
   program with exit status 1.  Does not return.
--*/
static void mallocFail ( Int32 n )
{
   fprintf ( stderr,
             "%s: malloc failed on request for %d bytes.\n",
            progName, n );
   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
             progName );
   exit ( 1 );
}


/*---------------------------------------------*/
/*--
   Report that the input holds more blocks than the fixed-size
   tables can record (max_handled_blocks is the limit printed)
   and terminate the program with exit status 1.  Does not return.
--*/
static void tooManyBlocks ( Int32 max_handled_blocks )
{
   fprintf ( stderr,
             "%s: `%s' appears to contain more than %d blocks\n",
            progName, inFileName, max_handled_blocks );
   fprintf ( stderr,
             "%s: and cannot be handled. To fix, increase\n",
             progName );
   fprintf ( stderr,
             "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
             progName );
   exit ( 1 );
}



/*---------------------------------------------------*/
/*--- Bit stream I/O                              ---*/
/*---------------------------------------------------*/

/*--
   A FILE wrapped so that it can be read or written one bit at
   a time.
--*/
typedef
   struct {
      FILE*  handle;     /* underlying stdio stream                      */
      Int32  buffer;     /* bits of the byte currently being assembled   */
                         /* (writing) or consumed (reading)              */
      Int32  buffLive;   /* number of valid bits currently in buffer     */
      Char   mode;       /* 'r' for a read stream, 'w' for a write one   */
   }
   BitStream;


/*---------------------------------------------*/
/*--
   Allocate a BitStream that reads bits from stream.  The bit
   buffer starts empty.  Calls mallocFail (which exits) if the
   allocation fails, so the result is never NULL.
--*/
static BitStream* bsOpenReadStream ( FILE* stream )
{
   BitStream *bs = malloc ( sizeof(BitStream) );
   if (bs == NULL) mallocFail ( (Int32) sizeof(BitStream) );
   bs->handle = stream;
   bs->buffer = 0;
   bs->buffLive = 0;
   bs->mode = 'r';
   return bs;
}


/*---------------------------------------------*/ Chris@4: static BitStream* bsOpenWriteStream ( FILE* stream ) Chris@4: { Chris@4: BitStream *bs = malloc ( sizeof(BitStream) ); Chris@4: if (bs == NULL) mallocFail ( sizeof(BitStream) ); Chris@4: bs->handle = stream; Chris@4: bs->buffer = 0; Chris@4: bs->buffLive = 0; Chris@4: bs->mode = 'w'; Chris@4: return bs; Chris@4: } Chris@4: Chris@4: Chris@4: /*---------------------------------------------*/ Chris@4: static void bsPutBit ( BitStream* bs, Int32 bit ) Chris@4: { Chris@4: if (bs->buffLive == 8) { Chris@4: Int32 retVal = putc ( (UChar) bs->buffer, bs->handle ); Chris@4: if (retVal == EOF) writeError(); Chris@4: bytesOut++; Chris@4: bs->buffLive = 1; Chris@4: bs->buffer = bit & 0x1; Chris@4: } else { Chris@4: bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) ); Chris@4: bs->buffLive++; Chris@4: }; Chris@4: } Chris@4: Chris@4: Chris@4: /*---------------------------------------------*/ Chris@4: /*-- Chris@4: Returns 0 or 1, or 2 to indicate EOF. 
Chris@4: --*/ Chris@4: static Int32 bsGetBit ( BitStream* bs ) Chris@4: { Chris@4: if (bs->buffLive > 0) { Chris@4: bs->buffLive --; Chris@4: return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 ); Chris@4: } else { Chris@4: Int32 retVal = getc ( bs->handle ); Chris@4: if ( retVal == EOF ) { Chris@4: if (errno != 0) readError(); Chris@4: return 2; Chris@4: } Chris@4: bs->buffLive = 7; Chris@4: bs->buffer = retVal; Chris@4: return ( ((bs->buffer) >> 7) & 0x1 ); Chris@4: } Chris@4: } Chris@4: Chris@4: Chris@4: /*---------------------------------------------*/ Chris@4: static void bsClose ( BitStream* bs ) Chris@4: { Chris@4: Int32 retVal; Chris@4: Chris@4: if ( bs->mode == 'w' ) { Chris@4: while ( bs->buffLive < 8 ) { Chris@4: bs->buffLive++; Chris@4: bs->buffer <<= 1; Chris@4: }; Chris@4: retVal = putc ( (UChar) (bs->buffer), bs->handle ); Chris@4: if (retVal == EOF) writeError(); Chris@4: bytesOut++; Chris@4: retVal = fflush ( bs->handle ); Chris@4: if (retVal == EOF) writeError(); Chris@4: } Chris@4: retVal = fclose ( bs->handle ); Chris@4: if (retVal == EOF) { Chris@4: if (bs->mode == 'w') writeError(); else readError(); Chris@4: } Chris@4: free ( bs ); Chris@4: } Chris@4: Chris@4: Chris@4: /*---------------------------------------------*/ Chris@4: static void bsPutUChar ( BitStream* bs, UChar c ) Chris@4: { Chris@4: Int32 i; Chris@4: for (i = 7; i >= 0; i--) Chris@4: bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 ); Chris@4: } Chris@4: Chris@4: Chris@4: /*---------------------------------------------*/ Chris@4: static void bsPutUInt32 ( BitStream* bs, UInt32 c ) Chris@4: { Chris@4: Int32 i; Chris@4: Chris@4: for (i = 31; i >= 0; i--) Chris@4: bsPutBit ( bs, (c >> i) & 0x1 ); Chris@4: } Chris@4: Chris@4: Chris@4: /*---------------------------------------------*/ Chris@4: static Bool endsInBz2 ( Char* name ) Chris@4: { Chris@4: Int32 n = strlen ( name ); Chris@4: if (n <= 4) return False; Chris@4: return Chris@4: (name[n-4] == '.' 
&& Chris@4: name[n-3] == 'b' && Chris@4: name[n-2] == 'z' && Chris@4: name[n-1] == '2'); Chris@4: } Chris@4: Chris@4: Chris@4: /*---------------------------------------------------*/ Chris@4: /*--- ---*/ Chris@4: /*---------------------------------------------------*/ Chris@4: Chris@4: /* This logic isn't really right when it comes to Cygwin. */ Chris@4: #ifdef _WIN32 Chris@4: # define BZ_SPLIT_SYM '\\' /* path splitter on Windows platform */ Chris@4: #else Chris@4: # define BZ_SPLIT_SYM '/' /* path splitter on Unix platform */ Chris@4: #endif Chris@4: Chris@4: #define BLOCK_HEADER_HI 0x00003141UL Chris@4: #define BLOCK_HEADER_LO 0x59265359UL Chris@4: Chris@4: #define BLOCK_ENDMARK_HI 0x00001772UL Chris@4: #define BLOCK_ENDMARK_LO 0x45385090UL Chris@4: Chris@4: /* Increase if necessary. However, a .bz2 file with > 50000 blocks Chris@4: would have an uncompressed size of at least 40GB, so the chances Chris@4: are low you'll need to up this. Chris@4: */ Chris@4: #define BZ_MAX_HANDLED_BLOCKS 50000 Chris@4: Chris@4: MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS]; Chris@4: MaybeUInt64 bEnd [BZ_MAX_HANDLED_BLOCKS]; Chris@4: MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS]; Chris@4: MaybeUInt64 rbEnd [BZ_MAX_HANDLED_BLOCKS]; Chris@4: Chris@4: Int32 main ( Int32 argc, Char** argv ) Chris@4: { Chris@4: FILE* inFile; Chris@4: FILE* outFile; Chris@4: BitStream* bsIn, *bsWr; Chris@4: Int32 b, wrBlock, currBlock, rbCtr; Chris@4: MaybeUInt64 bitsRead; Chris@4: Chris@4: UInt32 buffHi, buffLo, blockCRC; Chris@4: Char* p; Chris@4: Chris@4: strcpy ( progName, argv[0] ); Chris@4: inFileName[0] = outFileName[0] = 0; Chris@4: Chris@4: fprintf ( stderr, Chris@4: "bzip2recover 1.0.6: extracts blocks from damaged .bz2 files.\n" ); Chris@4: Chris@4: if (argc != 2) { Chris@4: fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n", Chris@4: progName, progName ); Chris@4: switch (sizeof(MaybeUInt64)) { Chris@4: case 8: Chris@4: fprintf(stderr, Chris@4: "\trestrictions on size of recovered file: 
None\n"); Chris@4: break; Chris@4: case 4: Chris@4: fprintf(stderr, Chris@4: "\trestrictions on size of recovered file: 512 MB\n"); Chris@4: fprintf(stderr, Chris@4: "\tto circumvent, recompile with MaybeUInt64 as an\n" Chris@4: "\tunsigned 64-bit int.\n"); Chris@4: break; Chris@4: default: Chris@4: fprintf(stderr, Chris@4: "\tsizeof(MaybeUInt64) is not 4 or 8 -- " Chris@4: "configuration error.\n"); Chris@4: break; Chris@4: } Chris@4: exit(1); Chris@4: } Chris@4: Chris@4: if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) { Chris@4: fprintf ( stderr, Chris@4: "%s: supplied filename is suspiciously (>= %d chars) long. Bye!\n", Chris@4: progName, (int)strlen(argv[1]) ); Chris@4: exit(1); Chris@4: } Chris@4: Chris@4: strcpy ( inFileName, argv[1] ); Chris@4: Chris@4: inFile = fopen ( inFileName, "rb" ); Chris@4: if (inFile == NULL) { Chris@4: fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName ); Chris@4: exit(1); Chris@4: } Chris@4: Chris@4: bsIn = bsOpenReadStream ( inFile ); Chris@4: fprintf ( stderr, "%s: searching for block boundaries ...\n", progName ); Chris@4: Chris@4: bitsRead = 0; Chris@4: buffHi = buffLo = 0; Chris@4: currBlock = 0; Chris@4: bStart[currBlock] = 0; Chris@4: Chris@4: rbCtr = 0; Chris@4: Chris@4: while (True) { Chris@4: b = bsGetBit ( bsIn ); Chris@4: bitsRead++; Chris@4: if (b == 2) { Chris@4: if (bitsRead >= bStart[currBlock] && Chris@4: (bitsRead - bStart[currBlock]) >= 40) { Chris@4: bEnd[currBlock] = bitsRead-1; Chris@4: if (currBlock > 0) Chris@4: fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT Chris@4: " to " MaybeUInt64_FMT " (incomplete)\n", Chris@4: currBlock, bStart[currBlock], bEnd[currBlock] ); Chris@4: } else Chris@4: currBlock--; Chris@4: break; Chris@4: } Chris@4: buffHi = (buffHi << 1) | (buffLo >> 31); Chris@4: buffLo = (buffLo << 1) | (b & 1); Chris@4: if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI Chris@4: && buffLo == BLOCK_HEADER_LO) Chris@4: || Chris@4: ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI Chris@4: 
&& buffLo == BLOCK_ENDMARK_LO) Chris@4: ) { Chris@4: if (bitsRead > 49) { Chris@4: bEnd[currBlock] = bitsRead-49; Chris@4: } else { Chris@4: bEnd[currBlock] = 0; Chris@4: } Chris@4: if (currBlock > 0 && Chris@4: (bEnd[currBlock] - bStart[currBlock]) >= 130) { Chris@4: fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT Chris@4: " to " MaybeUInt64_FMT "\n", Chris@4: rbCtr+1, bStart[currBlock], bEnd[currBlock] ); Chris@4: rbStart[rbCtr] = bStart[currBlock]; Chris@4: rbEnd[rbCtr] = bEnd[currBlock]; Chris@4: rbCtr++; Chris@4: } Chris@4: if (currBlock >= BZ_MAX_HANDLED_BLOCKS) Chris@4: tooManyBlocks(BZ_MAX_HANDLED_BLOCKS); Chris@4: currBlock++; Chris@4: Chris@4: bStart[currBlock] = bitsRead; Chris@4: } Chris@4: } Chris@4: Chris@4: bsClose ( bsIn ); Chris@4: Chris@4: /*-- identified blocks run from 1 to rbCtr inclusive. --*/ Chris@4: Chris@4: if (rbCtr < 1) { Chris@4: fprintf ( stderr, Chris@4: "%s: sorry, I couldn't find any block boundaries.\n", Chris@4: progName ); Chris@4: exit(1); Chris@4: }; Chris@4: Chris@4: fprintf ( stderr, "%s: splitting into blocks\n", progName ); Chris@4: Chris@4: inFile = fopen ( inFileName, "rb" ); Chris@4: if (inFile == NULL) { Chris@4: fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName ); Chris@4: exit(1); Chris@4: } Chris@4: bsIn = bsOpenReadStream ( inFile ); Chris@4: Chris@4: /*-- placate gcc's dataflow analyser --*/ Chris@4: blockCRC = 0; bsWr = 0; Chris@4: Chris@4: bitsRead = 0; Chris@4: outFile = NULL; Chris@4: wrBlock = 0; Chris@4: while (True) { Chris@4: b = bsGetBit(bsIn); Chris@4: if (b == 2) break; Chris@4: buffHi = (buffHi << 1) | (buffLo >> 31); Chris@4: buffLo = (buffLo << 1) | (b & 1); Chris@4: if (bitsRead == 47+rbStart[wrBlock]) Chris@4: blockCRC = (buffHi << 16) | (buffLo >> 16); Chris@4: Chris@4: if (outFile != NULL && bitsRead >= rbStart[wrBlock] Chris@4: && bitsRead <= rbEnd[wrBlock]) { Chris@4: bsPutBit ( bsWr, b ); Chris@4: } Chris@4: Chris@4: bitsRead++; Chris@4: Chris@4: if (bitsRead == 
rbEnd[wrBlock]+1) { Chris@4: if (outFile != NULL) { Chris@4: bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 ); Chris@4: bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 ); Chris@4: bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 ); Chris@4: bsPutUInt32 ( bsWr, blockCRC ); Chris@4: bsClose ( bsWr ); Chris@4: } Chris@4: if (wrBlock >= rbCtr) break; Chris@4: wrBlock++; Chris@4: } else Chris@4: if (bitsRead == rbStart[wrBlock]) { Chris@4: /* Create the output file name, correctly handling leading paths. Chris@4: (31.10.2001 by Sergey E. Kusikov) */ Chris@4: Char* split; Chris@4: Int32 ofs, k; Chris@4: for (k = 0; k < BZ_MAX_FILENAME; k++) Chris@4: outFileName[k] = 0; Chris@4: strcpy (outFileName, inFileName); Chris@4: split = strrchr (outFileName, BZ_SPLIT_SYM); Chris@4: if (split == NULL) { Chris@4: split = outFileName; Chris@4: } else { Chris@4: ++split; Chris@4: } Chris@4: /* Now split points to the start of the basename. */ Chris@4: ofs = split - outFileName; Chris@4: sprintf (split, "rec%5d", wrBlock+1); Chris@4: for (p = split; *p != 0; p++) if (*p == ' ') *p = '0'; Chris@4: strcat (outFileName, inFileName + ofs); Chris@4: Chris@4: if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" ); Chris@4: Chris@4: fprintf ( stderr, " writing block %d to `%s' ...\n", Chris@4: wrBlock+1, outFileName ); Chris@4: Chris@4: outFile = fopen ( outFileName, "wb" ); Chris@4: if (outFile == NULL) { Chris@4: fprintf ( stderr, "%s: can't write `%s'\n", Chris@4: progName, outFileName ); Chris@4: exit(1); Chris@4: } Chris@4: bsWr = bsOpenWriteStream ( outFile ); Chris@4: bsPutUChar ( bsWr, BZ_HDR_B ); Chris@4: bsPutUChar ( bsWr, BZ_HDR_Z ); Chris@4: bsPutUChar ( bsWr, BZ_HDR_h ); Chris@4: bsPutUChar ( bsWr, BZ_HDR_0 + 9 ); Chris@4: bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 ); Chris@4: bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 ); Chris@4: bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 ); Chris@4: } Chris@4: } Chris@4: Chris@4: fprintf ( stderr, "%s: 
finished\n", progName ); Chris@4: return 0; Chris@4: } Chris@4: Chris@4: Chris@4: Chris@4: /*-----------------------------------------------------------*/ Chris@4: /*--- end bzip2recover.c ---*/ Chris@4: /*-----------------------------------------------------------*/