Mercurial > hg > easyhg-kdiff3
diff kdiff3/src/gnudiff_io.cpp @ 69:8febbfb1148c
KDiff3 0.9.89
author | joachim99 |
---|---|
date | Mon, 10 Apr 2006 08:40:51 +0000 |
parents | d7cafcda8c99 |
children |
line wrap: on
line diff
--- a/kdiff3/src/gnudiff_io.cpp Mon Jan 31 22:30:47 2005 +0000 +++ b/kdiff3/src/gnudiff_io.cpp Mon Apr 10 08:40:51 2006 +0000 @@ -19,7 +19,7 @@ You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, write to the Free Software Foundation, - 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + 51 Franklin Steet, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "gnudiff_diff.h" #include <stdlib.h> @@ -72,6 +72,61 @@ #define binary_file_p(buf, size) (memchr (buf, 0, size) != 0) +/* Compare two lines (typically one from each input file) + according to the command line options. + For efficiency, this is invoked only when the lines do not match exactly + but an option like -i might cause us to ignore the difference. + Return nonzero if the lines differ. */ + +bool GnuDiff::lines_differ (const QChar *s1, size_t len1, const QChar *s2, size_t len2 ) +{ + const QChar *t1 = s1; + const QChar *t2 = s2; + const QChar *s1end = s1+len1; + const QChar *s2end = s2+len2; + + for ( ; ; ++t1, ++t2 ) + { + /* Test for exact char equality first, since it's a common case. */ + if ( t1!=s1end && t2!=s2end && *t1==*t2 ) + continue; + else + { + while ( t1!=s1end && + ( bIgnoreWhiteSpace && isWhite( *t1 ) || + bIgnoreNumbers && (t1->isDigit() || *t1=='-' || *t1=='.' ))) + { + ++t1; + } + + while ( t2 != s2end && + ( bIgnoreWhiteSpace && isWhite( *t2 ) || + bIgnoreNumbers && (t2->isDigit() || *t2=='-' || *t2=='.' ))) + { + ++t2; + } + + if ( t1!=s1end && t2!=s2end ) + { + if (ignore_case) + { /* Lowercase comparison. */ + if ( t1->lower() == t2->lower() ) + continue; + } + else if ( *t1 == *t2 ) + continue; + else + return true; + } + else if ( t1==s1end && t2==s2end ) + return false; + else + return true; + } + } + return false; +} + /* Split the file into lines, simultaneously computing the equivalence class for each line. */ @@ -79,7 +134,7 @@ void GnuDiff::find_and_hash_each_line (struct file_data *current) { hash_value h; - const QChar *p = (const QChar *) current->prefix_end; + const QChar *p = current->prefix_end; QChar c; lin i, *bucket; size_t length; @@ -94,64 +149,63 @@ lin eqs_index = equivs_index; lin eqs_alloc = equivs_alloc; const QChar *suffix_begin = current->suffix_begin; - const QChar *bufend = FILE_BUFFER (current) + current->buffered; + const QChar *bufend = current->buffer + current->buffered; bool diff_length_compare_anyway = ignore_white_space != IGNORE_NO_WHITE_SPACE || bIgnoreNumbers; bool same_length_diff_contents_compare_anyway = diff_length_compare_anyway | ignore_case; - while ((const QChar *) p < suffix_begin) + while ( p < suffix_begin) { - const QChar *ip = (const QChar *) p; + const QChar *ip = p; h = 0; - /* Hash this line until we find a newline. */ + /* Hash this line until we find a newline or bufend is reached. */ if (ignore_case) switch (ignore_white_space) { case IGNORE_ALL_SPACE: - while ((c = *p++) != '\n') + while ( p<bufend && (c = *p) != '\n' ) + { if (! (isWhite(c) || bIgnoreNumbers && (c.isDigit() || c=='-' || c=='.' ) )) h = HASH (h, c.lower().unicode()); + ++p; + } break; default: - while ((c = *p++) != '\n') + while ( p<bufend && (c = *p) != '\n' ) + { h = HASH (h, c.lower().unicode()); + ++p; + } break; } else switch (ignore_white_space) { case IGNORE_ALL_SPACE: - while ((c = *p++) != '\n') + while ( p<bufend && (c = *p) != '\n') + { if (! (isWhite(c)|| bIgnoreNumbers && (c.isDigit() || c=='-' || c=='.' ) )) - h = HASH (h, c.unicode()); + h = HASH (h, c.unicode()); + ++p; + } break; default: - while ((c = *p++) != '\n') + while ( p<bufend && (c = *p) != '\n') + { h = HASH (h, c.unicode()); + ++p; + } break; } bucket = &buckets[h % nbuckets]; - length = (const QChar *) p - ip - 1; - - if ((const QChar *) p >= bufend - && current->missing_newline - && ROBUST_OUTPUT_STYLE (output_style)) - { - /* This line is incomplete. If this is significant, - put the line into buckets[-1]. */ - if (ignore_white_space < IGNORE_SPACE_CHANGE) - bucket = &buckets[-1]; - - /* Omit the inserted newline when computing linbuf later. */ - p--; - bufend = suffix_begin = (const QChar *) p; - } + length = p - ip; + ++p; for (i = *bucket; ; i = eqs[i].next) if (!i) @@ -183,7 +237,7 @@ /* Reuse existing equivalence class if the lines are identical. This detects the common case of exact identity faster than lines_differ would. */ - if (memcmp (eqline, ip, length) == 0) + if (memcmp (eqline, ip, length*sizeof(QChar)) == 0) break; if (!same_length_diff_contents_compare_anyway) continue; @@ -191,7 +245,7 @@ else if (!diff_length_compare_anyway) continue; - if (! lines_differ (eqline, ip)) + if (! lines_differ (eqline, eqs[i].length, ip, length)) break; } @@ -235,9 +289,9 @@ (alloc_lines - linbuf_base) * sizeof *linbuf); linbuf -= linbuf_base; } - linbuf[line] = (const QChar *) p; + linbuf[line] = p; - if ((const QChar *) p >= bufend) + if ( p >= bufend) break; if (context <= i && no_diff_means_no_output) @@ -245,8 +299,8 @@ line++; - while (*p++ != '\n') - continue; + while (p<bufend && *p++ != '\n') + continue; } /* Done with cache in local variables. */ @@ -259,33 +313,6 @@ equivs_index = eqs_index; } -/* Prepare the text. Make sure the text end is initialized. - Make sure text ends in a newline, - but remember that we had to add one. - Strip trailing CRs, if that was requested. */ - -void GnuDiff::prepare_text (struct file_data *current) -{ - size_t buffered = current->buffered; - QChar *p = FILE_BUFFER (current); - - if (buffered == 0 || p[buffered - 1] == '\n') - current->missing_newline = 0; - else - { - p[buffered++] = '\n'; - current->missing_newline = 1; - } - - if (!p) - return; - - /* Don't use uninitialized storage when planting or using sentinels. */ - memset (p + buffered, 0, sizeof (word)); - - current->buffered = buffered; -} - /* We have found N lines in a buffer of size S; guess the proportionate number of lines that will be found in a buffer of size T. However, do not guess a number of lines so large that the @@ -303,67 +330,33 @@ void GnuDiff::find_identical_ends (struct file_data filevec[]) { - word *w0, *w1; - QChar *p0, *p1, *buffer0, *buffer1; - const QChar *end0, *beg0; - const QChar **linbuf0, **linbuf1; - lin i, lines; + /* Find identical prefix. */ + const QChar *p0, *p1, *buffer0, *buffer1; + p0 = buffer0 = filevec[0].buffer; + p1 = buffer1 = filevec[1].buffer; size_t n0, n1; - lin alloc_lines0, alloc_lines1; - lin buffered_prefix, prefix_count, prefix_mask; - lin middle_guess, suffix_guess; - - prepare_text (&filevec[0]); - prepare_text (&filevec[1]); - - /* Find identical prefix. */ - - w0 = filevec[0].buffer; - w1 = filevec[1].buffer; - p0 = buffer0 = (QChar *) w0; - p1 = buffer1 = (QChar *) w1; n0 = filevec[0].buffered; n1 = filevec[1].buffered; + const QChar* const pEnd0 = p0 + n0; + const QChar* const pEnd1 = p1 + n1; if (p0 == p1) /* The buffers are the same; sentinels won't work. */ p0 = p1 += n1; else { - /* Insert end sentinels, in this case characters that are guaranteed - to make the equality test false, and thus terminate the loop. */ - - if (n0 < n1) - p0[n0] = ~p1[n0]; - else - p1[n1] = ~p0[n1]; - - /* Loop until first mismatch, or to the sentinel characters. */ - - /* Compare a word at a time for speed. */ - while (*w0 == *w1) - w0++, w1++; - - /* Do the last few bytes of comparison a byte at a time. */ - p0 = (QChar *) w0; - p1 = (QChar *) w1; - while (*p0 == *p1) - p0++, p1++; - - /* Don't mistakenly count missing newline as part of prefix. */ - if (ROBUST_OUTPUT_STYLE (output_style) - && ((buffer0 + n0 - filevec[0].missing_newline < p0) - != - (buffer1 + n1 - filevec[1].missing_newline < p1))) - p0--, p1--; + /* Loop until first mismatch, or end. */ + while ( p0!=pEnd0 && p1!=pEnd1 && *p0 == *p1 ) + { + p0++; + p1++; + } } /* Now P0 and P1 point at the first nonmatching characters. */ - /* Skip back to last line-beginning in the prefix, - and then discard up to HORIZON_LINES lines from the prefix. */ - i = horizon_lines; - while (p0 != buffer0 && (p0[-1] != '\n' || i--)) + /* Skip back to last line-beginning in the prefix. */ + while (p0 != buffer0 && (p0[-1] != '\n' )) p0--, p1--; /* Record the prefix. */ @@ -376,38 +369,35 @@ p0 = buffer0 + n0; p1 = buffer1 + n1; - if (! ROBUST_OUTPUT_STYLE (output_style) - || filevec[0].missing_newline == filevec[1].missing_newline) - { - end0 = p0; /* Addr of last char in file 0. */ + const QChar *end0, *beg0; + end0 = p0; /* Addr of last char in file 0. */ - /* Get value of P0 at which we should stop scanning backward: - this is when either P0 or P1 points just past the last char - of the identical prefix. */ - beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1); + /* Get value of P0 at which we should stop scanning backward: + this is when either P0 or P1 points just past the last char + of the identical prefix. */ + beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1); - /* Scan back until chars don't match or we reach that point. */ - for (; p0 != beg0; p0--, p1--) - if (*p0 != *p1) - { - /* Point at the first char of the matching suffix. */ - beg0 = p0; - break; - } + /* Scan back until chars don't match or we reach that point. */ + for (; p0 != beg0; p0--, p1--) + { + if (*p0 != *p1) + { + /* Point at the first char of the matching suffix. */ + beg0 = p0; + break; + } + } - /* Are we at a line-beginning in both files? If not, add the rest of - this line to the main body. Discard up to HORIZON_LINES lines from - the identical suffix. Also, discard one extra line, - because shift_boundaries may need it. */ - i = horizon_lines + !((buffer0 == p0 || p0[-1] == '\n') - && - (buffer1 == p1 || p1[-1] == '\n')); - while (i-- && p0 != end0) - while (*p0++ != '\n') - continue; + // Go to the next line (skip last line with a difference) + if ( p0 != end0 ) + { + if (*p0 != *p1) + ++p0; + while ( p0<pEnd0 && *p0++ != '\n') + continue; + } - p1 += p0 - beg0; - } + p1 += p0 - beg0; /* Record the suffix. */ filevec[0].suffix_begin = p0; @@ -427,6 +417,10 @@ Handle 1 more line than the context says (because we count 1 too many), rounded up to the next power of 2 to speed index computation. */ + const QChar **linbuf0, **linbuf1; + lin alloc_lines0, alloc_lines1; + lin buffered_prefix, prefix_count, prefix_mask; + lin middle_guess, suffix_guess; if (no_diff_means_no_output && context < (lin)(LIN_MAX / 4) && context < (lin)(n0)) { @@ -444,7 +438,7 @@ } prefix_mask = prefix_count - 1; - lines = 0; + lin lines = 0; linbuf0 = (const QChar**) xmalloc (alloc_lines0 * sizeof(*linbuf0)); p0 = buffer0; @@ -465,7 +459,7 @@ linbuf0 = (const QChar**) xrealloc (linbuf0, alloc_lines0 * sizeof(*linbuf0)); } linbuf0[l] = p0; - while (*p0++ != '\n') + while ( p0<pEnd0 && *p0++ != '\n' ) continue; } } @@ -481,14 +475,15 @@ xalloc_die (); linbuf1 = (const QChar**)xmalloc (alloc_lines1 * sizeof(*linbuf1)); + lin i; if (buffered_prefix != lines) - { + { /* Rotate prefix lines to proper location. */ for (i = 0; i < buffered_prefix; i++) linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask]; for (i = 0; i < buffered_prefix; i++) linbuf0[i] = linbuf1[i]; - } + } /* Initialize line buffer 1 from line buffer 0. */ for (i = 0; i < buffered_prefix; i++)