comparison kdiff3/src/gnudiff_io.cpp @ 69:8febbfb1148c

KDiff3 0.9.89
author joachim99
date Mon, 10 Apr 2006 08:40:51 +0000
parents d7cafcda8c99
children
comparison
equal deleted inserted replaced
68:d7cafcda8c99 69:8febbfb1148c
17 GNU General Public License for more details. 17 GNU General Public License for more details.
18 18
19 You should have received a copy of the GNU General Public License 19 You should have received a copy of the GNU General Public License
20 along with this program; see the file COPYING. 20 along with this program; see the file COPYING.
21 If not, write to the Free Software Foundation, 21 If not, write to the Free Software Foundation,
22 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 22 51 Franklin Steet, Fifth Floor, Boston, MA 02110-1301, USA. */
23 23
24 #include "gnudiff_diff.h" 24 #include "gnudiff_diff.h"
25 #include <stdlib.h> 25 #include <stdlib.h>
26 26
27 /* Rotate an unsigned value to the left. */ 27 /* Rotate an unsigned value to the left. */
70 /* Return 1 if BUF contains a non text character. 70 /* Return 1 if BUF contains a non text character.
71 SIZE is the number of characters in BUF. */ 71 SIZE is the number of characters in BUF. */
72 72
73 #define binary_file_p(buf, size) (memchr (buf, 0, size) != 0) 73 #define binary_file_p(buf, size) (memchr (buf, 0, size) != 0)
74 74
75 /* Compare two lines (typically one from each input file)
76 according to the command line options.
77 For efficiency, this is invoked only when the lines do not match exactly
78 but an option like -i might cause us to ignore the difference.
79 Return nonzero if the lines differ. */
80
81 bool GnuDiff::lines_differ (const QChar *s1, size_t len1, const QChar *s2, size_t len2 )
82 {
83 const QChar *t1 = s1;
84 const QChar *t2 = s2;
85 const QChar *s1end = s1+len1;
86 const QChar *s2end = s2+len2;
87
88 for ( ; ; ++t1, ++t2 )
89 {
90 /* Test for exact char equality first, since it's a common case. */
91 if ( t1!=s1end && t2!=s2end && *t1==*t2 )
92 continue;
93 else
94 {
95 while ( t1!=s1end &&
96 ( bIgnoreWhiteSpace && isWhite( *t1 ) ||
97 bIgnoreNumbers && (t1->isDigit() || *t1=='-' || *t1=='.' )))
98 {
99 ++t1;
100 }
101
102 while ( t2 != s2end &&
103 ( bIgnoreWhiteSpace && isWhite( *t2 ) ||
104 bIgnoreNumbers && (t2->isDigit() || *t2=='-' || *t2=='.' )))
105 {
106 ++t2;
107 }
108
109 if ( t1!=s1end && t2!=s2end )
110 {
111 if (ignore_case)
112 { /* Lowercase comparison. */
113 if ( t1->lower() == t2->lower() )
114 continue;
115 }
116 else if ( *t1 == *t2 )
117 continue;
118 else
119 return true;
120 }
121 else if ( t1==s1end && t2==s2end )
122 return false;
123 else
124 return true;
125 }
126 }
127 return false;
128 }
129
75 130
76 /* Split the file into lines, simultaneously computing the equivalence 131 /* Split the file into lines, simultaneously computing the equivalence
77 class for each line. */ 132 class for each line. */
78 133
79 void GnuDiff::find_and_hash_each_line (struct file_data *current) 134 void GnuDiff::find_and_hash_each_line (struct file_data *current)
80 { 135 {
81 hash_value h; 136 hash_value h;
82 const QChar *p = (const QChar *) current->prefix_end; 137 const QChar *p = current->prefix_end;
83 QChar c; 138 QChar c;
84 lin i, *bucket; 139 lin i, *bucket;
85 size_t length; 140 size_t length;
86 141
87 /* Cache often-used quantities in local variables to help the compiler. */ 142 /* Cache often-used quantities in local variables to help the compiler. */
92 lin *cureqs = (lin*)xmalloc (alloc_lines * sizeof *cureqs); 147 lin *cureqs = (lin*)xmalloc (alloc_lines * sizeof *cureqs);
93 struct equivclass *eqs = equivs; 148 struct equivclass *eqs = equivs;
94 lin eqs_index = equivs_index; 149 lin eqs_index = equivs_index;
95 lin eqs_alloc = equivs_alloc; 150 lin eqs_alloc = equivs_alloc;
96 const QChar *suffix_begin = current->suffix_begin; 151 const QChar *suffix_begin = current->suffix_begin;
97 const QChar *bufend = FILE_BUFFER (current) + current->buffered; 152 const QChar *bufend = current->buffer + current->buffered;
98 bool diff_length_compare_anyway = 153 bool diff_length_compare_anyway =
99 ignore_white_space != IGNORE_NO_WHITE_SPACE || bIgnoreNumbers; 154 ignore_white_space != IGNORE_NO_WHITE_SPACE || bIgnoreNumbers;
100 bool same_length_diff_contents_compare_anyway = 155 bool same_length_diff_contents_compare_anyway =
101 diff_length_compare_anyway | ignore_case; 156 diff_length_compare_anyway | ignore_case;
102 157
103 while ((const QChar *) p < suffix_begin) 158 while ( p < suffix_begin)
104 { 159 {
105 const QChar *ip = (const QChar *) p; 160 const QChar *ip = p;
106 161
107 h = 0; 162 h = 0;
108 163
109 /* Hash this line until we find a newline. */ 164 /* Hash this line until we find a newline or bufend is reached. */
110 if (ignore_case) 165 if (ignore_case)
111 switch (ignore_white_space) 166 switch (ignore_white_space)
112 { 167 {
113 case IGNORE_ALL_SPACE: 168 case IGNORE_ALL_SPACE:
114 while ((c = *p++) != '\n') 169 while ( p<bufend && (c = *p) != '\n' )
170 {
115 if (! (isWhite(c) || bIgnoreNumbers && (c.isDigit() || c=='-' || c=='.' ) )) 171 if (! (isWhite(c) || bIgnoreNumbers && (c.isDigit() || c=='-' || c=='.' ) ))
116 h = HASH (h, c.lower().unicode()); 172 h = HASH (h, c.lower().unicode());
173 ++p;
174 }
117 break; 175 break;
118 176
119 default: 177 default:
120 while ((c = *p++) != '\n') 178 while ( p<bufend && (c = *p) != '\n' )
179 {
121 h = HASH (h, c.lower().unicode()); 180 h = HASH (h, c.lower().unicode());
181 ++p;
182 }
122 break; 183 break;
123 } 184 }
124 else 185 else
125 switch (ignore_white_space) 186 switch (ignore_white_space)
126 { 187 {
127 case IGNORE_ALL_SPACE: 188 case IGNORE_ALL_SPACE:
128 while ((c = *p++) != '\n') 189 while ( p<bufend && (c = *p) != '\n')
190 {
129 if (! (isWhite(c)|| bIgnoreNumbers && (c.isDigit() || c=='-' || c=='.' ) )) 191 if (! (isWhite(c)|| bIgnoreNumbers && (c.isDigit() || c=='-' || c=='.' ) ))
130 h = HASH (h, c.unicode()); 192 h = HASH (h, c.unicode());
193 ++p;
194 }
131 break; 195 break;
132 196
133 default: 197 default:
134 while ((c = *p++) != '\n') 198 while ( p<bufend && (c = *p) != '\n')
199 {
135 h = HASH (h, c.unicode()); 200 h = HASH (h, c.unicode());
201 ++p;
202 }
136 break; 203 break;
137 } 204 }
138 205
139 bucket = &buckets[h % nbuckets]; 206 bucket = &buckets[h % nbuckets];
140 length = (const QChar *) p - ip - 1; 207 length = p - ip;
141 208 ++p;
142 if ((const QChar *) p >= bufend
143 && current->missing_newline
144 && ROBUST_OUTPUT_STYLE (output_style))
145 {
146 /* This line is incomplete. If this is significant,
147 put the line into buckets[-1]. */
148 if (ignore_white_space < IGNORE_SPACE_CHANGE)
149 bucket = &buckets[-1];
150
151 /* Omit the inserted newline when computing linbuf later. */
152 p--;
153 bufend = suffix_begin = (const QChar *) p;
154 }
155 209
156 for (i = *bucket; ; i = eqs[i].next) 210 for (i = *bucket; ; i = eqs[i].next)
157 if (!i) 211 if (!i)
158 { 212 {
159 /* Create a new equivalence class in this bucket. */ 213 /* Create a new equivalence class in this bucket. */
181 if (eqs[i].length == length) 235 if (eqs[i].length == length)
182 { 236 {
183 /* Reuse existing equivalence class if the lines are identical. 237 /* Reuse existing equivalence class if the lines are identical.
184 This detects the common case of exact identity 238 This detects the common case of exact identity
185 faster than lines_differ would. */ 239 faster than lines_differ would. */
186 if (memcmp (eqline, ip, length) == 0) 240 if (memcmp (eqline, ip, length*sizeof(QChar)) == 0)
187 break; 241 break;
188 if (!same_length_diff_contents_compare_anyway) 242 if (!same_length_diff_contents_compare_anyway)
189 continue; 243 continue;
190 } 244 }
191 else if (!diff_length_compare_anyway) 245 else if (!diff_length_compare_anyway)
192 continue; 246 continue;
193 247
194 if (! lines_differ (eqline, ip)) 248 if (! lines_differ (eqline, eqs[i].length, ip, length))
195 break; 249 break;
196 } 250 }
197 251
198 /* Maybe increase the size of the line table. */ 252 /* Maybe increase the size of the line table. */
199 if (line == alloc_lines) 253 if (line == alloc_lines)
233 linbuf += linbuf_base; 287 linbuf += linbuf_base;
234 linbuf = (const QChar**)xrealloc (linbuf, 288 linbuf = (const QChar**)xrealloc (linbuf,
235 (alloc_lines - linbuf_base) * sizeof *linbuf); 289 (alloc_lines - linbuf_base) * sizeof *linbuf);
236 linbuf -= linbuf_base; 290 linbuf -= linbuf_base;
237 } 291 }
238 linbuf[line] = (const QChar *) p; 292 linbuf[line] = p;
239 293
240 if ((const QChar *) p >= bufend) 294 if ( p >= bufend)
241 break; 295 break;
242 296
243 if (context <= i && no_diff_means_no_output) 297 if (context <= i && no_diff_means_no_output)
244 break; 298 break;
245 299
246 line++; 300 line++;
247 301
248 while (*p++ != '\n') 302 while (p<bufend && *p++ != '\n')
249 continue; 303 continue;
250 } 304 }
251 305
252 /* Done with cache in local variables. */ 306 /* Done with cache in local variables. */
253 current->linbuf = linbuf; 307 current->linbuf = linbuf;
254 current->valid_lines = line; 308 current->valid_lines = line;
257 equivs = eqs; 311 equivs = eqs;
258 equivs_alloc = eqs_alloc; 312 equivs_alloc = eqs_alloc;
259 equivs_index = eqs_index; 313 equivs_index = eqs_index;
260 } 314 }
261 315
262 /* Prepare the text. Make sure the text end is initialized.
263 Make sure text ends in a newline,
264 but remember that we had to add one.
265 Strip trailing CRs, if that was requested. */
266
267 void GnuDiff::prepare_text (struct file_data *current)
268 {
269 size_t buffered = current->buffered;
270 QChar *p = FILE_BUFFER (current);
271
272 if (buffered == 0 || p[buffered - 1] == '\n')
273 current->missing_newline = 0;
274 else
275 {
276 p[buffered++] = '\n';
277 current->missing_newline = 1;
278 }
279
280 if (!p)
281 return;
282
283 /* Don't use uninitialized storage when planting or using sentinels. */
284 memset (p + buffered, 0, sizeof (word));
285
286 current->buffered = buffered;
287 }
288
289 /* We have found N lines in a buffer of size S; guess the 316 /* We have found N lines in a buffer of size S; guess the
290 proportionate number of lines that will be found in a buffer of 317 proportionate number of lines that will be found in a buffer of
291 size T. However, do not guess a number of lines so large that the 318 size T. However, do not guess a number of lines so large that the
292 resulting line table might cause overflow in size calculations. */ 319 resulting line table might cause overflow in size calculations. */
293 static lin 320 static lin
301 /* Given a vector of two file_data objects, find the identical 328 /* Given a vector of two file_data objects, find the identical
302 prefixes and suffixes of each object. */ 329 prefixes and suffixes of each object. */
303 330
304 void GnuDiff::find_identical_ends (struct file_data filevec[]) 331 void GnuDiff::find_identical_ends (struct file_data filevec[])
305 { 332 {
306 word *w0, *w1; 333 /* Find identical prefix. */
307 QChar *p0, *p1, *buffer0, *buffer1; 334 const QChar *p0, *p1, *buffer0, *buffer1;
308 const QChar *end0, *beg0; 335 p0 = buffer0 = filevec[0].buffer;
309 const QChar **linbuf0, **linbuf1; 336 p1 = buffer1 = filevec[1].buffer;
310 lin i, lines;
311 size_t n0, n1; 337 size_t n0, n1;
312 lin alloc_lines0, alloc_lines1;
313 lin buffered_prefix, prefix_count, prefix_mask;
314 lin middle_guess, suffix_guess;
315
316 prepare_text (&filevec[0]);
317 prepare_text (&filevec[1]);
318
319 /* Find identical prefix. */
320
321 w0 = filevec[0].buffer;
322 w1 = filevec[1].buffer;
323 p0 = buffer0 = (QChar *) w0;
324 p1 = buffer1 = (QChar *) w1;
325 n0 = filevec[0].buffered; 338 n0 = filevec[0].buffered;
326 n1 = filevec[1].buffered; 339 n1 = filevec[1].buffered;
340 const QChar* const pEnd0 = p0 + n0;
341 const QChar* const pEnd1 = p1 + n1;
327 342
328 if (p0 == p1) 343 if (p0 == p1)
329 /* The buffers are the same; sentinels won't work. */ 344 /* The buffers are the same; sentinels won't work. */
330 p0 = p1 += n1; 345 p0 = p1 += n1;
331 else 346 else
332 { 347 {
333 /* Insert end sentinels, in this case characters that are guaranteed 348 /* Loop until first mismatch, or end. */
334 to make the equality test false, and thus terminate the loop. */ 349 while ( p0!=pEnd0 && p1!=pEnd1 && *p0 == *p1 )
335 350 {
336 if (n0 < n1) 351 p0++;
337 p0[n0] = ~p1[n0]; 352 p1++;
338 else 353 }
339 p1[n1] = ~p0[n1];
340
341 /* Loop until first mismatch, or to the sentinel characters. */
342
343 /* Compare a word at a time for speed. */
344 while (*w0 == *w1)
345 w0++, w1++;
346
347 /* Do the last few bytes of comparison a byte at a time. */
348 p0 = (QChar *) w0;
349 p1 = (QChar *) w1;
350 while (*p0 == *p1)
351 p0++, p1++;
352
353 /* Don't mistakenly count missing newline as part of prefix. */
354 if (ROBUST_OUTPUT_STYLE (output_style)
355 && ((buffer0 + n0 - filevec[0].missing_newline < p0)
356 !=
357 (buffer1 + n1 - filevec[1].missing_newline < p1)))
358 p0--, p1--;
359 } 354 }
360 355
361 /* Now P0 and P1 point at the first nonmatching characters. */ 356 /* Now P0 and P1 point at the first nonmatching characters. */
362 357
363 /* Skip back to last line-beginning in the prefix, 358 /* Skip back to last line-beginning in the prefix. */
364 and then discard up to HORIZON_LINES lines from the prefix. */ 359 while (p0 != buffer0 && (p0[-1] != '\n' ))
365 i = horizon_lines;
366 while (p0 != buffer0 && (p0[-1] != '\n' || i--))
367 p0--, p1--; 360 p0--, p1--;
368 361
369 /* Record the prefix. */ 362 /* Record the prefix. */
370 filevec[0].prefix_end = p0; 363 filevec[0].prefix_end = p0;
371 filevec[1].prefix_end = p1; 364 filevec[1].prefix_end = p1;
374 367
375 /* P0 and P1 point beyond the last chars not yet compared. */ 368 /* P0 and P1 point beyond the last chars not yet compared. */
376 p0 = buffer0 + n0; 369 p0 = buffer0 + n0;
377 p1 = buffer1 + n1; 370 p1 = buffer1 + n1;
378 371
379 if (! ROBUST_OUTPUT_STYLE (output_style) 372 const QChar *end0, *beg0;
380 || filevec[0].missing_newline == filevec[1].missing_newline) 373 end0 = p0; /* Addr of last char in file 0. */
381 { 374
382 end0 = p0; /* Addr of last char in file 0. */ 375 /* Get value of P0 at which we should stop scanning backward:
383 376 this is when either P0 or P1 points just past the last char
384 /* Get value of P0 at which we should stop scanning backward: 377 of the identical prefix. */
385 this is when either P0 or P1 points just past the last char 378 beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1);
386 of the identical prefix. */ 379
387 beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1); 380 /* Scan back until chars don't match or we reach that point. */
388 381 for (; p0 != beg0; p0--, p1--)
389 /* Scan back until chars don't match or we reach that point. */ 382 {
390 for (; p0 != beg0; p0--, p1--) 383 if (*p0 != *p1)
391 if (*p0 != *p1) 384 {
392 { 385 /* Point at the first char of the matching suffix. */
393 /* Point at the first char of the matching suffix. */ 386 beg0 = p0;
394 beg0 = p0; 387 break;
395 break; 388 }
396 } 389 }
397 390
398 /* Are we at a line-beginning in both files? If not, add the rest of 391 // Go to the next line (skip last line with a difference)
399 this line to the main body. Discard up to HORIZON_LINES lines from 392 if ( p0 != end0 )
400 the identical suffix. Also, discard one extra line, 393 {
401 because shift_boundaries may need it. */ 394 if (*p0 != *p1)
402 i = horizon_lines + !((buffer0 == p0 || p0[-1] == '\n') 395 ++p0;
403 && 396 while ( p0<pEnd0 && *p0++ != '\n')
404 (buffer1 == p1 || p1[-1] == '\n')); 397 continue;
405 while (i-- && p0 != end0) 398 }
406 while (*p0++ != '\n') 399
407 continue; 400 p1 += p0 - beg0;
408
409 p1 += p0 - beg0;
410 }
411 401
412 /* Record the suffix. */ 402 /* Record the suffix. */
413 filevec[0].suffix_begin = p0; 403 filevec[0].suffix_begin = p0;
414 filevec[1].suffix_begin = p1; 404 filevec[1].suffix_begin = p1;
415 405
425 Otherwise, prefix_count != 0. Save just prefix_count lines at start 415 Otherwise, prefix_count != 0. Save just prefix_count lines at start
426 of the line buffer; they'll be moved to the proper location later. 416 of the line buffer; they'll be moved to the proper location later.
427 Handle 1 more line than the context says (because we count 1 too many), 417 Handle 1 more line than the context says (because we count 1 too many),
428 rounded up to the next power of 2 to speed index computation. */ 418 rounded up to the next power of 2 to speed index computation. */
429 419
420 const QChar **linbuf0, **linbuf1;
421 lin alloc_lines0, alloc_lines1;
422 lin buffered_prefix, prefix_count, prefix_mask;
423 lin middle_guess, suffix_guess;
430 if (no_diff_means_no_output 424 if (no_diff_means_no_output
431 && context < (lin)(LIN_MAX / 4) && context < (lin)(n0)) 425 && context < (lin)(LIN_MAX / 4) && context < (lin)(n0))
432 { 426 {
433 middle_guess = guess_lines (0, 0, p0 - filevec[0].prefix_end); 427 middle_guess = guess_lines (0, 0, p0 - filevec[0].prefix_end);
434 suffix_guess = guess_lines (0, 0, buffer0 + n0 - p0); 428 suffix_guess = guess_lines (0, 0, buffer0 + n0 - p0);
442 prefix_count = 0; 436 prefix_count = 0;
443 alloc_lines0 = guess_lines (0, 0, n0); 437 alloc_lines0 = guess_lines (0, 0, n0);
444 } 438 }
445 439
446 prefix_mask = prefix_count - 1; 440 prefix_mask = prefix_count - 1;
447 lines = 0; 441 lin lines = 0;
448 linbuf0 = (const QChar**) xmalloc (alloc_lines0 * sizeof(*linbuf0)); 442 linbuf0 = (const QChar**) xmalloc (alloc_lines0 * sizeof(*linbuf0));
449 p0 = buffer0; 443 p0 = buffer0;
450 444
451 /* If the prefix is needed, find the prefix lines. */ 445 /* If the prefix is needed, find the prefix lines. */
452 if (! (no_diff_means_no_output 446 if (! (no_diff_means_no_output
463 xalloc_die (); 457 xalloc_die ();
464 alloc_lines0 *= 2; 458 alloc_lines0 *= 2;
465 linbuf0 = (const QChar**) xrealloc (linbuf0, alloc_lines0 * sizeof(*linbuf0)); 459 linbuf0 = (const QChar**) xrealloc (linbuf0, alloc_lines0 * sizeof(*linbuf0));
466 } 460 }
467 linbuf0[l] = p0; 461 linbuf0[l] = p0;
468 while (*p0++ != '\n') 462 while ( p0<pEnd0 && *p0++ != '\n' )
469 continue; 463 continue;
470 } 464 }
471 } 465 }
472 buffered_prefix = prefix_count && context < lines ? context : lines; 466 buffered_prefix = prefix_count && context < lines ? context : lines;
473 467
479 if (alloc_lines1 < buffered_prefix 473 if (alloc_lines1 < buffered_prefix
480 || (lin)(PTRDIFF_MAX / sizeof *linbuf1) <= alloc_lines1) 474 || (lin)(PTRDIFF_MAX / sizeof *linbuf1) <= alloc_lines1)
481 xalloc_die (); 475 xalloc_die ();
482 linbuf1 = (const QChar**)xmalloc (alloc_lines1 * sizeof(*linbuf1)); 476 linbuf1 = (const QChar**)xmalloc (alloc_lines1 * sizeof(*linbuf1));
483 477
478 lin i;
484 if (buffered_prefix != lines) 479 if (buffered_prefix != lines)
485 { 480 {
486 /* Rotate prefix lines to proper location. */ 481 /* Rotate prefix lines to proper location. */
487 for (i = 0; i < buffered_prefix; i++) 482 for (i = 0; i < buffered_prefix; i++)
488 linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask]; 483 linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask];
489 for (i = 0; i < buffered_prefix; i++) 484 for (i = 0; i < buffered_prefix; i++)
490 linbuf0[i] = linbuf1[i]; 485 linbuf0[i] = linbuf1[i];
491 } 486 }
492 487
493 /* Initialize line buffer 1 from line buffer 0. */ 488 /* Initialize line buffer 1 from line buffer 0. */
494 for (i = 0; i < buffered_prefix; i++) 489 for (i = 0; i < buffered_prefix; i++)
495 linbuf1[i] = linbuf0[i] - buffer0 + buffer1; 490 linbuf1[i] = linbuf0[i] - buffer0 + buffer1;
496 491