comparison kdiff3/src/gnudiff_io.cpp @ 68:d7cafcda8c99

KDiff3 0.9.87
author joachim99
date Mon, 31 Jan 2005 22:30:47 +0000
parents efe33e938730
children 8febbfb1148c
comparison
equal deleted inserted replaced
67:ec82d69e8b0c 68:d7cafcda8c99
1 /* File I/O for GNU DIFF. 1 /* File I/O for GNU DIFF.
2 2
3 Modified for KDiff3 by Joachim Eibl 2003. 3 Modified for KDiff3 by Joachim Eibl 2003, 2004, 2005.
4 The original file was part of GNU DIFF. 4 The original file was part of GNU DIFF.
5 5
6 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002 6 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002
7 Free Software Foundation, Inc. 7 Free Software Foundation, Inc.
8 8
40 Afterward, each class is represented by a number. */ 40 Afterward, each class is represented by a number. */
41 struct equivclass 41 struct equivclass
42 { 42 {
43 lin next; /* Next item in this bucket. */ 43 lin next; /* Next item in this bucket. */
44 hash_value hash; /* Hash of lines in this class. */ 44 hash_value hash; /* Hash of lines in this class. */
45 char const *line; /* A line that fits this class. */ 45 const QChar *line; /* A line that fits this class. */
46 size_t length; /* That line's length, not counting its newline. */ 46 size_t length; /* That line's length, not counting its newline. */
47 }; 47 };
48 48
49 /* Hash-table: array of buckets, each being a chain of equivalence classes. 49 /* Hash-table: array of buckets, each being a chain of equivalence classes.
50 buckets[-1] is reserved for incomplete lines. */ 50 buckets[-1] is reserved for incomplete lines. */
77 class for each line. */ 77 class for each line. */
78 78
79 void GnuDiff::find_and_hash_each_line (struct file_data *current) 79 void GnuDiff::find_and_hash_each_line (struct file_data *current)
80 { 80 {
81 hash_value h; 81 hash_value h;
82 unsigned char const *p = (unsigned char const *) current->prefix_end; 82 const QChar *p = (const QChar *) current->prefix_end;
83 unsigned char c; 83 QChar c;
84 lin i, *bucket; 84 lin i, *bucket;
85 size_t length; 85 size_t length;
86 86
87 /* Cache often-used quantities in local variables to help the compiler. */ 87 /* Cache often-used quantities in local variables to help the compiler. */
88 char const **linbuf = current->linbuf; 88 const QChar **linbuf = current->linbuf;
89 lin alloc_lines = current->alloc_lines; 89 lin alloc_lines = current->alloc_lines;
90 lin line = 0; 90 lin line = 0;
91 lin linbuf_base = current->linbuf_base; 91 lin linbuf_base = current->linbuf_base;
92 lin *cureqs = (lin*)xmalloc (alloc_lines * sizeof *cureqs); 92 lin *cureqs = (lin*)xmalloc (alloc_lines * sizeof *cureqs);
93 struct equivclass *eqs = equivs; 93 struct equivclass *eqs = equivs;
94 lin eqs_index = equivs_index; 94 lin eqs_index = equivs_index;
95 lin eqs_alloc = equivs_alloc; 95 lin eqs_alloc = equivs_alloc;
96 char const *suffix_begin = current->suffix_begin; 96 const QChar *suffix_begin = current->suffix_begin;
97 char const *bufend = FILE_BUFFER (current) + current->buffered; 97 const QChar *bufend = FILE_BUFFER (current) + current->buffered;
98 bool diff_length_compare_anyway = 98 bool diff_length_compare_anyway =
99 ignore_white_space != IGNORE_NO_WHITE_SPACE || bIgnoreNumbers; 99 ignore_white_space != IGNORE_NO_WHITE_SPACE || bIgnoreNumbers;
100 bool same_length_diff_contents_compare_anyway = 100 bool same_length_diff_contents_compare_anyway =
101 diff_length_compare_anyway | ignore_case; 101 diff_length_compare_anyway | ignore_case;
102 102
103 while ((char const *) p < suffix_begin) 103 while ((const QChar *) p < suffix_begin)
104 { 104 {
105 char const *ip = (char const *) p; 105 const QChar *ip = (const QChar *) p;
106 106
107 h = 0; 107 h = 0;
108 108
109 /* Hash this line until we find a newline. */ 109 /* Hash this line until we find a newline. */
110 if (ignore_case) 110 if (ignore_case)
111 switch (ignore_white_space) 111 switch (ignore_white_space)
112 { 112 {
113 case IGNORE_ALL_SPACE: 113 case IGNORE_ALL_SPACE:
114 while ((c = *p++) != '\n') 114 while ((c = *p++) != '\n')
115 if (! (ISSPACE (c) || bIgnoreNumbers && (isdigit( c ) || c=='-' || c=='.' ) )) 115 if (! (isWhite(c) || bIgnoreNumbers && (c.isDigit() || c=='-' || c=='.' ) ))
116 h = HASH (h, TOLOWER (c)); 116 h = HASH (h, c.lower().unicode());
117 break;
118
119 case IGNORE_SPACE_CHANGE:
120 while ((c = *p++) != '\n')
121 {
122 if (ISSPACE (c))
123 {
124 do
125 if ((c = *p++) == '\n')
126 goto hashing_done;
127 while (ISSPACE (c));
128
129 h = HASH (h, ' ');
130 }
131
132 /* C is now the first non-space. */
133 h = HASH (h, TOLOWER (c));
134 }
135 break;
136
137 case IGNORE_TAB_EXPANSION:
138 {
139 size_t column = 0;
140 while ((c = *p++) != '\n')
141 {
142 int repetitions = 1;
143
144 switch (c)
145 {
146 case '\b':
147 column -= 0 < column;
148 break;
149
150 case '\t':
151 c = ' ';
152 repetitions = TAB_WIDTH - column % TAB_WIDTH;
153 column += repetitions;
154 break;
155
156 case '\r':
157 column = 0;
158 break;
159
160 default:
161 c = TOLOWER (c);
162 column++;
163 break;
164 }
165
166 do
167 h = HASH (h, c);
168 while (--repetitions != 0);
169 }
170 }
171 break; 117 break;
172 118
173 default: 119 default:
174 while ((c = *p++) != '\n') 120 while ((c = *p++) != '\n')
175 h = HASH (h, TOLOWER (c)); 121 h = HASH (h, c.lower().unicode());
176 break; 122 break;
177 } 123 }
178 else 124 else
179 switch (ignore_white_space) 125 switch (ignore_white_space)
180 { 126 {
181 case IGNORE_ALL_SPACE: 127 case IGNORE_ALL_SPACE:
182 while ((c = *p++) != '\n') 128 while ((c = *p++) != '\n')
183 if (! (ISSPACE (c)|| bIgnoreNumbers && (isdigit( c ) || c=='-' || c=='.' ) )) 129 if (! (isWhite(c)|| bIgnoreNumbers && (c.isDigit() || c=='-' || c=='.' ) ))
184 h = HASH (h, c); 130 h = HASH (h, c.unicode());
185 break;
186
187 case IGNORE_SPACE_CHANGE:
188 while ((c = *p++) != '\n')
189 {
190 if (ISSPACE (c))
191 {
192 do
193 if ((c = *p++) == '\n')
194 goto hashing_done;
195 while (ISSPACE (c));
196
197 h = HASH (h, ' ');
198 }
199
200 /* C is now the first non-space. */
201 h = HASH (h, c);
202 }
203 break;
204
205 case IGNORE_TAB_EXPANSION:
206 {
207 size_t column = 0;
208 while ((c = *p++) != '\n')
209 {
210 int repetitions = 1;
211
212 switch (c)
213 {
214 case '\b':
215 column -= 0 < column;
216 break;
217
218 case '\t':
219 c = ' ';
220 repetitions = TAB_WIDTH - column % TAB_WIDTH;
221 column += repetitions;
222 break;
223
224 case '\r':
225 column = 0;
226 break;
227
228 default:
229 column++;
230 break;
231 }
232
233 do
234 h = HASH (h, c);
235 while (--repetitions != 0);
236 }
237 }
238 break; 131 break;
239 132
240 default: 133 default:
241 while ((c = *p++) != '\n') 134 while ((c = *p++) != '\n')
242 h = HASH (h, c); 135 h = HASH (h, c.unicode());
243 break; 136 break;
244 } 137 }
245 138
246 hashing_done:;
247
248 bucket = &buckets[h % nbuckets]; 139 bucket = &buckets[h % nbuckets];
249 length = (char const *) p - ip - 1; 140 length = (const QChar *) p - ip - 1;
250 141
251 if ((char const *) p == bufend 142 if ((const QChar *) p >= bufend
252 && current->missing_newline 143 && current->missing_newline
253 && ROBUST_OUTPUT_STYLE (output_style)) 144 && ROBUST_OUTPUT_STYLE (output_style))
254 { 145 {
255 /* This line is incomplete. If this is significant, 146 /* This line is incomplete. If this is significant,
256 put the line into buckets[-1]. */ 147 put the line into buckets[-1]. */
257 if (ignore_white_space < IGNORE_SPACE_CHANGE) 148 if (ignore_white_space < IGNORE_SPACE_CHANGE)
258 bucket = &buckets[-1]; 149 bucket = &buckets[-1];
259 150
260 /* Omit the inserted newline when computing linbuf later. */ 151 /* Omit the inserted newline when computing linbuf later. */
261 p--; 152 p--;
262 bufend = suffix_begin = (char const *) p; 153 bufend = suffix_begin = (const QChar *) p;
263 } 154 }
264 155
265 for (i = *bucket; ; i = eqs[i].next) 156 for (i = *bucket; ; i = eqs[i].next)
266 if (!i) 157 if (!i)
267 { 158 {
281 *bucket = i; 172 *bucket = i;
282 break; 173 break;
283 } 174 }
284 else if (eqs[i].hash == h) 175 else if (eqs[i].hash == h)
285 { 176 {
286 char const *eqline = eqs[i].line; 177 const QChar *eqline = eqs[i].line;
287 178
288 /* Reuse existing class if lines_differ reports the lines 179 /* Reuse existing class if lines_differ reports the lines
289 equal. */ 180 equal. */
290 if (eqs[i].length == length) 181 if (eqs[i].length == length)
291 { 182 {
313 || (lin)(PTRDIFF_MAX / sizeof *linbuf) <= alloc_lines - linbuf_base) 204 || (lin)(PTRDIFF_MAX / sizeof *linbuf) <= alloc_lines - linbuf_base)
314 xalloc_die (); 205 xalloc_die ();
315 alloc_lines = 2 * alloc_lines - linbuf_base; 206 alloc_lines = 2 * alloc_lines - linbuf_base;
316 cureqs =(lin*) xrealloc (cureqs, alloc_lines * sizeof *cureqs); 207 cureqs =(lin*) xrealloc (cureqs, alloc_lines * sizeof *cureqs);
317 linbuf += linbuf_base; 208 linbuf += linbuf_base;
318 linbuf = (const char**) xrealloc (linbuf, 209 linbuf = (const QChar**) xrealloc (linbuf,
319 (alloc_lines - linbuf_base) * sizeof *linbuf); 210 (alloc_lines - linbuf_base) * sizeof *linbuf);
320 linbuf -= linbuf_base; 211 linbuf -= linbuf_base;
321 } 212 }
322 linbuf[line] = ip; 213 linbuf[line] = ip;
323 cureqs[line] = i; 214 cureqs[line] = i;
338 || (lin)(PTRDIFF_MAX / sizeof *cureqs) <= 2 * alloc_lines - linbuf_base 229 || (lin)(PTRDIFF_MAX / sizeof *cureqs) <= 2 * alloc_lines - linbuf_base
339 || (lin)(PTRDIFF_MAX / sizeof *linbuf) <= alloc_lines - linbuf_base) 230 || (lin)(PTRDIFF_MAX / sizeof *linbuf) <= alloc_lines - linbuf_base)
340 xalloc_die (); 231 xalloc_die ();
341 alloc_lines = 2 * alloc_lines - linbuf_base; 232 alloc_lines = 2 * alloc_lines - linbuf_base;
342 linbuf += linbuf_base; 233 linbuf += linbuf_base;
343 linbuf = (const char**)xrealloc (linbuf, 234 linbuf = (const QChar**)xrealloc (linbuf,
344 (alloc_lines - linbuf_base) * sizeof *linbuf); 235 (alloc_lines - linbuf_base) * sizeof *linbuf);
345 linbuf -= linbuf_base; 236 linbuf -= linbuf_base;
346 } 237 }
347 linbuf[line] = (char const *) p; 238 linbuf[line] = (const QChar *) p;
348 239
349 if ((char const *) p == bufend) 240 if ((const QChar *) p >= bufend)
350 break; 241 break;
351 242
352 if (context <= i && no_diff_means_no_output) 243 if (context <= i && no_diff_means_no_output)
353 break; 244 break;
354 245
374 Strip trailing CRs, if that was requested. */ 265 Strip trailing CRs, if that was requested. */
375 266
376 void GnuDiff::prepare_text (struct file_data *current) 267 void GnuDiff::prepare_text (struct file_data *current)
377 { 268 {
378 size_t buffered = current->buffered; 269 size_t buffered = current->buffered;
379 char *p = FILE_BUFFER (current); 270 QChar *p = FILE_BUFFER (current);
380 char *dst;
381 271
382 if (buffered == 0 || p[buffered - 1] == '\n') 272 if (buffered == 0 || p[buffered - 1] == '\n')
383 current->missing_newline = 0; 273 current->missing_newline = 0;
384 else 274 else
385 { 275 {
390 if (!p) 280 if (!p)
391 return; 281 return;
392 282
393 /* Don't use uninitialized storage when planting or using sentinels. */ 283 /* Don't use uninitialized storage when planting or using sentinels. */
394 memset (p + buffered, 0, sizeof (word)); 284 memset (p + buffered, 0, sizeof (word));
395
396 if (strip_trailing_cr && (dst = (char*)memchr (p, '\r', buffered)))
397 {
398 char const *src = dst;
399 char const *srclim = p + buffered;
400
401 do
402 dst += ! ((*dst = *src++) == '\r' && *src == '\n');
403 while (src < srclim);
404
405 buffered -= src - dst;
406 }
407 285
408 current->buffered = buffered; 286 current->buffered = buffered;
409 } 287 }
410 288
411 /* We have found N lines in a buffer of size S; guess the 289 /* We have found N lines in a buffer of size S; guess the
415 static lin 293 static lin
416 guess_lines (lin n, size_t s, size_t t) 294 guess_lines (lin n, size_t s, size_t t)
417 { 295 {
418 size_t guessed_bytes_per_line = n < 10 ? 32 : s / (n - 1); 296 size_t guessed_bytes_per_line = n < 10 ? 32 : s / (n - 1);
419 lin guessed_lines = MAX (1, t / guessed_bytes_per_line); 297 lin guessed_lines = MAX (1, t / guessed_bytes_per_line);
420 return MIN (guessed_lines, (lin)(PTRDIFF_MAX / (2 * sizeof (char *) + 1) - 5)) + 5; 298 return MIN (guessed_lines, (lin)(PTRDIFF_MAX / (2 * sizeof (QChar *) + 1) - 5)) + 5;
421 } 299 }
422 300
423 /* Given a vector of two file_data objects, find the identical 301 /* Given a vector of two file_data objects, find the identical
424 prefixes and suffixes of each object. */ 302 prefixes and suffixes of each object. */
425 303
426 void GnuDiff::find_identical_ends (struct file_data filevec[]) 304 void GnuDiff::find_identical_ends (struct file_data filevec[])
427 { 305 {
428 word *w0, *w1; 306 word *w0, *w1;
429 char *p0, *p1, *buffer0, *buffer1; 307 QChar *p0, *p1, *buffer0, *buffer1;
430 char const *end0, *beg0; 308 const QChar *end0, *beg0;
431 char const **linbuf0, **linbuf1; 309 const QChar **linbuf0, **linbuf1;
432 lin i, lines; 310 lin i, lines;
433 size_t n0, n1; 311 size_t n0, n1;
434 lin alloc_lines0, alloc_lines1; 312 lin alloc_lines0, alloc_lines1;
435 lin buffered_prefix, prefix_count, prefix_mask; 313 lin buffered_prefix, prefix_count, prefix_mask;
436 lin middle_guess, suffix_guess; 314 lin middle_guess, suffix_guess;
440 318
441 /* Find identical prefix. */ 319 /* Find identical prefix. */
442 320
443 w0 = filevec[0].buffer; 321 w0 = filevec[0].buffer;
444 w1 = filevec[1].buffer; 322 w1 = filevec[1].buffer;
445 p0 = buffer0 = (char *) w0; 323 p0 = buffer0 = (QChar *) w0;
446 p1 = buffer1 = (char *) w1; 324 p1 = buffer1 = (QChar *) w1;
447 n0 = filevec[0].buffered; 325 n0 = filevec[0].buffered;
448 n1 = filevec[1].buffered; 326 n1 = filevec[1].buffered;
449 327
450 if (p0 == p1) 328 if (p0 == p1)
451 /* The buffers are the same; sentinels won't work. */ 329 /* The buffers are the same; sentinels won't work. */
465 /* Compare a word at a time for speed. */ 343 /* Compare a word at a time for speed. */
466 while (*w0 == *w1) 344 while (*w0 == *w1)
467 w0++, w1++; 345 w0++, w1++;
468 346
469 /* Do the last few bytes of comparison a byte at a time. */ 347 /* Do the last few bytes of comparison a byte at a time. */
470 p0 = (char *) w0; 348 p0 = (QChar *) w0;
471 p1 = (char *) w1; 349 p1 = (QChar *) w1;
472 while (*p0 == *p1) 350 while (*p0 == *p1)
473 p0++, p1++; 351 p0++, p1++;
474 352
475 /* Don't mistakenly count missing newline as part of prefix. */ 353 /* Don't mistakenly count missing newline as part of prefix. */
476 if (ROBUST_OUTPUT_STYLE (output_style) 354 if (ROBUST_OUTPUT_STYLE (output_style)
565 alloc_lines0 = guess_lines (0, 0, n0); 443 alloc_lines0 = guess_lines (0, 0, n0);
566 } 444 }
567 445
568 prefix_mask = prefix_count - 1; 446 prefix_mask = prefix_count - 1;
569 lines = 0; 447 lines = 0;
570 linbuf0 = (const char**) xmalloc (alloc_lines0 * sizeof *linbuf0); 448 linbuf0 = (const QChar**) xmalloc (alloc_lines0 * sizeof(*linbuf0));
571 p0 = buffer0; 449 p0 = buffer0;
572 450
573 /* If the prefix is needed, find the prefix lines. */ 451 /* If the prefix is needed, find the prefix lines. */
574 if (! (no_diff_means_no_output 452 if (! (no_diff_means_no_output
575 && filevec[0].prefix_end == p0 453 && filevec[0].prefix_end == p0
582 if (l == alloc_lines0) 460 if (l == alloc_lines0)
583 { 461 {
584 if ((lin)(PTRDIFF_MAX / (2 * sizeof *linbuf0)) <= alloc_lines0) 462 if ((lin)(PTRDIFF_MAX / (2 * sizeof *linbuf0)) <= alloc_lines0)
585 xalloc_die (); 463 xalloc_die ();
586 alloc_lines0 *= 2; 464 alloc_lines0 *= 2;
587 linbuf0 = (const char**) xrealloc (linbuf0, alloc_lines0 * sizeof *linbuf0); 465 linbuf0 = (const QChar**) xrealloc (linbuf0, alloc_lines0 * sizeof(*linbuf0));
588 } 466 }
589 linbuf0[l] = p0; 467 linbuf0[l] = p0;
590 while (*p0++ != '\n') 468 while (*p0++ != '\n')
591 continue; 469 continue;
592 } 470 }
599 suffix_guess = guess_lines (lines, p0 - buffer0, buffer1 + n1 - p1); 477 suffix_guess = guess_lines (lines, p0 - buffer0, buffer1 + n1 - p1);
600 alloc_lines1 = buffered_prefix + middle_guess + MIN (context, suffix_guess); 478 alloc_lines1 = buffered_prefix + middle_guess + MIN (context, suffix_guess);
601 if (alloc_lines1 < buffered_prefix 479 if (alloc_lines1 < buffered_prefix
602 || (lin)(PTRDIFF_MAX / sizeof *linbuf1) <= alloc_lines1) 480 || (lin)(PTRDIFF_MAX / sizeof *linbuf1) <= alloc_lines1)
603 xalloc_die (); 481 xalloc_die ();
604 linbuf1 = (const char**)xmalloc (alloc_lines1 * sizeof *linbuf1); 482 linbuf1 = (const QChar**)xmalloc (alloc_lines1 * sizeof(*linbuf1));
605 483
606 if (buffered_prefix != lines) 484 if (buffered_prefix != lines)
607 { 485 {
608 /* Rotate prefix lines to proper location. */ 486 /* Rotate prefix lines to proper location. */
609 for (i = 0; i < buffered_prefix; i++) 487 for (i = 0; i < buffered_prefix; i++)