Mercurial > hg > easyhg-kdiff3
comparison kdiff3/src/gnudiff_io.cpp @ 68:d7cafcda8c99
KDiff3 0.9.87
author | joachim99 |
---|---|
date | Mon, 31 Jan 2005 22:30:47 +0000 |
parents | efe33e938730 |
children | 8febbfb1148c |
comparison
equal
deleted
inserted
replaced
67:ec82d69e8b0c | 68:d7cafcda8c99 |
---|---|
1 /* File I/O for GNU DIFF. | 1 /* File I/O for GNU DIFF. |
2 | 2 |
3 Modified for KDiff3 by Joachim Eibl 2003. | 3 Modified for KDiff3 by Joachim Eibl 2003, 2004, 2005. |
4 The original file was part of GNU DIFF. | 4 The original file was part of GNU DIFF. |
5 | 5 |
6 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002 | 6 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002 |
7 Free Software Foundation, Inc. | 7 Free Software Foundation, Inc. |
8 | 8 |
40 Afterward, each class is represented by a number. */ | 40 Afterward, each class is represented by a number. */ |
41 struct equivclass | 41 struct equivclass |
42 { | 42 { |
43 lin next; /* Next item in this bucket. */ | 43 lin next; /* Next item in this bucket. */ |
44 hash_value hash; /* Hash of lines in this class. */ | 44 hash_value hash; /* Hash of lines in this class. */ |
45 char const *line; /* A line that fits this class. */ | 45 const QChar *line; /* A line that fits this class. */ |
46 size_t length; /* That line's length, not counting its newline. */ | 46 size_t length; /* That line's length, not counting its newline. */ |
47 }; | 47 }; |
48 | 48 |
49 /* Hash-table: array of buckets, each being a chain of equivalence classes. | 49 /* Hash-table: array of buckets, each being a chain of equivalence classes. |
50 buckets[-1] is reserved for incomplete lines. */ | 50 buckets[-1] is reserved for incomplete lines. */ |
77 class for each line. */ | 77 class for each line. */ |
78 | 78 |
79 void GnuDiff::find_and_hash_each_line (struct file_data *current) | 79 void GnuDiff::find_and_hash_each_line (struct file_data *current) |
80 { | 80 { |
81 hash_value h; | 81 hash_value h; |
82 unsigned char const *p = (unsigned char const *) current->prefix_end; | 82 const QChar *p = (const QChar *) current->prefix_end; |
83 unsigned char c; | 83 QChar c; |
84 lin i, *bucket; | 84 lin i, *bucket; |
85 size_t length; | 85 size_t length; |
86 | 86 |
87 /* Cache often-used quantities in local variables to help the compiler. */ | 87 /* Cache often-used quantities in local variables to help the compiler. */ |
88 char const **linbuf = current->linbuf; | 88 const QChar **linbuf = current->linbuf; |
89 lin alloc_lines = current->alloc_lines; | 89 lin alloc_lines = current->alloc_lines; |
90 lin line = 0; | 90 lin line = 0; |
91 lin linbuf_base = current->linbuf_base; | 91 lin linbuf_base = current->linbuf_base; |
92 lin *cureqs = (lin*)xmalloc (alloc_lines * sizeof *cureqs); | 92 lin *cureqs = (lin*)xmalloc (alloc_lines * sizeof *cureqs); |
93 struct equivclass *eqs = equivs; | 93 struct equivclass *eqs = equivs; |
94 lin eqs_index = equivs_index; | 94 lin eqs_index = equivs_index; |
95 lin eqs_alloc = equivs_alloc; | 95 lin eqs_alloc = equivs_alloc; |
96 char const *suffix_begin = current->suffix_begin; | 96 const QChar *suffix_begin = current->suffix_begin; |
97 char const *bufend = FILE_BUFFER (current) + current->buffered; | 97 const QChar *bufend = FILE_BUFFER (current) + current->buffered; |
98 bool diff_length_compare_anyway = | 98 bool diff_length_compare_anyway = |
99 ignore_white_space != IGNORE_NO_WHITE_SPACE || bIgnoreNumbers; | 99 ignore_white_space != IGNORE_NO_WHITE_SPACE || bIgnoreNumbers; |
100 bool same_length_diff_contents_compare_anyway = | 100 bool same_length_diff_contents_compare_anyway = |
101 diff_length_compare_anyway | ignore_case; | 101 diff_length_compare_anyway | ignore_case; |
102 | 102 |
103 while ((char const *) p < suffix_begin) | 103 while ((const QChar *) p < suffix_begin) |
104 { | 104 { |
105 char const *ip = (char const *) p; | 105 const QChar *ip = (const QChar *) p; |
106 | 106 |
107 h = 0; | 107 h = 0; |
108 | 108 |
109 /* Hash this line until we find a newline. */ | 109 /* Hash this line until we find a newline. */ |
110 if (ignore_case) | 110 if (ignore_case) |
111 switch (ignore_white_space) | 111 switch (ignore_white_space) |
112 { | 112 { |
113 case IGNORE_ALL_SPACE: | 113 case IGNORE_ALL_SPACE: |
114 while ((c = *p++) != '\n') | 114 while ((c = *p++) != '\n') |
115 if (! (ISSPACE (c) || bIgnoreNumbers && (isdigit( c ) || c=='-' || c=='.' ) )) | 115 if (! (isWhite(c) || bIgnoreNumbers && (c.isDigit() || c=='-' || c=='.' ) )) |
116 h = HASH (h, TOLOWER (c)); | 116 h = HASH (h, c.lower().unicode()); |
117 break; | |
118 | |
119 case IGNORE_SPACE_CHANGE: | |
120 while ((c = *p++) != '\n') | |
121 { | |
122 if (ISSPACE (c)) | |
123 { | |
124 do | |
125 if ((c = *p++) == '\n') | |
126 goto hashing_done; | |
127 while (ISSPACE (c)); | |
128 | |
129 h = HASH (h, ' '); | |
130 } | |
131 | |
132 /* C is now the first non-space. */ | |
133 h = HASH (h, TOLOWER (c)); | |
134 } | |
135 break; | |
136 | |
137 case IGNORE_TAB_EXPANSION: | |
138 { | |
139 size_t column = 0; | |
140 while ((c = *p++) != '\n') | |
141 { | |
142 int repetitions = 1; | |
143 | |
144 switch (c) | |
145 { | |
146 case '\b': | |
147 column -= 0 < column; | |
148 break; | |
149 | |
150 case '\t': | |
151 c = ' '; | |
152 repetitions = TAB_WIDTH - column % TAB_WIDTH; | |
153 column += repetitions; | |
154 break; | |
155 | |
156 case '\r': | |
157 column = 0; | |
158 break; | |
159 | |
160 default: | |
161 c = TOLOWER (c); | |
162 column++; | |
163 break; | |
164 } | |
165 | |
166 do | |
167 h = HASH (h, c); | |
168 while (--repetitions != 0); | |
169 } | |
170 } | |
171 break; | 117 break; |
172 | 118 |
173 default: | 119 default: |
174 while ((c = *p++) != '\n') | 120 while ((c = *p++) != '\n') |
175 h = HASH (h, TOLOWER (c)); | 121 h = HASH (h, c.lower().unicode()); |
176 break; | 122 break; |
177 } | 123 } |
178 else | 124 else |
179 switch (ignore_white_space) | 125 switch (ignore_white_space) |
180 { | 126 { |
181 case IGNORE_ALL_SPACE: | 127 case IGNORE_ALL_SPACE: |
182 while ((c = *p++) != '\n') | 128 while ((c = *p++) != '\n') |
183 if (! (ISSPACE (c)|| bIgnoreNumbers && (isdigit( c ) || c=='-' || c=='.' ) )) | 129 if (! (isWhite(c)|| bIgnoreNumbers && (c.isDigit() || c=='-' || c=='.' ) )) |
184 h = HASH (h, c); | 130 h = HASH (h, c.unicode()); |
185 break; | |
186 | |
187 case IGNORE_SPACE_CHANGE: | |
188 while ((c = *p++) != '\n') | |
189 { | |
190 if (ISSPACE (c)) | |
191 { | |
192 do | |
193 if ((c = *p++) == '\n') | |
194 goto hashing_done; | |
195 while (ISSPACE (c)); | |
196 | |
197 h = HASH (h, ' '); | |
198 } | |
199 | |
200 /* C is now the first non-space. */ | |
201 h = HASH (h, c); | |
202 } | |
203 break; | |
204 | |
205 case IGNORE_TAB_EXPANSION: | |
206 { | |
207 size_t column = 0; | |
208 while ((c = *p++) != '\n') | |
209 { | |
210 int repetitions = 1; | |
211 | |
212 switch (c) | |
213 { | |
214 case '\b': | |
215 column -= 0 < column; | |
216 break; | |
217 | |
218 case '\t': | |
219 c = ' '; | |
220 repetitions = TAB_WIDTH - column % TAB_WIDTH; | |
221 column += repetitions; | |
222 break; | |
223 | |
224 case '\r': | |
225 column = 0; | |
226 break; | |
227 | |
228 default: | |
229 column++; | |
230 break; | |
231 } | |
232 | |
233 do | |
234 h = HASH (h, c); | |
235 while (--repetitions != 0); | |
236 } | |
237 } | |
238 break; | 131 break; |
239 | 132 |
240 default: | 133 default: |
241 while ((c = *p++) != '\n') | 134 while ((c = *p++) != '\n') |
242 h = HASH (h, c); | 135 h = HASH (h, c.unicode()); |
243 break; | 136 break; |
244 } | 137 } |
245 | 138 |
246 hashing_done:; | |
247 | |
248 bucket = &buckets[h % nbuckets]; | 139 bucket = &buckets[h % nbuckets]; |
249 length = (char const *) p - ip - 1; | 140 length = (const QChar *) p - ip - 1; |
250 | 141 |
251 if ((char const *) p == bufend | 142 if ((const QChar *) p >= bufend |
252 && current->missing_newline | 143 && current->missing_newline |
253 && ROBUST_OUTPUT_STYLE (output_style)) | 144 && ROBUST_OUTPUT_STYLE (output_style)) |
254 { | 145 { |
255 /* This line is incomplete. If this is significant, | 146 /* This line is incomplete. If this is significant, |
256 put the line into buckets[-1]. */ | 147 put the line into buckets[-1]. */ |
257 if (ignore_white_space < IGNORE_SPACE_CHANGE) | 148 if (ignore_white_space < IGNORE_SPACE_CHANGE) |
258 bucket = &buckets[-1]; | 149 bucket = &buckets[-1]; |
259 | 150 |
260 /* Omit the inserted newline when computing linbuf later. */ | 151 /* Omit the inserted newline when computing linbuf later. */ |
261 p--; | 152 p--; |
262 bufend = suffix_begin = (char const *) p; | 153 bufend = suffix_begin = (const QChar *) p; |
263 } | 154 } |
264 | 155 |
265 for (i = *bucket; ; i = eqs[i].next) | 156 for (i = *bucket; ; i = eqs[i].next) |
266 if (!i) | 157 if (!i) |
267 { | 158 { |
281 *bucket = i; | 172 *bucket = i; |
282 break; | 173 break; |
283 } | 174 } |
284 else if (eqs[i].hash == h) | 175 else if (eqs[i].hash == h) |
285 { | 176 { |
286 char const *eqline = eqs[i].line; | 177 const QChar *eqline = eqs[i].line; |
287 | 178 |
288 /* Reuse existing class if lines_differ reports the lines | 179 /* Reuse existing class if lines_differ reports the lines |
289 equal. */ | 180 equal. */ |
290 if (eqs[i].length == length) | 181 if (eqs[i].length == length) |
291 { | 182 { |
313 || (lin)(PTRDIFF_MAX / sizeof *linbuf) <= alloc_lines - linbuf_base) | 204 || (lin)(PTRDIFF_MAX / sizeof *linbuf) <= alloc_lines - linbuf_base) |
314 xalloc_die (); | 205 xalloc_die (); |
315 alloc_lines = 2 * alloc_lines - linbuf_base; | 206 alloc_lines = 2 * alloc_lines - linbuf_base; |
316 cureqs =(lin*) xrealloc (cureqs, alloc_lines * sizeof *cureqs); | 207 cureqs =(lin*) xrealloc (cureqs, alloc_lines * sizeof *cureqs); |
317 linbuf += linbuf_base; | 208 linbuf += linbuf_base; |
318 linbuf = (const char**) xrealloc (linbuf, | 209 linbuf = (const QChar**) xrealloc (linbuf, |
319 (alloc_lines - linbuf_base) * sizeof *linbuf); | 210 (alloc_lines - linbuf_base) * sizeof *linbuf); |
320 linbuf -= linbuf_base; | 211 linbuf -= linbuf_base; |
321 } | 212 } |
322 linbuf[line] = ip; | 213 linbuf[line] = ip; |
323 cureqs[line] = i; | 214 cureqs[line] = i; |
338 || (lin)(PTRDIFF_MAX / sizeof *cureqs) <= 2 * alloc_lines - linbuf_base | 229 || (lin)(PTRDIFF_MAX / sizeof *cureqs) <= 2 * alloc_lines - linbuf_base |
339 || (lin)(PTRDIFF_MAX / sizeof *linbuf) <= alloc_lines - linbuf_base) | 230 || (lin)(PTRDIFF_MAX / sizeof *linbuf) <= alloc_lines - linbuf_base) |
340 xalloc_die (); | 231 xalloc_die (); |
341 alloc_lines = 2 * alloc_lines - linbuf_base; | 232 alloc_lines = 2 * alloc_lines - linbuf_base; |
342 linbuf += linbuf_base; | 233 linbuf += linbuf_base; |
343 linbuf = (const char**)xrealloc (linbuf, | 234 linbuf = (const QChar**)xrealloc (linbuf, |
344 (alloc_lines - linbuf_base) * sizeof *linbuf); | 235 (alloc_lines - linbuf_base) * sizeof *linbuf); |
345 linbuf -= linbuf_base; | 236 linbuf -= linbuf_base; |
346 } | 237 } |
347 linbuf[line] = (char const *) p; | 238 linbuf[line] = (const QChar *) p; |
348 | 239 |
349 if ((char const *) p == bufend) | 240 if ((const QChar *) p >= bufend) |
350 break; | 241 break; |
351 | 242 |
352 if (context <= i && no_diff_means_no_output) | 243 if (context <= i && no_diff_means_no_output) |
353 break; | 244 break; |
354 | 245 |
374 Strip trailing CRs, if that was requested. */ | 265 Strip trailing CRs, if that was requested. */ |
375 | 266 |
376 void GnuDiff::prepare_text (struct file_data *current) | 267 void GnuDiff::prepare_text (struct file_data *current) |
377 { | 268 { |
378 size_t buffered = current->buffered; | 269 size_t buffered = current->buffered; |
379 char *p = FILE_BUFFER (current); | 270 QChar *p = FILE_BUFFER (current); |
380 char *dst; | |
381 | 271 |
382 if (buffered == 0 || p[buffered - 1] == '\n') | 272 if (buffered == 0 || p[buffered - 1] == '\n') |
383 current->missing_newline = 0; | 273 current->missing_newline = 0; |
384 else | 274 else |
385 { | 275 { |
390 if (!p) | 280 if (!p) |
391 return; | 281 return; |
392 | 282 |
393 /* Don't use uninitialized storage when planting or using sentinels. */ | 283 /* Don't use uninitialized storage when planting or using sentinels. */ |
394 memset (p + buffered, 0, sizeof (word)); | 284 memset (p + buffered, 0, sizeof (word)); |
395 | |
396 if (strip_trailing_cr && (dst = (char*)memchr (p, '\r', buffered))) | |
397 { | |
398 char const *src = dst; | |
399 char const *srclim = p + buffered; | |
400 | |
401 do | |
402 dst += ! ((*dst = *src++) == '\r' && *src == '\n'); | |
403 while (src < srclim); | |
404 | |
405 buffered -= src - dst; | |
406 } | |
407 | 285 |
408 current->buffered = buffered; | 286 current->buffered = buffered; |
409 } | 287 } |
410 | 288 |
411 /* We have found N lines in a buffer of size S; guess the | 289 /* We have found N lines in a buffer of size S; guess the |
415 static lin | 293 static lin |
416 guess_lines (lin n, size_t s, size_t t) | 294 guess_lines (lin n, size_t s, size_t t) |
417 { | 295 { |
418 size_t guessed_bytes_per_line = n < 10 ? 32 : s / (n - 1); | 296 size_t guessed_bytes_per_line = n < 10 ? 32 : s / (n - 1); |
419 lin guessed_lines = MAX (1, t / guessed_bytes_per_line); | 297 lin guessed_lines = MAX (1, t / guessed_bytes_per_line); |
420 return MIN (guessed_lines, (lin)(PTRDIFF_MAX / (2 * sizeof (char *) + 1) - 5)) + 5; | 298 return MIN (guessed_lines, (lin)(PTRDIFF_MAX / (2 * sizeof (QChar *) + 1) - 5)) + 5; |
421 } | 299 } |
422 | 300 |
423 /* Given a vector of two file_data objects, find the identical | 301 /* Given a vector of two file_data objects, find the identical |
424 prefixes and suffixes of each object. */ | 302 prefixes and suffixes of each object. */ |
425 | 303 |
426 void GnuDiff::find_identical_ends (struct file_data filevec[]) | 304 void GnuDiff::find_identical_ends (struct file_data filevec[]) |
427 { | 305 { |
428 word *w0, *w1; | 306 word *w0, *w1; |
429 char *p0, *p1, *buffer0, *buffer1; | 307 QChar *p0, *p1, *buffer0, *buffer1; |
430 char const *end0, *beg0; | 308 const QChar *end0, *beg0; |
431 char const **linbuf0, **linbuf1; | 309 const QChar **linbuf0, **linbuf1; |
432 lin i, lines; | 310 lin i, lines; |
433 size_t n0, n1; | 311 size_t n0, n1; |
434 lin alloc_lines0, alloc_lines1; | 312 lin alloc_lines0, alloc_lines1; |
435 lin buffered_prefix, prefix_count, prefix_mask; | 313 lin buffered_prefix, prefix_count, prefix_mask; |
436 lin middle_guess, suffix_guess; | 314 lin middle_guess, suffix_guess; |
440 | 318 |
441 /* Find identical prefix. */ | 319 /* Find identical prefix. */ |
442 | 320 |
443 w0 = filevec[0].buffer; | 321 w0 = filevec[0].buffer; |
444 w1 = filevec[1].buffer; | 322 w1 = filevec[1].buffer; |
445 p0 = buffer0 = (char *) w0; | 323 p0 = buffer0 = (QChar *) w0; |
446 p1 = buffer1 = (char *) w1; | 324 p1 = buffer1 = (QChar *) w1; |
447 n0 = filevec[0].buffered; | 325 n0 = filevec[0].buffered; |
448 n1 = filevec[1].buffered; | 326 n1 = filevec[1].buffered; |
449 | 327 |
450 if (p0 == p1) | 328 if (p0 == p1) |
451 /* The buffers are the same; sentinels won't work. */ | 329 /* The buffers are the same; sentinels won't work. */ |
465 /* Compare a word at a time for speed. */ | 343 /* Compare a word at a time for speed. */ |
466 while (*w0 == *w1) | 344 while (*w0 == *w1) |
467 w0++, w1++; | 345 w0++, w1++; |
468 | 346 |
469 /* Do the last few bytes of comparison a byte at a time. */ | 347 /* Do the last few bytes of comparison a byte at a time. */ |
470 p0 = (char *) w0; | 348 p0 = (QChar *) w0; |
471 p1 = (char *) w1; | 349 p1 = (QChar *) w1; |
472 while (*p0 == *p1) | 350 while (*p0 == *p1) |
473 p0++, p1++; | 351 p0++, p1++; |
474 | 352 |
475 /* Don't mistakenly count missing newline as part of prefix. */ | 353 /* Don't mistakenly count missing newline as part of prefix. */ |
476 if (ROBUST_OUTPUT_STYLE (output_style) | 354 if (ROBUST_OUTPUT_STYLE (output_style) |
565 alloc_lines0 = guess_lines (0, 0, n0); | 443 alloc_lines0 = guess_lines (0, 0, n0); |
566 } | 444 } |
567 | 445 |
568 prefix_mask = prefix_count - 1; | 446 prefix_mask = prefix_count - 1; |
569 lines = 0; | 447 lines = 0; |
570 linbuf0 = (const char**) xmalloc (alloc_lines0 * sizeof *linbuf0); | 448 linbuf0 = (const QChar**) xmalloc (alloc_lines0 * sizeof(*linbuf0)); |
571 p0 = buffer0; | 449 p0 = buffer0; |
572 | 450 |
573 /* If the prefix is needed, find the prefix lines. */ | 451 /* If the prefix is needed, find the prefix lines. */ |
574 if (! (no_diff_means_no_output | 452 if (! (no_diff_means_no_output |
575 && filevec[0].prefix_end == p0 | 453 && filevec[0].prefix_end == p0 |
582 if (l == alloc_lines0) | 460 if (l == alloc_lines0) |
583 { | 461 { |
584 if ((lin)(PTRDIFF_MAX / (2 * sizeof *linbuf0)) <= alloc_lines0) | 462 if ((lin)(PTRDIFF_MAX / (2 * sizeof *linbuf0)) <= alloc_lines0) |
585 xalloc_die (); | 463 xalloc_die (); |
586 alloc_lines0 *= 2; | 464 alloc_lines0 *= 2; |
587 linbuf0 = (const char**) xrealloc (linbuf0, alloc_lines0 * sizeof *linbuf0); | 465 linbuf0 = (const QChar**) xrealloc (linbuf0, alloc_lines0 * sizeof(*linbuf0)); |
588 } | 466 } |
589 linbuf0[l] = p0; | 467 linbuf0[l] = p0; |
590 while (*p0++ != '\n') | 468 while (*p0++ != '\n') |
591 continue; | 469 continue; |
592 } | 470 } |
599 suffix_guess = guess_lines (lines, p0 - buffer0, buffer1 + n1 - p1); | 477 suffix_guess = guess_lines (lines, p0 - buffer0, buffer1 + n1 - p1); |
600 alloc_lines1 = buffered_prefix + middle_guess + MIN (context, suffix_guess); | 478 alloc_lines1 = buffered_prefix + middle_guess + MIN (context, suffix_guess); |
601 if (alloc_lines1 < buffered_prefix | 479 if (alloc_lines1 < buffered_prefix |
602 || (lin)(PTRDIFF_MAX / sizeof *linbuf1) <= alloc_lines1) | 480 || (lin)(PTRDIFF_MAX / sizeof *linbuf1) <= alloc_lines1) |
603 xalloc_die (); | 481 xalloc_die (); |
604 linbuf1 = (const char**)xmalloc (alloc_lines1 * sizeof *linbuf1); | 482 linbuf1 = (const QChar**)xmalloc (alloc_lines1 * sizeof(*linbuf1)); |
605 | 483 |
606 if (buffered_prefix != lines) | 484 if (buffered_prefix != lines) |
607 { | 485 { |
608 /* Rotate prefix lines to proper location. */ | 486 /* Rotate prefix lines to proper location. */ |
609 for (i = 0; i < buffered_prefix; i++) | 487 for (i = 0; i < buffered_prefix; i++) |