Mercurial > hg > easyhg-kdiff3
comparison kdiff3/src/gnudiff_io.cpp @ 52:ba22ec30aa4e
GNU-Diff-Algorithms: Adapted for KDiff3-0.9.80
author | joachim99 |
---|---|
date | Tue, 09 Dec 2003 20:34:32 +0000 |
parents | |
children | 32d5cbf9db71 |
comparison
equal
deleted
inserted
replaced
51:c59d5a3a8ff3 | 52:ba22ec30aa4e |
---|---|
1 /* File I/O for GNU DIFF. | |
2 | |
3 Modified for KDiff3 by Joachim Eibl 2003. | |
4 | |
5 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002 | |
6 Free Software Foundation, Inc. | |
7 | |
8 This file is part of GNU DIFF. | |
9 | |
10 GNU DIFF is free software; you can redistribute it and/or modify | |
11 it under the terms of the GNU General Public License as published by | |
12 the Free Software Foundation; either version 2, or (at your option) | |
13 any later version. | |
14 | |
15 GNU DIFF is distributed in the hope that it will be useful, | |
16 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 GNU General Public License for more details. | |
19 | |
20 You should have received a copy of the GNU General Public License | |
21 along with this program; see the file COPYING. | |
22 If not, write to the Free Software Foundation, | |
23 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ | |
24 | |
25 #include "gnudiff_diff.h" | |
26 #include <stdlib.h> | |
27 #include "gnudiff_xalloc.h" | |
28 | |
29 namespace GnuDiff | |
30 { | |
31 | |
32 /* Rotate an unsigned value to the left. */ | |
33 #define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n))) | |
34 | |
35 /* Given a hash value and a new character, return a new hash value. */ | |
36 #define HASH(h, c) ((c) + ROL (h, 7)) | |
37 | |
38 /* The type of a hash value. */ | |
39 typedef size_t hash_value; | |
40 verify (hash_value_is_unsigned, ! TYPE_SIGNED (hash_value)); | |
41 | |
42 /* Lines are put into equivalence classes of lines that match in lines_differ. | |
43 Each equivalence class is represented by one of these structures, | |
44 but only while the classes are being computed. | |
45 Afterward, each class is represented by a number. */ | |
46 struct equivclass | |
47 { | |
48 lin next; /* Next item in this bucket. */ | |
49 hash_value hash; /* Hash of lines in this class. */ | |
50 char const *line; /* A line that fits this class. */ | |
51 size_t length; /* That line's length, not counting its newline. */ | |
52 }; | |
53 | |
54 /* Hash-table: array of buckets, each being a chain of equivalence classes. | |
55 buckets[-1] is reserved for incomplete lines. */ | |
56 static lin *buckets; | |
57 | |
58 /* Number of buckets in the hash table array, not counting buckets[-1]. */ | |
59 static size_t nbuckets; | |
60 | |
61 /* Array in which the equivalence classes are allocated. | |
62 The bucket-chains go through the elements in this array. | |
63 The number of an equivalence class is its index in this array. */ | |
64 static struct equivclass *equivs; | |
65 | |
66 /* Index of first free element in the array `equivs'. */ | |
67 static lin equivs_index; | |
68 | |
69 /* Number of elements allocated in the array `equivs'. */ | |
70 static lin equivs_alloc; | |
71 | |
72 | |
73 /* Check for binary files and compare them for exact identity. */ | |
74 | |
75 /* Return 1 if BUF contains a non text character. | |
76 SIZE is the number of characters in BUF. */ | |
77 | |
78 #define binary_file_p(buf, size) (memchr (buf, 0, size) != 0) | |
79 | |
80 | |
81 /* Split the file into lines, simultaneously computing the equivalence | |
82 class for each line. */ | |
83 | |
84 static void | |
85 find_and_hash_each_line (struct file_data *current) | |
86 { | |
87 hash_value h; | |
88 unsigned char const *p = (unsigned char const *) current->prefix_end; | |
89 unsigned char c; | |
90 lin i, *bucket; | |
91 size_t length; | |
92 | |
93 /* Cache often-used quantities in local variables to help the compiler. */ | |
94 char const **linbuf = current->linbuf; | |
95 lin alloc_lines = current->alloc_lines; | |
96 lin line = 0; | |
97 lin linbuf_base = current->linbuf_base; | |
98 lin *cureqs = (lin*)xmalloc (alloc_lines * sizeof *cureqs); | |
99 struct equivclass *eqs = equivs; | |
100 lin eqs_index = equivs_index; | |
101 lin eqs_alloc = equivs_alloc; | |
102 char const *suffix_begin = current->suffix_begin; | |
103 char const *bufend = FILE_BUFFER (current) + current->buffered; | |
104 bool diff_length_compare_anyway = | |
105 ignore_white_space != IGNORE_NO_WHITE_SPACE || bIgnoreNumbers; | |
106 bool same_length_diff_contents_compare_anyway = | |
107 diff_length_compare_anyway | ignore_case; | |
108 | |
109 while ((char const *) p < suffix_begin) | |
110 { | |
111 char const *ip = (char const *) p; | |
112 | |
113 h = 0; | |
114 | |
115 /* Hash this line until we find a newline. */ | |
116 if (ignore_case) | |
117 switch (ignore_white_space) | |
118 { | |
119 case IGNORE_ALL_SPACE: | |
120 while ((c = *p++) != '\n') | |
121 if (! (ISSPACE (c) || bIgnoreNumbers && (isdigit( c ) || c=='-' || c=='.' ) )) | |
122 h = HASH (h, TOLOWER (c)); | |
123 break; | |
124 | |
125 case IGNORE_SPACE_CHANGE: | |
126 while ((c = *p++) != '\n') | |
127 { | |
128 if (ISSPACE (c)) | |
129 { | |
130 do | |
131 if ((c = *p++) == '\n') | |
132 goto hashing_done; | |
133 while (ISSPACE (c)); | |
134 | |
135 h = HASH (h, ' '); | |
136 } | |
137 | |
138 /* C is now the first non-space. */ | |
139 h = HASH (h, TOLOWER (c)); | |
140 } | |
141 break; | |
142 | |
143 case IGNORE_TAB_EXPANSION: | |
144 { | |
145 size_t column = 0; | |
146 while ((c = *p++) != '\n') | |
147 { | |
148 int repetitions = 1; | |
149 | |
150 switch (c) | |
151 { | |
152 case '\b': | |
153 column -= 0 < column; | |
154 break; | |
155 | |
156 case '\t': | |
157 c = ' '; | |
158 repetitions = TAB_WIDTH - column % TAB_WIDTH; | |
159 column += repetitions; | |
160 break; | |
161 | |
162 case '\r': | |
163 column = 0; | |
164 break; | |
165 | |
166 default: | |
167 c = TOLOWER (c); | |
168 column++; | |
169 break; | |
170 } | |
171 | |
172 do | |
173 h = HASH (h, c); | |
174 while (--repetitions != 0); | |
175 } | |
176 } | |
177 break; | |
178 | |
179 default: | |
180 while ((c = *p++) != '\n') | |
181 h = HASH (h, TOLOWER (c)); | |
182 break; | |
183 } | |
184 else | |
185 switch (ignore_white_space) | |
186 { | |
187 case IGNORE_ALL_SPACE: | |
188 while ((c = *p++) != '\n') | |
189 if (! (ISSPACE (c)|| bIgnoreNumbers && (isdigit( c ) || c=='-' || c=='.' ) )) | |
190 h = HASH (h, c); | |
191 break; | |
192 | |
193 case IGNORE_SPACE_CHANGE: | |
194 while ((c = *p++) != '\n') | |
195 { | |
196 if (ISSPACE (c)) | |
197 { | |
198 do | |
199 if ((c = *p++) == '\n') | |
200 goto hashing_done; | |
201 while (ISSPACE (c)); | |
202 | |
203 h = HASH (h, ' '); | |
204 } | |
205 | |
206 /* C is now the first non-space. */ | |
207 h = HASH (h, c); | |
208 } | |
209 break; | |
210 | |
211 case IGNORE_TAB_EXPANSION: | |
212 { | |
213 size_t column = 0; | |
214 while ((c = *p++) != '\n') | |
215 { | |
216 int repetitions = 1; | |
217 | |
218 switch (c) | |
219 { | |
220 case '\b': | |
221 column -= 0 < column; | |
222 break; | |
223 | |
224 case '\t': | |
225 c = ' '; | |
226 repetitions = TAB_WIDTH - column % TAB_WIDTH; | |
227 column += repetitions; | |
228 break; | |
229 | |
230 case '\r': | |
231 column = 0; | |
232 break; | |
233 | |
234 default: | |
235 column++; | |
236 break; | |
237 } | |
238 | |
239 do | |
240 h = HASH (h, c); | |
241 while (--repetitions != 0); | |
242 } | |
243 } | |
244 break; | |
245 | |
246 default: | |
247 while ((c = *p++) != '\n') | |
248 h = HASH (h, c); | |
249 break; | |
250 } | |
251 | |
252 hashing_done:; | |
253 | |
254 bucket = &buckets[h % nbuckets]; | |
255 length = (char const *) p - ip - 1; | |
256 | |
257 if ((char const *) p == bufend | |
258 && current->missing_newline | |
259 && ROBUST_OUTPUT_STYLE (output_style)) | |
260 { | |
261 /* This line is incomplete. If this is significant, | |
262 put the line into buckets[-1]. */ | |
263 if (ignore_white_space < IGNORE_SPACE_CHANGE) | |
264 bucket = &buckets[-1]; | |
265 | |
266 /* Omit the inserted newline when computing linbuf later. */ | |
267 p--; | |
268 bufend = suffix_begin = (char const *) p; | |
269 } | |
270 | |
271 for (i = *bucket; ; i = eqs[i].next) | |
272 if (!i) | |
273 { | |
274 /* Create a new equivalence class in this bucket. */ | |
275 i = eqs_index++; | |
276 if (i == eqs_alloc) | |
277 { | |
278 if ((int)(PTRDIFF_MAX / (2 * sizeof *eqs)) <= eqs_alloc) | |
279 xalloc_die (); | |
280 eqs_alloc *= 2; | |
281 eqs = (equivclass*)xrealloc (eqs, eqs_alloc * sizeof *eqs); | |
282 } | |
283 eqs[i].next = *bucket; | |
284 eqs[i].hash = h; | |
285 eqs[i].line = ip; | |
286 eqs[i].length = length; | |
287 *bucket = i; | |
288 break; | |
289 } | |
290 else if (eqs[i].hash == h) | |
291 { | |
292 char const *eqline = eqs[i].line; | |
293 | |
294 /* Reuse existing class if lines_differ reports the lines | |
295 equal. */ | |
296 if (eqs[i].length == length) | |
297 { | |
298 /* Reuse existing equivalence class if the lines are identical. | |
299 This detects the common case of exact identity | |
300 faster than lines_differ would. */ | |
301 if (memcmp (eqline, ip, length) == 0) | |
302 break; | |
303 if (!same_length_diff_contents_compare_anyway) | |
304 continue; | |
305 } | |
306 else if (!diff_length_compare_anyway) | |
307 continue; | |
308 | |
309 if (! lines_differ (eqline, ip)) | |
310 break; | |
311 } | |
312 | |
313 /* Maybe increase the size of the line table. */ | |
314 if (line == alloc_lines) | |
315 { | |
316 /* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */ | |
317 if ((int)(PTRDIFF_MAX / 3) <= alloc_lines | |
318 || (int)(PTRDIFF_MAX / sizeof *cureqs) <= 2 * alloc_lines - linbuf_base | |
319 || (int)(PTRDIFF_MAX / sizeof *linbuf) <= alloc_lines - linbuf_base) | |
320 xalloc_die (); | |
321 alloc_lines = 2 * alloc_lines - linbuf_base; | |
322 cureqs =(lin*) xrealloc (cureqs, alloc_lines * sizeof *cureqs); | |
323 linbuf += linbuf_base; | |
324 linbuf = (const char**) xrealloc (linbuf, | |
325 (alloc_lines - linbuf_base) * sizeof *linbuf); | |
326 linbuf -= linbuf_base; | |
327 } | |
328 linbuf[line] = ip; | |
329 cureqs[line] = i; | |
330 ++line; | |
331 } | |
332 | |
333 current->buffered_lines = line; | |
334 | |
335 for (i = 0; ; i++) | |
336 { | |
337 /* Record the line start for lines in the suffix that we care about. | |
338 Record one more line start than lines, | |
339 so that we can compute the length of any buffered line. */ | |
340 if (line == alloc_lines) | |
341 { | |
342 /* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */ | |
343 if ((int)(PTRDIFF_MAX / 3) <= alloc_lines | |
344 || (int)(PTRDIFF_MAX / sizeof *cureqs) <= 2 * alloc_lines - linbuf_base | |
345 || (int)(PTRDIFF_MAX / sizeof *linbuf) <= alloc_lines - linbuf_base) | |
346 xalloc_die (); | |
347 alloc_lines = 2 * alloc_lines - linbuf_base; | |
348 linbuf += linbuf_base; | |
349 linbuf = (const char**)xrealloc (linbuf, | |
350 (alloc_lines - linbuf_base) * sizeof *linbuf); | |
351 linbuf -= linbuf_base; | |
352 } | |
353 linbuf[line] = (char const *) p; | |
354 | |
355 if ((char const *) p == bufend) | |
356 break; | |
357 | |
358 if (context <= i && no_diff_means_no_output) | |
359 break; | |
360 | |
361 line++; | |
362 | |
363 while (*p++ != '\n') | |
364 continue; | |
365 } | |
366 | |
367 /* Done with cache in local variables. */ | |
368 current->linbuf = linbuf; | |
369 current->valid_lines = line; | |
370 current->alloc_lines = alloc_lines; | |
371 current->equivs = cureqs; | |
372 equivs = eqs; | |
373 equivs_alloc = eqs_alloc; | |
374 equivs_index = eqs_index; | |
375 } | |
376 | |
377 /* Prepare the text. Make sure the text end is initialized. | |
378 Make sure text ends in a newline, | |
379 but remember that we had to add one. | |
380 Strip trailing CRs, if that was requested. */ | |
381 | |
382 static void | |
383 prepare_text (struct file_data *current) | |
384 { | |
385 size_t buffered = current->buffered; | |
386 char *p = FILE_BUFFER (current); | |
387 char *dst; | |
388 | |
389 if (buffered == 0 || p[buffered - 1] == '\n') | |
390 current->missing_newline = 0; | |
391 else | |
392 { | |
393 p[buffered++] = '\n'; | |
394 current->missing_newline = 1; | |
395 } | |
396 | |
397 if (!p) | |
398 return; | |
399 | |
400 /* Don't use uninitialized storage when planting or using sentinels. */ | |
401 memset (p + buffered, 0, sizeof (word)); | |
402 | |
403 if (strip_trailing_cr && (dst = (char*)memchr (p, '\r', buffered))) | |
404 { | |
405 char const *src = dst; | |
406 char const *srclim = p + buffered; | |
407 | |
408 do | |
409 dst += ! ((*dst = *src++) == '\r' && *src == '\n'); | |
410 while (src < srclim); | |
411 | |
412 buffered -= src - dst; | |
413 } | |
414 | |
415 current->buffered = buffered; | |
416 } | |
417 | |
418 /* We have found N lines in a buffer of size S; guess the | |
419 proportionate number of lines that will be found in a buffer of | |
420 size T. However, do not guess a number of lines so large that the | |
421 resulting line table might cause overflow in size calculations. */ | |
422 static lin | |
423 guess_lines (lin n, size_t s, size_t t) | |
424 { | |
425 size_t guessed_bytes_per_line = n < 10 ? 32 : s / (n - 1); | |
426 lin guessed_lines = MAX (1, t / guessed_bytes_per_line); | |
427 return MIN (guessed_lines, (int)(PTRDIFF_MAX / (2 * sizeof (char *) + 1) - 5)) + 5; | |
428 } | |
429 | |
430 /* Given a vector of two file_data objects, find the identical | |
431 prefixes and suffixes of each object. */ | |
432 | |
433 static void | |
434 find_identical_ends (struct file_data filevec[]) | |
435 { | |
436 word *w0, *w1; | |
437 char *p0, *p1, *buffer0, *buffer1; | |
438 char const *end0, *beg0; | |
439 char const **linbuf0, **linbuf1; | |
440 lin i, lines; | |
441 size_t n0, n1; | |
442 lin alloc_lines0, alloc_lines1; | |
443 lin buffered_prefix, prefix_count, prefix_mask; | |
444 lin middle_guess, suffix_guess; | |
445 | |
446 prepare_text (&filevec[0]); | |
447 prepare_text (&filevec[1]); | |
448 | |
449 /* Find identical prefix. */ | |
450 | |
451 w0 = filevec[0].buffer; | |
452 w1 = filevec[1].buffer; | |
453 p0 = buffer0 = (char *) w0; | |
454 p1 = buffer1 = (char *) w1; | |
455 n0 = filevec[0].buffered; | |
456 n1 = filevec[1].buffered; | |
457 | |
458 if (p0 == p1) | |
459 /* The buffers are the same; sentinels won't work. */ | |
460 p0 = p1 += n1; | |
461 else | |
462 { | |
463 /* Insert end sentinels, in this case characters that are guaranteed | |
464 to make the equality test false, and thus terminate the loop. */ | |
465 | |
466 if (n0 < n1) | |
467 p0[n0] = ~p1[n0]; | |
468 else | |
469 p1[n1] = ~p0[n1]; | |
470 | |
471 /* Loop until first mismatch, or to the sentinel characters. */ | |
472 | |
473 /* Compare a word at a time for speed. */ | |
474 while (*w0 == *w1) | |
475 w0++, w1++; | |
476 | |
477 /* Do the last few bytes of comparison a byte at a time. */ | |
478 p0 = (char *) w0; | |
479 p1 = (char *) w1; | |
480 while (*p0 == *p1) | |
481 p0++, p1++; | |
482 | |
483 /* Don't mistakenly count missing newline as part of prefix. */ | |
484 if (ROBUST_OUTPUT_STYLE (output_style) | |
485 && ((buffer0 + n0 - filevec[0].missing_newline < p0) | |
486 != | |
487 (buffer1 + n1 - filevec[1].missing_newline < p1))) | |
488 p0--, p1--; | |
489 } | |
490 | |
491 /* Now P0 and P1 point at the first nonmatching characters. */ | |
492 | |
493 /* Skip back to last line-beginning in the prefix, | |
494 and then discard up to HORIZON_LINES lines from the prefix. */ | |
495 i = horizon_lines; | |
496 while (p0 != buffer0 && (p0[-1] != '\n' || i--)) | |
497 p0--, p1--; | |
498 | |
499 /* Record the prefix. */ | |
500 filevec[0].prefix_end = p0; | |
501 filevec[1].prefix_end = p1; | |
502 | |
503 /* Find identical suffix. */ | |
504 | |
505 /* P0 and P1 point beyond the last chars not yet compared. */ | |
506 p0 = buffer0 + n0; | |
507 p1 = buffer1 + n1; | |
508 | |
509 if (! ROBUST_OUTPUT_STYLE (output_style) | |
510 || filevec[0].missing_newline == filevec[1].missing_newline) | |
511 { | |
512 end0 = p0; /* Addr of last char in file 0. */ | |
513 | |
514 /* Get value of P0 at which we should stop scanning backward: | |
515 this is when either P0 or P1 points just past the last char | |
516 of the identical prefix. */ | |
517 beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1); | |
518 | |
519 /* Scan back until chars don't match or we reach that point. */ | |
520 for (; p0 != beg0; p0--, p1--) | |
521 if (*p0 != *p1) | |
522 { | |
523 /* Point at the first char of the matching suffix. */ | |
524 beg0 = p0; | |
525 break; | |
526 } | |
527 | |
528 /* Are we at a line-beginning in both files? If not, add the rest of | |
529 this line to the main body. Discard up to HORIZON_LINES lines from | |
530 the identical suffix. Also, discard one extra line, | |
531 because shift_boundaries may need it. */ | |
532 i = horizon_lines + !((buffer0 == p0 || p0[-1] == '\n') | |
533 && | |
534 (buffer1 == p1 || p1[-1] == '\n')); | |
535 while (i-- && p0 != end0) | |
536 while (*p0++ != '\n') | |
537 continue; | |
538 | |
539 p1 += p0 - beg0; | |
540 } | |
541 | |
542 /* Record the suffix. */ | |
543 filevec[0].suffix_begin = p0; | |
544 filevec[1].suffix_begin = p1; | |
545 | |
546 /* Calculate number of lines of prefix to save. | |
547 | |
548 prefix_count == 0 means save the whole prefix; | |
549 we need this for options like -D that output the whole file, | |
550 or for enormous contexts (to avoid worrying about arithmetic overflow). | |
551 We also need it for options like -F that output some preceding line; | |
552 at least we will need to find the last few lines, | |
553 but since we don't know how many, it's easiest to find them all. | |
554 | |
555 Otherwise, prefix_count != 0. Save just prefix_count lines at start | |
556 of the line buffer; they'll be moved to the proper location later. | |
557 Handle 1 more line than the context says (because we count 1 too many), | |
558 rounded up to the next power of 2 to speed index computation. */ | |
559 | |
560 if (no_diff_means_no_output | |
561 && context < int(LIN_MAX / 4) && context < int(n0)) | |
562 { | |
563 middle_guess = guess_lines (0, 0, p0 - filevec[0].prefix_end); | |
564 suffix_guess = guess_lines (0, 0, buffer0 + n0 - p0); | |
565 for (prefix_count = 1; prefix_count <= context; prefix_count *= 2) | |
566 continue; | |
567 alloc_lines0 = (prefix_count + middle_guess | |
568 + MIN (context, suffix_guess)); | |
569 } | |
570 else | |
571 { | |
572 prefix_count = 0; | |
573 alloc_lines0 = guess_lines (0, 0, n0); | |
574 } | |
575 | |
576 prefix_mask = prefix_count - 1; | |
577 lines = 0; | |
578 linbuf0 = (const char**) xmalloc (alloc_lines0 * sizeof *linbuf0); | |
579 p0 = buffer0; | |
580 | |
581 /* If the prefix is needed, find the prefix lines. */ | |
582 if (! (no_diff_means_no_output | |
583 && filevec[0].prefix_end == p0 | |
584 && filevec[1].prefix_end == p1)) | |
585 { | |
586 end0 = filevec[0].prefix_end; | |
587 while (p0 != end0) | |
588 { | |
589 lin l = lines++ & prefix_mask; | |
590 if (l == alloc_lines0) | |
591 { | |
592 if (int(PTRDIFF_MAX / (2 * sizeof *linbuf0)) <= alloc_lines0) | |
593 xalloc_die (); | |
594 alloc_lines0 *= 2; | |
595 linbuf0 = (const char**) xrealloc (linbuf0, alloc_lines0 * sizeof *linbuf0); | |
596 } | |
597 linbuf0[l] = p0; | |
598 while (*p0++ != '\n') | |
599 continue; | |
600 } | |
601 } | |
602 buffered_prefix = prefix_count && context < lines ? context : lines; | |
603 | |
604 /* Allocate line buffer 1. */ | |
605 | |
606 middle_guess = guess_lines (lines, p0 - buffer0, p1 - filevec[1].prefix_end); | |
607 suffix_guess = guess_lines (lines, p0 - buffer0, buffer1 + n1 - p1); | |
608 alloc_lines1 = buffered_prefix + middle_guess + MIN (context, suffix_guess); | |
609 if (alloc_lines1 < buffered_prefix | |
610 || int(PTRDIFF_MAX / sizeof *linbuf1) <= alloc_lines1) | |
611 xalloc_die (); | |
612 linbuf1 = (const char**)xmalloc (alloc_lines1 * sizeof *linbuf1); | |
613 | |
614 if (buffered_prefix != lines) | |
615 { | |
616 /* Rotate prefix lines to proper location. */ | |
617 for (i = 0; i < buffered_prefix; i++) | |
618 linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask]; | |
619 for (i = 0; i < buffered_prefix; i++) | |
620 linbuf0[i] = linbuf1[i]; | |
621 } | |
622 | |
623 /* Initialize line buffer 1 from line buffer 0. */ | |
624 for (i = 0; i < buffered_prefix; i++) | |
625 linbuf1[i] = linbuf0[i] - buffer0 + buffer1; | |
626 | |
627 /* Record the line buffer, adjusted so that | |
628 linbuf[0] points at the first differing line. */ | |
629 filevec[0].linbuf = linbuf0 + buffered_prefix; | |
630 filevec[1].linbuf = linbuf1 + buffered_prefix; | |
631 filevec[0].linbuf_base = filevec[1].linbuf_base = - buffered_prefix; | |
632 filevec[0].alloc_lines = alloc_lines0 - buffered_prefix; | |
633 filevec[1].alloc_lines = alloc_lines1 - buffered_prefix; | |
634 filevec[0].prefix_lines = filevec[1].prefix_lines = lines; | |
635 } | |
636 | |
637 /* If 1 < k, then (2**k - prime_offset[k]) is the largest prime less | |
638 than 2**k. This table is derived from Chris K. Caldwell's list | |
639 <http://www.utm.edu/research/primes/lists/2small/>. */ | |
640 | |
641 static unsigned char const prime_offset[] = | |
642 { | |
643 0, 0, 1, 1, 3, 1, 3, 1, 5, 3, 3, 9, 3, 1, 3, 19, 15, 1, 5, 1, 3, 9, 3, | |
644 15, 3, 39, 5, 39, 57, 3, 35, 1, 5, 9, 41, 31, 5, 25, 45, 7, 87, 21, | |
645 11, 57, 17, 55, 21, 115, 59, 81, 27, 129, 47, 111, 33, 55, 5, 13, 27, | |
646 55, 93, 1, 57, 25 | |
647 }; | |
648 | |
649 /* Verify that this host's size_t is not too wide for the above table. */ | |
650 | |
651 verify (enough_prime_offsets, | |
652 sizeof (size_t) * CHAR_BIT <= sizeof prime_offset); | |
653 | |
654 /* Given a vector of two file_data objects, read the file associated | |
655 with each one, and build the table of equivalence classes. | |
656 Return nonzero if either file appears to be a binary file. | |
657 If PRETEND_BINARY is nonzero, pretend they are binary regardless. */ | |
658 | |
659 bool | |
660 read_files (struct file_data filevec[], bool /*pretend_binary*/) | |
661 { | |
662 int i; | |
663 | |
664 find_identical_ends (filevec); | |
665 | |
666 equivs_alloc = filevec[0].alloc_lines + filevec[1].alloc_lines + 1; | |
667 if (int(PTRDIFF_MAX / sizeof *equivs) <= equivs_alloc) | |
668 xalloc_die (); | |
669 equivs = (equivclass*)xmalloc (equivs_alloc * sizeof *equivs); | |
670 /* Equivalence class 0 is permanently safe for lines that were not | |
671 hashed. Real equivalence classes start at 1. */ | |
672 equivs_index = 1; | |
673 | |
674 /* Allocate (one plus) a prime number of hash buckets. Use a prime | |
675 number between 1/3 and 2/3 of the value of equiv_allocs, | |
676 approximately. */ | |
677 for (i = 9; 1 << i < equivs_alloc / 3; i++) | |
678 continue; | |
679 nbuckets = ((size_t) 1 << i) - prime_offset[i]; | |
680 if (PTRDIFF_MAX / sizeof *buckets <= nbuckets) | |
681 xalloc_die (); | |
682 buckets = (lin*)zalloc ((nbuckets + 1) * sizeof *buckets); | |
683 buckets++; | |
684 | |
685 for (i = 0; i < 2; i++) | |
686 find_and_hash_each_line (&filevec[i]); | |
687 | |
688 filevec[0].equiv_max = filevec[1].equiv_max = equivs_index; | |
689 | |
690 free (equivs); | |
691 free (buckets - 1); | |
692 | |
693 return 0; | |
694 } | |
695 | |
696 } // namespace GnuDiff |