easyhg-kdiff3: kdiff3/src-QT4/gnudiff

annotate kdiff3/src-QT4/gnudiff_analyze.cpp @ 113:7bca1f1340f6 tip

Build fixes for Xcode 10 / Qt 5.12

author	Chris Cannam
date	Mon, 17 Dec 2018 11:13:01 +0000
parents	08ea9b86c12c
children

rev	line source
joachim99@52	1 /* Analyze file differences for GNU DIFF.
joachim99@52	2
joachim99@52	3 Modified for KDiff3 by Joachim Eibl 2003.
joachim99@53	4 The original file was part of GNU DIFF.
joachim99@52	5
joachim99@52	6 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002
joachim99@52	7 Free Software Foundation, Inc.
joachim99@52	8
joachim99@52	9 GNU DIFF is free software; you can redistribute it and/or modify
joachim99@52	10 it under the terms of the GNU General Public License as published by
joachim99@52	11 the Free Software Foundation; either version 2, or (at your option)
joachim99@52	12 any later version.
joachim99@52	13
joachim99@52	14 GNU DIFF is distributed in the hope that it will be useful,
joachim99@52	15 but WITHOUT ANY WARRANTY; without even the implied warranty of
joachim99@52	16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
joachim99@52	17 GNU General Public License for more details.
joachim99@52	18
joachim99@52	19 You should have received a copy of the GNU General Public License
joachim99@52	20 along with this program; see the file COPYING.
joachim99@52	21 If not, write to the Free Software Foundation,
joachim99@69	22 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
joachim99@52	23
joachim99@52	24 /* The basic algorithm is described in:
joachim99@52	25 "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
joachim99@52	26 Algorithmica Vol. 1 No. 2, 1986, pp. 251-266;
joachim99@52	27 see especially section 4.2, which describes the variation used below.
joachim99@52	28 Unless the --minimal option is specified, this code uses the TOO_EXPENSIVE
joachim99@52	29 heuristic, by Paul Eggert, to limit the cost to O(N**1.5 log N)
joachim99@52	30 at the price of producing suboptimal output for large inputs with
joachim99@52	31 many differences.
joachim99@52	32
joachim99@52	33 The basic algorithm was independently discovered as described in:
joachim99@52	34 "Algorithms for Approximate String Matching", E. Ukkonen,
joachim99@52	35 Information and Control Vol. 64, 1985, pp. 100-118. */
joachim99@52	36
joachim99@52	37 #define GDIFF_MAIN
joachim99@52	38
joachim99@52	39 #include "gnudiff_diff.h"
joachim99@52	40 //#include <error.h>
joachim99@52	41 #include <stdlib.h>
joachim99@52	42
joachim99@52	43 static lin *xvec, *yvec; /* Vectors being compared. */
joachim99@52	44 static lin *fdiag; /* Vector, indexed by diagonal, containing
joachim99@52	45 1 + the X coordinate of the point furthest
joachim99@52	46 along the given diagonal in the forward
joachim99@52	47 search of the edit matrix. */
joachim99@52	48 static lin *bdiag; /* Vector, indexed by diagonal, containing
joachim99@52	49 the X coordinate of the point furthest
joachim99@52	50 along the given diagonal in the backward
joachim99@52	51 search of the edit matrix. */
joachim99@52	52 static lin too_expensive; /* Edit scripts longer than this are too
joachim99@52	53 expensive to compute. */
joachim99@52	54
joachim99@52	55 #define SNAKE_LIMIT 20 /* Snakes bigger than this are considered `big'. */
joachim99@53	56
joachim99@52	57
joachim99@52	58 struct partition
joachim99@52	59 {
joachim99@52	60 lin xmid, ymid; /* Midpoints of this partition. */
joachim99@52	61 bool lo_minimal; /* Nonzero if low half will be analyzed minimally. */
joachim99@52	62 bool hi_minimal; /* Likewise for high half. */
joachim99@52	63 };
joachim99@52	64
joachim99@52	65 /* Find the midpoint of the shortest edit script for a specified
joachim99@52	66 portion of the two files.
joachim99@52	67
joachim99@52	68 Scan from the beginnings of the files, and simultaneously from the ends,
joachim99@52	69 doing a breadth-first search through the space of edit-sequence.
joachim99@52	70 When the two searches meet, we have found the midpoint of the shortest
joachim99@52	71 edit sequence.
joachim99@52	72
joachim99@52	73 If FIND_MINIMAL is nonzero, find the minimal edit script regardless
joachim99@52	74 of expense. Otherwise, if the search is too expensive, use
joachim99@52	75 heuristics to stop the search and report a suboptimal answer.
joachim99@52	76
joachim99@52	77 Set PART->(xmid,ymid) to the midpoint (XMID,YMID). The diagonal number
joachim99@52	78 XMID - YMID equals the number of inserted lines minus the number
joachim99@52	79 of deleted lines (counting only lines before the midpoint).
joachim99@52	80 Return the approximate edit cost; this is the total number of
joachim99@52	81 lines inserted or deleted (counting only lines before the midpoint),
joachim99@52	82 unless a heuristic is used to terminate the search prematurely.
joachim99@52	83
joachim99@52	84 Set PART->lo_minimal to true iff the minimal edit script for the
joachim99@52	85 left half of the partition is known; similarly for PART->hi_minimal.
joachim99@52	86
joachim99@52	87 This function assumes that the first lines of the specified portions
joachim99@52	88 of the two files do not match, and likewise that the last lines do not
joachim99@52	89 match. The caller must trim matching lines from the beginning and end
joachim99@52	90 of the portions it is going to specify.
joachim99@52	91
joachim99@52	92 If we return the "wrong" partitions,
joachim99@52	93 the worst this can do is cause suboptimal diff output.
joachim99@52	94 It cannot cause incorrect diff output. */
joachim99@52	95
joachim99@53	96 lin
joachim99@53	97 GnuDiff::diag (lin xoff, lin xlim, lin yoff, lin ylim, bool find_minimal,
joachim99@52	98 struct partition *part)
joachim99@52	99 {
joachim99@52	100 lin *const fd = fdiag; /* Give the compiler a chance. */
joachim99@52	101 lin *const bd = bdiag; /* Additional help for the compiler. */
joachim99@52	102 lin const *const xv = xvec; /* Still more help for the compiler. */
joachim99@52	103 lin const *const yv = yvec; /* And more and more . . . */
joachim99@52	104 lin const dmin = xoff - ylim; /* Minimum valid diagonal. */
joachim99@52	105 lin const dmax = xlim - yoff; /* Maximum valid diagonal. */
joachim99@52	106 lin const fmid = xoff - yoff; /* Center diagonal of top-down search. */
joachim99@52	107 lin const bmid = xlim - ylim; /* Center diagonal of bottom-up search. */
joachim99@52	108 lin fmin = fmid, fmax = fmid; /* Limits of top-down search. */
joachim99@52	109 lin bmin = bmid, bmax = bmid; /* Limits of bottom-up search. */
joachim99@52	110 lin c; /* Cost. */
joachim99@52	111 bool odd = (fmid - bmid) & 1; /* True if southeast corner is on an odd
joachim99@52	112 diagonal with respect to the northwest. */
joachim99@52	113
joachim99@52	114 fd[fmid] = xoff;
joachim99@52	115 bd[bmid] = xlim;
joachim99@52	116
joachim99@52	117 for (c = 1;; ++c)
joachim99@52	118 {
joachim99@52	119 lin d; /* Active diagonal. */
joachim99@52	120 bool big_snake = 0;
joachim99@52	121
joachim99@52	122 /* Extend the top-down search by an edit step in each diagonal. */
joachim99@52	123 fmin > dmin ? fd[--fmin - 1] = -1 : ++fmin;
joachim99@52	124 fmax < dmax ? fd[++fmax + 1] = -1 : --fmax;
joachim99@52	125 for (d = fmax; d >= fmin; d -= 2)
joachim99@52	126 {
joachim99@52	127 lin x, y, oldx, tlo = fd[d - 1], thi = fd[d + 1];
joachim99@52	128
joachim99@52	129 if (tlo >= thi)
joachim99@52	130 x = tlo + 1;
joachim99@52	131 else
joachim99@52	132 x = thi;
joachim99@52	133 oldx = x;
joachim99@52	134 y = x - d;
joachim99@52	135 while (x < xlim && y < ylim && xv[x] == yv[y])
joachim99@52	136 ++x, ++y;
joachim99@52	137 if (x - oldx > SNAKE_LIMIT)
joachim99@52	138 big_snake = 1;
joachim99@52	139 fd[d] = x;
joachim99@52	140 if (odd && bmin <= d && d <= bmax && bd[d] <= x)
joachim99@52	141 {
joachim99@52	142 part->xmid = x;
joachim99@52	143 part->ymid = y;
joachim99@52	144 part->lo_minimal = part->hi_minimal = 1;
joachim99@52	145 return 2 * c - 1;
joachim99@52	146 }
joachim99@52	147 }
joachim99@52	148
joachim99@52	149 /* Similarly extend the bottom-up search. */
joachim99@52	150 bmin > dmin ? bd[--bmin - 1] = LIN_MAX : ++bmin;
joachim99@52	151 bmax < dmax ? bd[++bmax + 1] = LIN_MAX : --bmax;
joachim99@52	152 for (d = bmax; d >= bmin; d -= 2)
joachim99@52	153 {
joachim99@52	154 lin x, y, oldx, tlo = bd[d - 1], thi = bd[d + 1];
joachim99@52	155
joachim99@52	156 if (tlo < thi)
joachim99@52	157 x = tlo;
joachim99@52	158 else
joachim99@52	159 x = thi - 1;
joachim99@52	160 oldx = x;
joachim99@52	161 y = x - d;
joachim99@52	162 while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1])
joachim99@52	163 --x, --y;
joachim99@52	164 if (oldx - x > SNAKE_LIMIT)
joachim99@52	165 big_snake = 1;
joachim99@52	166 bd[d] = x;
joachim99@52	167 if (!odd && fmin <= d && d <= fmax && x <= fd[d])
joachim99@52	168 {
joachim99@52	169 part->xmid = x;
joachim99@52	170 part->ymid = y;
joachim99@52	171 part->lo_minimal = part->hi_minimal = 1;
joachim99@52	172 return 2 * c;
joachim99@52	173 }
joachim99@52	174 }
joachim99@52	175
joachim99@52	176 if (find_minimal)
joachim99@52	177 continue;
joachim99@52	178
joachim99@52	179 /* Heuristic: check occasionally for a diagonal that has made
joachim99@52	180 lots of progress compared with the edit distance.
joachim99@52	181 If we have any such, find the one that has made the most
joachim99@52	182 progress and return it as if it had succeeded.
joachim99@52	183
joachim99@52	184 With this heuristic, for files with a constant small density
joachim99@52	185 of changes, the algorithm is linear in the file size. */
joachim99@52	186
joachim99@52	187 if (200 < c && big_snake && speed_large_files)
joachim99@52	188 {
joachim99@52	189 lin best;
joachim99@52	190
joachim99@52	191 best = 0;
joachim99@52	192 for (d = fmax; d >= fmin; d -= 2)
joachim99@52	193 {
joachim99@52	194 lin dd = d - fmid;
joachim99@52	195 lin x = fd[d];
joachim99@52	196 lin y = x - d;
joachim99@52	197 lin v = (x - xoff) * 2 - dd;
joachim99@52	198 if (v > 12 * (c + (dd < 0 ? -dd : dd)))
joachim99@52	199 {
joachim99@52	200 if (v > best
joachim99@52	201 && xoff + SNAKE_LIMIT <= x && x < xlim
joachim99@52	202 && yoff + SNAKE_LIMIT <= y && y < ylim)
joachim99@52	203 {
joachim99@52	204 /* We have a good enough best diagonal;
joachim99@52	205 now insist that it end with a significant snake. */
joachim99@52	206 int k;
joachim99@52	207
joachim99@52	208 for (k = 1; xv[x - k] == yv[y - k]; k++)
joachim99@52	209 if (k == SNAKE_LIMIT)
joachim99@52	210 {
joachim99@52	211 best = v;
joachim99@52	212 part->xmid = x;
joachim99@52	213 part->ymid = y;
joachim99@52	214 break;
joachim99@52	215 }
joachim99@52	216 }
joachim99@52	217 }
joachim99@52	218 }
joachim99@52	219 if (best > 0)
joachim99@52	220 {
joachim99@52	221 part->lo_minimal = 1;
joachim99@52	222 part->hi_minimal = 0;
joachim99@52	223 return 2 * c - 1;
joachim99@52	224 }
joachim99@52	225
joachim99@52	226 best = 0;
joachim99@52	227 for (d = bmax; d >= bmin; d -= 2)
joachim99@52	228 {
joachim99@52	229 lin dd = d - bmid;
joachim99@52	230 lin x = bd[d];
joachim99@52	231 lin y = x - d;
joachim99@52	232 lin v = (xlim - x) * 2 + dd;
joachim99@52	233 if (v > 12 * (c + (dd < 0 ? -dd : dd)))
joachim99@52	234 {
joachim99@52	235 if (v > best
joachim99@52	236 && xoff < x && x <= xlim - SNAKE_LIMIT
joachim99@52	237 && yoff < y && y <= ylim - SNAKE_LIMIT)
joachim99@52	238 {
joachim99@52	239 /* We have a good enough best diagonal;
joachim99@52	240 now insist that it end with a significant snake. */
joachim99@52	241 int k;
joachim99@52	242
joachim99@52	243 for (k = 0; xv[x + k] == yv[y + k]; k++)
joachim99@52	244 if (k == SNAKE_LIMIT - 1)
joachim99@52	245 {
joachim99@52	246 best = v;
joachim99@52	247 part->xmid = x;
joachim99@52	248 part->ymid = y;
joachim99@52	249 break;
joachim99@52	250 }
joachim99@52	251 }
joachim99@52	252 }
joachim99@52	253 }
joachim99@52	254 if (best > 0)
joachim99@52	255 {
joachim99@52	256 part->lo_minimal = 0;
joachim99@52	257 part->hi_minimal = 1;
joachim99@52	258 return 2 * c - 1;
joachim99@52	259 }
joachim99@52	260 }
joachim99@52	261
joachim99@52	262 /* Heuristic: if we've gone well beyond the call of duty,
joachim99@52	263 give up and report halfway between our best results so far. */
joachim99@52	264 if (c >= too_expensive)
joachim99@52	265 {
joachim99@52	266 lin fxybest, fxbest;
joachim99@52	267 lin bxybest, bxbest;
joachim99@52	268
joachim99@52	269 fxbest = bxbest = 0; /* Pacify `gcc -Wall'. */
joachim99@52	270
joachim99@52	271 /* Find forward diagonal that maximizes X + Y. */
joachim99@52	272 fxybest = -1;
joachim99@52	273 for (d = fmax; d >= fmin; d -= 2)
joachim99@52	274 {
joachim99@52	275 lin x = MIN (fd[d], xlim);
joachim99@52	276 lin y = x - d;
joachim99@52	277 if (ylim < y)
joachim99@52	278 x = ylim + d, y = ylim;
joachim99@52	279 if (fxybest < x + y)
joachim99@52	280 {
joachim99@52	281 fxybest = x + y;
joachim99@52	282 fxbest = x;
joachim99@52	283 }
joachim99@52	284 }
joachim99@52	285
joachim99@52	286 /* Find backward diagonal that minimizes X + Y. */
joachim99@52	287 bxybest = LIN_MAX;
joachim99@52	288 for (d = bmax; d >= bmin; d -= 2)
joachim99@52	289 {
joachim99@52	290 lin x = MAX (xoff, bd[d]);
joachim99@52	291 lin y = x - d;
joachim99@52	292 if (y < yoff)
joachim99@52	293 x = yoff + d, y = yoff;
joachim99@52	294 if (x + y < bxybest)
joachim99@52	295 {
joachim99@52	296 bxybest = x + y;
joachim99@52	297 bxbest = x;
joachim99@52	298 }
joachim99@52	299 }
joachim99@52	300
joachim99@52	301 /* Use the better of the two diagonals. */
joachim99@52	302 if ((xlim + ylim) - bxybest < fxybest - (xoff + yoff))
joachim99@52	303 {
joachim99@52	304 part->xmid = fxbest;
joachim99@52	305 part->ymid = fxybest - fxbest;
joachim99@52	306 part->lo_minimal = 1;
joachim99@52	307 part->hi_minimal = 0;
joachim99@52	308 }
joachim99@52	309 else
joachim99@52	310 {
joachim99@52	311 part->xmid = bxbest;
joachim99@52	312 part->ymid = bxybest - bxbest;
joachim99@52	313 part->lo_minimal = 0;
joachim99@52	314 part->hi_minimal = 1;
joachim99@52	315 }
joachim99@52	316 return 2 * c - 1;
joachim99@52	317 }
joachim99@52	318 }
joachim99@52	319 }
joachim99@52	320
joachim99@52	321 /* Compare in detail contiguous subsequences of the two files
joachim99@52	322 which are known, as a whole, to match each other.
joachim99@52	323
joachim99@52	324 The results are recorded in the vectors files[N].changed, by
joachim99@52	325 storing 1 in the element for each line that is an insertion or deletion.
joachim99@52	326
joachim99@52	327 The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
joachim99@52	328
joachim99@52	329 Note that XLIM, YLIM are exclusive bounds.
joachim99@52	330 All line numbers are origin-0 and discarded lines are not counted.
joachim99@52	331
joachim99@52	332 If FIND_MINIMAL, find a minimal difference no matter how
joachim99@52	333 expensive it is. */
joachim99@52	334
joachim99@53	335 void GnuDiff::compareseq (lin xoff, lin xlim, lin yoff, lin ylim, bool find_minimal)
joachim99@52	336 {
joachim99@52	337 lin * const xv = xvec; /* Help the compiler. */
joachim99@52	338 lin * const yv = yvec;
joachim99@52	339
joachim99@52	340 /* Slide down the bottom initial diagonal. */
joachim99@52	341 while (xoff < xlim && yoff < ylim && xv[xoff] == yv[yoff])
joachim99@52	342 ++xoff, ++yoff;
joachim99@52	343 /* Slide up the top initial diagonal. */
joachim99@52	344 while (xlim > xoff && ylim > yoff && xv[xlim - 1] == yv[ylim - 1])
joachim99@52	345 --xlim, --ylim;
joachim99@52	346
joachim99@52	347 /* Handle simple cases. */
joachim99@52	348 if (xoff == xlim)
joachim99@52	349 while (yoff < ylim)
joachim99@52	350 files[1].changed[files[1].realindexes[yoff++]] = 1;
joachim99@52	351 else if (yoff == ylim)
joachim99@52	352 while (xoff < xlim)
joachim99@52	353 files[0].changed[files[0].realindexes[xoff++]] = 1;
joachim99@52	354 else
joachim99@52	355 {
joachim99@52	356 lin c;
joachim99@52	357 struct partition part;
joachim99@52	358
joachim99@52	359 /* Find a point of correspondence in the middle of the files. */
joachim99@52	360
joachim99@52	361 c = diag (xoff, xlim, yoff, ylim, find_minimal, &part);
joachim99@52	362
joachim99@52	363 if (c == 1)
joachim99@52	364 {
joachim99@52	365 /* This should be impossible, because it implies that
joachim99@52	366 one of the two subsequences is empty,
joachim99@52	367 and that case was handled above without calling `diag'.
joachim99@52	368 Let's verify that this is true. */
joachim99@52	369 abort ();
joachim99@52	370 #if 0
joachim99@52	371 /* The two subsequences differ by a single insert or delete;
joachim99@52	372 record it and we are done. */
joachim99@52	373 if (part.xmid - part.ymid < xoff - yoff)
joachim99@52	374 files[1].changed[files[1].realindexes[part.ymid - 1]] = 1;
joachim99@52	375 else
joachim99@52	376 files[0].changed[files[0].realindexes[part.xmid]] = 1;
joachim99@52	377 #endif
joachim99@52	378 }
joachim99@52	379 else
joachim99@52	380 {
joachim99@52	381 /* Use the partitions to split this problem into subproblems. */
joachim99@52	382 compareseq (xoff, part.xmid, yoff, part.ymid, part.lo_minimal);
joachim99@52	383 compareseq (part.xmid, xlim, part.ymid, ylim, part.hi_minimal);
joachim99@52	384 }
joachim99@52	385 }
joachim99@52	386 }
joachim99@52	387
joachim99@52	388 /* Discard lines from one file that have no matches in the other file.
joachim99@52	389
joachim99@52	390 A line which is discarded will not be considered by the actual
joachim99@52	391 comparison algorithm; it will be as if that line were not in the file.
joachim99@52	392 The file's `realindexes' table maps virtual line numbers
joachim99@52	393 (which don't count the discarded lines) into real line numbers;
joachim99@52	394 this is how the actual comparison algorithm produces results
joachim99@52	395 that are comprehensible when the discarded lines are counted.
joachim99@52	396
joachim99@52	397 When we discard a line, we also mark it as a deletion or insertion
joachim99@52	398 so that it will be printed in the output. */
joachim99@52	399
joachim99@53	400 void GnuDiff::discard_confusing_lines (struct file_data filevec[])
joachim99@52	401 {
joachim99@52	402 int f;
joachim99@52	403 lin i;
joachim99@52	404 char *discarded[2];
joachim99@52	405 lin *equiv_count[2];
joachim99@52	406 lin *p;
joachim99@52	407
joachim99@52	408 /* Allocate our results. */
joachim99@52	409 p = (lin*)xmalloc ((filevec[0].buffered_lines + filevec[1].buffered_lines)
joachim99@52	410 * (2 * sizeof *p));
joachim99@52	411 for (f = 0; f < 2; f++)
joachim99@52	412 {
joachim99@52	413 filevec[f].undiscarded = p; p += filevec[f].buffered_lines;
joachim99@52	414 filevec[f].realindexes = p; p += filevec[f].buffered_lines;
joachim99@52	415 }
joachim99@52	416
joachim99@52	417 /* Set up equiv_count[F][I] as the number of lines in file F
joachim99@52	418 that fall in equivalence class I. */
joachim99@52	419
joachim99@52	420 p = (lin*)zalloc (filevec[0].equiv_max * (2 * sizeof *p));
joachim99@52	421 equiv_count[0] = p;
joachim99@52	422 equiv_count[1] = p + filevec[0].equiv_max;
joachim99@52	423
joachim99@52	424 for (i = 0; i < filevec[0].buffered_lines; ++i)
joachim99@52	425 ++equiv_count[0][filevec[0].equivs[i]];
joachim99@52	426 for (i = 0; i < filevec[1].buffered_lines; ++i)
joachim99@52	427 ++equiv_count[1][filevec[1].equivs[i]];
joachim99@52	428
joachim99@52	429 /* Set up tables of which lines are going to be discarded. */
joachim99@52	430
joachim99@52	431 discarded[0] = (char*)zalloc (filevec[0].buffered_lines
joachim99@52	432 + filevec[1].buffered_lines);
joachim99@52	433 discarded[1] = discarded[0] + filevec[0].buffered_lines;
joachim99@52	434
joachim99@52	435 /* Mark to be discarded each line that matches no line of the other file.
joachim99@52	436 If a line matches many lines, mark it as provisionally discardable. */
joachim99@52	437
joachim99@52	438 for (f = 0; f < 2; f++)
joachim99@52	439 {
joachim99@52	440 size_t end = filevec[f].buffered_lines;
joachim99@52	441 char *discards = discarded[f];
joachim99@52	442 lin *counts = equiv_count[1 - f];
joachim99@52	443 lin *equivs = filevec[f].equivs;
joachim99@52	444 size_t many = 5;
joachim99@52	445 size_t tem = end / 64;
joachim99@52	446
joachim99@52	447 /* Multiply MANY by approximate square root of number of lines.
joachim99@52	448 That is the threshold for provisionally discardable lines. */
joachim99@52	449 while ((tem = tem >> 2) > 0)
joachim99@52	450 many *= 2;
joachim99@52	451
joachim99@66	452 for (i = 0; i < (lin)end; i++)
joachim99@52	453 {
joachim99@52	454 lin nmatch;
joachim99@52	455 if (equivs[i] == 0)
joachim99@52	456 continue;
joachim99@52	457 nmatch = counts[equivs[i]];
joachim99@52	458 if (nmatch == 0)
joachim99@52	459 discards[i] = 1;
joachim99@66	460 else if (nmatch > (lin)many)
joachim99@52	461 discards[i] = 2;
joachim99@52	462 }
joachim99@52	463 }
joachim99@52	464
joachim99@52	465 /* Don't really discard the provisional lines except when they occur
joachim99@52	466 in a run of discardables, with nonprovisionals at the beginning
joachim99@52	467 and end. */
joachim99@52	468
joachim99@52	469 for (f = 0; f < 2; f++)
joachim99@52	470 {
joachim99@52	471 lin end = filevec[f].buffered_lines;
joachim99@52	472 register char *discards = discarded[f];
joachim99@52	473
joachim99@52	474 for (i = 0; i < end; i++)
joachim99@52	475 {
joachim99@52	476 /* Cancel provisional discards not in middle of run of discards. */
joachim99@52	477 if (discards[i] == 2)
joachim99@52	478 discards[i] = 0;
joachim99@52	479 else if (discards[i] != 0)
joachim99@52	480 {
joachim99@52	481 /* We have found a nonprovisional discard. */
joachim99@52	482 register lin j;
joachim99@52	483 lin length;
joachim99@52	484 lin provisional = 0;
joachim99@52	485
joachim99@52	486 /* Find end of this run of discardable lines.
joachim99@52	487 Count how many are provisionally discardable. */
joachim99@52	488 for (j = i; j < end; j++)
joachim99@52	489 {
joachim99@52	490 if (discards[j] == 0)
joachim99@52	491 break;
joachim99@52	492 if (discards[j] == 2)
joachim99@52	493 ++provisional;
joachim99@52	494 }
joachim99@52	495
joachim99@52	496 /* Cancel provisional discards at end, and shrink the run. */
joachim99@52	497 while (j > i && discards[j - 1] == 2)
joachim99@52	498 discards[--j] = 0, --provisional;
joachim99@52	499
joachim99@52	500 /* Now we have the length of a run of discardable lines
joachim99@52	501 whose first and last are not provisional. */
joachim99@52	502 length = j - i;
joachim99@52	503
joachim99@52	504 /* If 1/4 of the lines in the run are provisional,
joachim99@52	505 cancel discarding of all provisional lines in the run. */
joachim99@52	506 if (provisional * 4 > length)
joachim99@52	507 {
joachim99@52	508 while (j > i)
joachim99@52	509 if (discards[--j] == 2)
joachim99@52	510 discards[j] = 0;
joachim99@52	511 }
joachim99@52	512 else
joachim99@52	513 {
joachim99@52	514 register lin consec;
joachim99@52	515 lin minimum = 1;
joachim99@52	516 lin tem = length >> 2;
joachim99@52	517
joachim99@52	518 /* MINIMUM is approximate square root of LENGTH/4.
joachim99@52	519 A subrun of two or more provisionals can stand
joachim99@52	520 when LENGTH is at least 16.
joachim99@52	521 A subrun of 4 or more can stand when LENGTH >= 64. */
joachim99@52	522 while (0 < (tem >>= 2))
joachim99@52	523 minimum <<= 1;
joachim99@52	524 minimum++;
joachim99@52	525
joachim99@52	526 /* Cancel any subrun of MINIMUM or more provisionals
joachim99@52	527 within the larger run. */
joachim99@52	528 for (j = 0, consec = 0; j < length; j++)
joachim99@52	529 if (discards[i + j] != 2)
joachim99@52	530 consec = 0;
joachim99@52	531 else if (minimum == ++consec)
joachim99@52	532 /* Back up to start of subrun, to cancel it all. */
joachim99@52	533 j -= consec;
joachim99@52	534 else if (minimum < consec)
joachim99@52	535 discards[i + j] = 0;
joachim99@52	536
joachim99@52	537 /* Scan from beginning of run
joachim99@52	538 until we find 3 or more nonprovisionals in a row
joachim99@52	539 or until the first nonprovisional at least 8 lines in.
joachim99@52	540 Until that point, cancel any provisionals. */
joachim99@52	541 for (j = 0, consec = 0; j < length; j++)
joachim99@52	542 {
joachim99@52	543 if (j >= 8 && discards[i + j] == 1)
joachim99@52	544 break;
joachim99@52	545 if (discards[i + j] == 2)
joachim99@52	546 consec = 0, discards[i + j] = 0;
joachim99@52	547 else if (discards[i + j] == 0)
joachim99@52	548 consec = 0;
joachim99@52	549 else
joachim99@52	550 consec++;
joachim99@52	551 if (consec == 3)
joachim99@52	552 break;
joachim99@52	553 }
joachim99@52	554
joachim99@52	555 /* I advances to the last line of the run. */
joachim99@52	556 i += length - 1;
joachim99@52	557
joachim99@52	558 /* Same thing, from end. */
joachim99@52	559 for (j = 0, consec = 0; j < length; j++)
joachim99@52	560 {
joachim99@52	561 if (j >= 8 && discards[i - j] == 1)
joachim99@52	562 break;
joachim99@52	563 if (discards[i - j] == 2)
joachim99@52	564 consec = 0, discards[i - j] = 0;
joachim99@52	565 else if (discards[i - j] == 0)
joachim99@52	566 consec = 0;
joachim99@52	567 else
joachim99@52	568 consec++;
joachim99@52	569 if (consec == 3)
joachim99@52	570 break;
joachim99@52	571 }
joachim99@52	572 }
joachim99@52	573 }
joachim99@52	574 }
joachim99@52	575 }
joachim99@52	576
joachim99@52	577 /* Actually discard the lines. */
joachim99@52	578 for (f = 0; f < 2; f++)
joachim99@52	579 {
joachim99@52	580 char *discards = discarded[f];
joachim99@52	581 lin end = filevec[f].buffered_lines;
joachim99@52	582 lin j = 0;
joachim99@52	583 for (i = 0; i < end; ++i)
joachim99@52	584 if (minimal || discards[i] == 0)
joachim99@52	585 {
joachim99@52	586 filevec[f].undiscarded[j] = filevec[f].equivs[i];
joachim99@52	587 filevec[f].realindexes[j++] = i;
joachim99@52	588 }
joachim99@52	589 else
joachim99@52	590 filevec[f].changed[i] = 1;
joachim99@52	591 filevec[f].nondiscarded_lines = j;
joachim99@52	592 }
joachim99@52	593
joachim99@52	594 free (discarded[0]);
joachim99@52	595 free (equiv_count[0]);
joachim99@52	596 }
joachim99@52	597
joachim99@52	598 /* Adjust inserts/deletes of identical lines to join changes
joachim99@52	599 as much as possible.
joachim99@52	600
joachim99@52	601 We do something when a run of changed lines include a
joachim99@52	602 line at one end and have an excluded, identical line at the other.
joachim99@52	603 We are free to choose which identical line is included.
joachim99@52	604 `compareseq' usually chooses the one at the beginning,
joachim99@52	605 but usually it is cleaner to consider the following identical line
joachim99@52	606 to be the "change". */
joachim99@52	607
joachim99@53	608 void GnuDiff::shift_boundaries (struct file_data filevec[])
joachim99@52	609 {
joachim99@52	610 int f;
joachim99@52	611
joachim99@52	612 for (f = 0; f < 2; f++)
joachim99@52	613 {
joachim99@52	614 bool *changed = filevec[f].changed;
joachim99@52	615 bool const *other_changed = filevec[1 - f].changed;
joachim99@52	616 lin const *equivs = filevec[f].equivs;
joachim99@52	617 lin i = 0;
joachim99@52	618 lin j = 0;
joachim99@52	619 lin i_end = filevec[f].buffered_lines;
joachim99@52	620
joachim99@52	621 while (1)
joachim99@52	622 {
joachim99@52	623 lin runlength, start, corresponding;
joachim99@52	624
joachim99@52	625 /* Scan forwards to find beginning of another run of changes.
joachim99@52	626 Also keep track of the corresponding point in the other file. */
joachim99@52	627
joachim99@52	628 while (i < i_end && !changed[i])
joachim99@52	629 {
joachim99@52	630 while (other_changed[j++])
joachim99@52	631 continue;
joachim99@52	632 i++;
joachim99@52	633 }
joachim99@52	634
joachim99@52	635 if (i == i_end)
joachim99@52	636 break;
joachim99@52	637
joachim99@52	638 start = i;
joachim99@52	639
joachim99@52	640 /* Find the end of this run of changes. */
joachim99@52	641
joachim99@52	642 while (changed[++i])
joachim99@52	643 continue;
joachim99@52	644 while (other_changed[j])
joachim99@52	645 j++;
joachim99@52	646
joachim99@52	647 do
joachim99@52	648 {
joachim99@52	649 /* Record the length of this run of changes, so that
joachim99@52	650 we can later determine whether the run has grown. */
joachim99@52	651 runlength = i - start;
joachim99@52	652
joachim99@52	653 /* Move the changed region back, so long as the
joachim99@52	654 previous unchanged line matches the last changed one.
joachim99@52	655 This merges with previous changed regions. */
joachim99@52	656
joachim99@52	657 while (start && equivs[start - 1] == equivs[i - 1])
joachim99@52	658 {
joachim99@52	659 changed[--start] = 1;
joachim99@52	660 changed[--i] = 0;
joachim99@52	661 while (changed[start - 1])
joachim99@52	662 start--;
joachim99@52	663 while (other_changed[--j])
joachim99@52	664 continue;
joachim99@52	665 }
joachim99@52	666
joachim99@52	667 /* Set CORRESPONDING to the end of the changed run, at the last
joachim99@52	668 point where it corresponds to a changed run in the other file.
joachim99@52	669 CORRESPONDING == I_END means no such point has been found. */
joachim99@52	670 corresponding = other_changed[j - 1] ? i : i_end;
joachim99@52	671
joachim99@52	672 /* Move the changed region forward, so long as the
joachim99@52	673 first changed line matches the following unchanged one.
joachim99@52	674 This merges with following changed regions.
joachim99@52	675 Do this second, so that if there are no merges,
joachim99@52	676 the changed region is moved forward as far as possible. */
joachim99@52	677
joachim99@52	678 while (i != i_end && equivs[start] == equivs[i])
joachim99@52	679 {
joachim99@52	680 changed[start++] = 0;
joachim99@52	681 changed[i++] = 1;
joachim99@52	682 while (changed[i])
joachim99@52	683 i++;
joachim99@52	684 while (other_changed[++j])
joachim99@52	685 corresponding = i;
joachim99@52	686 }
joachim99@52	687 }
joachim99@52	688 while (runlength != i - start);
joachim99@52	689
joachim99@52	690 /* If possible, move the fully-merged run of changes
joachim99@52	691 back to a corresponding run in the other file. */
joachim99@52	692
joachim99@52	693 while (corresponding < i)
joachim99@52	694 {
joachim99@52	695 changed[--start] = 1;
joachim99@52	696 changed[--i] = 0;
joachim99@52	697 while (other_changed[--j])
joachim99@52	698 continue;
joachim99@52	699 }
joachim99@52	700 }
joachim99@52	701 }
joachim99@52	702 }
joachim99@52	703
joachim99@52	704 /* Cons an additional entry onto the front of an edit script OLD.
joachim99@52	705 LINE0 and LINE1 are the first affected lines in the two files (origin 0).
joachim99@52	706 DELETED is the number of lines deleted here from file 0.
joachim99@52	707 INSERTED is the number of lines inserted here in file 1.
joachim99@52	708
joachim99@52	709 If DELETED is 0 then LINE0 is the number of the line before
joachim99@52	710 which the insertion was done; vice versa for INSERTED and LINE1. */
joachim99@52	711
joachim99@53	712 GnuDiff::change* GnuDiff::add_change (lin line0, lin line1, lin deleted, lin inserted, struct change *old)
joachim99@52	713 {
joachim99@52	714 struct change *newChange = (change*) xmalloc (sizeof *newChange);
joachim99@52	715
joachim99@52	716 newChange->line0 = line0;
joachim99@52	717 newChange->line1 = line1;
joachim99@52	718 newChange->inserted = inserted;
joachim99@52	719 newChange->deleted = deleted;
joachim99@52	720 newChange->link = old;
joachim99@52	721 return newChange;
joachim99@52	722 }
joachim99@52	723
joachim99@52	724 /* Scan the tables of which lines are inserted and deleted,
joachim99@52	725 producing an edit script in reverse order. */
joachim99@52	726
joachim99@53	727 GnuDiff::change* GnuDiff::build_reverse_script (struct file_data const filevec[])
joachim99@52	728 {
joachim99@52	729 struct change *script = 0;
joachim99@52	730 bool *changed0 = filevec[0].changed;
joachim99@52	731 bool *changed1 = filevec[1].changed;
joachim99@52	732 lin len0 = filevec[0].buffered_lines;
joachim99@52	733 lin len1 = filevec[1].buffered_lines;
joachim99@52	734
joachim99@52	735 /* Note that changedN[len0] does exist, and is 0. */
joachim99@52	736
joachim99@52	737 lin i0 = 0, i1 = 0;
joachim99@52	738
joachim99@52	739 while (i0 < len0 || i1 < len1)
joachim99@52	740 {
joachim99@52	741 if (changed0[i0] | changed1[i1])
joachim99@52	742 {
joachim99@52	743 lin line0 = i0, line1 = i1;
joachim99@52	744
joachim99@52	745 /* Find # lines changed here in each file. */
joachim99@52	746 while (changed0[i0]) ++i0;
joachim99@52	747 while (changed1[i1]) ++i1;
joachim99@52	748
joachim99@52	749 /* Record this change. */
joachim99@52	750 script = add_change (line0, line1, i0 - line0, i1 - line1, script);
joachim99@52	751 }
joachim99@52	752
joachim99@52	753 /* We have reached lines in the two files that match each other. */
joachim99@52	754 i0++, i1++;
joachim99@52	755 }
joachim99@52	756
joachim99@52	757 return script;
joachim99@52	758 }
joachim99@52	759
joachim99@52	760 /* Scan the tables of which lines are inserted and deleted,
joachim99@52	761 producing an edit script in forward order. */
joachim99@52	762
joachim99@53	763 GnuDiff::change* GnuDiff::build_script (struct file_data const filevec[])
joachim99@52	764 {
joachim99@52	765 struct change *script = 0;
joachim99@52	766 bool *changed0 = filevec[0].changed;
joachim99@52	767 bool *changed1 = filevec[1].changed;
joachim99@52	768 lin i0 = filevec[0].buffered_lines, i1 = filevec[1].buffered_lines;
joachim99@52	769
joachim99@52	770 /* Note that changedN[-1] does exist, and is 0. */
joachim99@52	771
joachim99@52	772 while (i0 >= 0 \|\| i1 >= 0)
joachim99@52	773 {
joachim99@52	774 if (changed0[i0 - 1] \| changed1[i1 - 1])
joachim99@52	775 {
joachim99@52	776 lin line0 = i0, line1 = i1;
joachim99@52	777
joachim99@52	778 /* Find # lines changed here in each file. */
joachim99@52	779 while (changed0[i0 - 1]) --i0;
joachim99@52	780 while (changed1[i1 - 1]) --i1;
joachim99@52	781
joachim99@52	782 /* Record this change. */
joachim99@52	783 script = add_change (i0, i1, line0 - i0, line1 - i1, script);
joachim99@52	784 }
joachim99@52	785
joachim99@52	786 /* We have reached lines in the two files that match each other. */
joachim99@52	787 i0--, i1--;
joachim99@52	788 }
joachim99@52	789
joachim99@52	790 return script;
joachim99@52	791 }
joachim99@52	792
joachim99@52	793
joachim99@52	794 /* Report the differences of two files. */
joachim99@53	795 GnuDiff::change* GnuDiff::diff_2_files (struct comparison *cmp)
joachim99@52	796 {
joachim99@52	797 lin diags;
joachim99@52	798 int f;
joachim99@52	799 //struct change e, p;
joachim99@52	800 struct change *script;
joachim99@52	801 int changes;
joachim99@52	802
joachim99@52	803 read_files (cmp->file, files_can_be_treated_as_binary);
joachim99@52	804
joachim99@52	805 {
joachim99@52	806 /* Allocate vectors for the results of comparison:
joachim99@52	807 a flag for each line of each file, saying whether that line
joachim99@52	808 is an insertion or deletion.
joachim99@52	809 Allocate an extra element, always 0, at each end of each vector. */
joachim99@52	810
joachim99@52	811 size_t s = cmp->file[0].buffered_lines + cmp->file[1].buffered_lines + 4;
joachim99@52	812 bool flag_space = (bool)zalloc (s * sizeof(*flag_space));
joachim99@52	813 cmp->file[0].changed = flag_space + 1;
joachim99@52	814 cmp->file[1].changed = flag_space + cmp->file[0].buffered_lines + 3;
joachim99@52	815
joachim99@52	816 /* Some lines are obviously insertions or deletions
joachim99@52	817 because they don't match anything. Detect them now, and
joachim99@52	818 avoid even thinking about them in the main comparison algorithm. */
joachim99@52	819
joachim99@52	820 discard_confusing_lines (cmp->file);
joachim99@52	821
joachim99@52	822 /* Now do the main comparison algorithm, considering just the
joachim99@52	823 undiscarded lines. */
joachim99@52	824
joachim99@52	825 xvec = cmp->file[0].undiscarded;
joachim99@52	826 yvec = cmp->file[1].undiscarded;
joachim99@52	827 diags = (cmp->file[0].nondiscarded_lines
joachim99@52	828 + cmp->file[1].nondiscarded_lines + 3);
joachim99@52	829 fdiag = (lin)xmalloc (diags (2 * sizeof *fdiag));
joachim99@52	830 bdiag = fdiag + diags;
joachim99@52	831 fdiag += cmp->file[1].nondiscarded_lines + 1;
joachim99@52	832 bdiag += cmp->file[1].nondiscarded_lines + 1;
joachim99@52	833
joachim99@52	834 /* Set TOO_EXPENSIVE to be approximate square root of input size,
joachim99@52	835 bounded below by 256. */
joachim99@52	836 too_expensive = 1;
joachim99@52	837 for (; diags != 0; diags >>= 2)
joachim99@52	838 too_expensive <<= 1;
joachim99@52	839 too_expensive = MAX (256, too_expensive);
joachim99@52	840
joachim99@52	841 files[0] = cmp->file[0];
joachim99@52	842 files[1] = cmp->file[1];
joachim99@52	843
joachim99@52	844 compareseq (0, cmp->file[0].nondiscarded_lines,
joachim99@52	845 0, cmp->file[1].nondiscarded_lines, minimal);
joachim99@52	846
joachim99@52	847 free (fdiag - (cmp->file[1].nondiscarded_lines + 1));
joachim99@52	848
joachim99@52	849 /* Modify the results slightly to make them prettier
joachim99@52	850 in cases where that can validly be done. */
joachim99@52	851
joachim99@52	852 shift_boundaries (cmp->file);
joachim99@52	853
joachim99@52	854 /* Get the results of comparison in the form of a chain
joachim99@69	855 of `struct change's -- an edit script. */
joachim99@52	856
joachim99@69	857 script = build_script (cmp->file);
joachim99@52	858
joachim99@52	859 changes = (script != 0);
joachim99@52	860
joachim99@52	861 free (cmp->file[0].undiscarded);
joachim99@52	862
joachim99@52	863 free (flag_space);
joachim99@52	864
joachim99@52	865 for (f = 0; f < 2; f++)
joachim99@52	866 {
joachim99@52	867 free (cmp->file[f].equivs);
joachim99@52	868 free (cmp->file[f].linbuf + cmp->file[f].linbuf_base);
joachim99@52	869 }
joachim99@52	870 }
joachim99@52	871
joachim99@52	872 return script;
joachim99@52	873 }

Mercurial > hg > easyhg-kdiff3

annotate kdiff3/src-QT4/gnudiff_analyze.cpp @ 113:7bca1f1340f6 tip