Chris@0
|
1 <?php
|
Chris@0
|
2 /*
|
Chris@0
|
3 * This file is part of the Diff package.
|
Chris@0
|
4 *
|
Chris@0
|
5 * (c) Sebastian Bergmann <sebastian@phpunit.de>
|
Chris@0
|
6 *
|
Chris@0
|
7 * For the full copyright and license information, please view the LICENSE
|
Chris@0
|
8 * file that was distributed with this source code.
|
Chris@0
|
9 */
|
Chris@0
|
10
|
Chris@0
|
11 namespace SebastianBergmann\Diff;
|
Chris@0
|
12
|
Chris@0
|
13 use SebastianBergmann\Diff\LCS\LongestCommonSubsequence;
|
Chris@0
|
14 use SebastianBergmann\Diff\LCS\TimeEfficientImplementation;
|
Chris@0
|
15 use SebastianBergmann\Diff\LCS\MemoryEfficientImplementation;
|
Chris@0
|
16
|
Chris@0
|
17 /**
|
Chris@0
|
18 * Diff implementation.
|
Chris@0
|
19 */
|
Chris@0
|
20 class Differ
|
Chris@0
|
21 {
|
Chris@0
|
22 /**
|
Chris@0
|
23 * @var string
|
Chris@0
|
24 */
|
Chris@0
|
25 private $header;
|
Chris@0
|
26
|
Chris@0
|
27 /**
|
Chris@0
|
28 * @var bool
|
Chris@0
|
29 */
|
Chris@0
|
30 private $showNonDiffLines;
|
Chris@0
|
31
|
Chris@0
|
32 /**
|
Chris@0
|
33 * @param string $header
|
Chris@0
|
34 */
|
Chris@0
|
35 public function __construct($header = "--- Original\n+++ New\n", $showNonDiffLines = true)
|
Chris@0
|
36 {
|
Chris@0
|
37 $this->header = $header;
|
Chris@0
|
38 $this->showNonDiffLines = $showNonDiffLines;
|
Chris@0
|
39 }
|
Chris@0
|
40
|
Chris@0
|
41 /**
|
Chris@0
|
42 * Returns the diff between two arrays or strings as string.
|
Chris@0
|
43 *
|
Chris@0
|
44 * @param array|string $from
|
Chris@0
|
45 * @param array|string $to
|
Chris@0
|
46 * @param LongestCommonSubsequence $lcs
|
Chris@0
|
47 *
|
Chris@0
|
48 * @return string
|
Chris@0
|
49 */
|
Chris@0
|
50 public function diff($from, $to, LongestCommonSubsequence $lcs = null)
|
Chris@0
|
51 {
|
Chris@0
|
52 if (!is_array($from) && !is_string($from)) {
|
Chris@0
|
53 $from = (string) $from;
|
Chris@0
|
54 }
|
Chris@0
|
55
|
Chris@0
|
56 if (!is_array($to) && !is_string($to)) {
|
Chris@0
|
57 $to = (string) $to;
|
Chris@0
|
58 }
|
Chris@0
|
59
|
Chris@0
|
60 $buffer = $this->header;
|
Chris@0
|
61 $diff = $this->diffToArray($from, $to, $lcs);
|
Chris@0
|
62
|
Chris@0
|
63 $inOld = false;
|
Chris@0
|
64 $i = 0;
|
Chris@0
|
65 $old = array();
|
Chris@0
|
66
|
Chris@0
|
67 foreach ($diff as $line) {
|
Chris@0
|
68 if ($line[1] === 0 /* OLD */) {
|
Chris@0
|
69 if ($inOld === false) {
|
Chris@0
|
70 $inOld = $i;
|
Chris@0
|
71 }
|
Chris@0
|
72 } elseif ($inOld !== false) {
|
Chris@0
|
73 if (($i - $inOld) > 5) {
|
Chris@0
|
74 $old[$inOld] = $i - 1;
|
Chris@0
|
75 }
|
Chris@0
|
76
|
Chris@0
|
77 $inOld = false;
|
Chris@0
|
78 }
|
Chris@0
|
79
|
Chris@0
|
80 ++$i;
|
Chris@0
|
81 }
|
Chris@0
|
82
|
Chris@0
|
83 $start = isset($old[0]) ? $old[0] : 0;
|
Chris@0
|
84 $end = count($diff);
|
Chris@0
|
85
|
Chris@0
|
86 if ($tmp = array_search($end, $old)) {
|
Chris@0
|
87 $end = $tmp;
|
Chris@0
|
88 }
|
Chris@0
|
89
|
Chris@0
|
90 $newChunk = true;
|
Chris@0
|
91
|
Chris@0
|
92 for ($i = $start; $i < $end; $i++) {
|
Chris@0
|
93 if (isset($old[$i])) {
|
Chris@0
|
94 $buffer .= "\n";
|
Chris@0
|
95 $newChunk = true;
|
Chris@0
|
96 $i = $old[$i];
|
Chris@0
|
97 }
|
Chris@0
|
98
|
Chris@0
|
99 if ($newChunk) {
|
Chris@0
|
100 if ($this->showNonDiffLines === true) {
|
Chris@0
|
101 $buffer .= "@@ @@\n";
|
Chris@0
|
102 }
|
Chris@0
|
103 $newChunk = false;
|
Chris@0
|
104 }
|
Chris@0
|
105
|
Chris@0
|
106 if ($diff[$i][1] === 1 /* ADDED */) {
|
Chris@0
|
107 $buffer .= '+' . $diff[$i][0] . "\n";
|
Chris@0
|
108 } elseif ($diff[$i][1] === 2 /* REMOVED */) {
|
Chris@0
|
109 $buffer .= '-' . $diff[$i][0] . "\n";
|
Chris@0
|
110 } elseif ($this->showNonDiffLines === true) {
|
Chris@0
|
111 $buffer .= ' ' . $diff[$i][0] . "\n";
|
Chris@0
|
112 }
|
Chris@0
|
113 }
|
Chris@0
|
114
|
Chris@0
|
115 return $buffer;
|
Chris@0
|
116 }
|
Chris@0
|
117
|
Chris@0
|
118 /**
|
Chris@0
|
119 * Returns the diff between two arrays or strings as array.
|
Chris@0
|
120 *
|
Chris@0
|
121 * Each array element contains two elements:
|
Chris@0
|
122 * - [0] => string $token
|
Chris@0
|
123 * - [1] => 2|1|0
|
Chris@0
|
124 *
|
Chris@0
|
125 * - 2: REMOVED: $token was removed from $from
|
Chris@0
|
126 * - 1: ADDED: $token was added to $from
|
Chris@0
|
127 * - 0: OLD: $token is not changed in $to
|
Chris@0
|
128 *
|
Chris@0
|
129 * @param array|string $from
|
Chris@0
|
130 * @param array|string $to
|
Chris@0
|
131 * @param LongestCommonSubsequence $lcs
|
Chris@0
|
132 *
|
Chris@0
|
133 * @return array
|
Chris@0
|
134 */
|
Chris@0
|
135 public function diffToArray($from, $to, LongestCommonSubsequence $lcs = null)
|
Chris@0
|
136 {
|
Chris@0
|
137 preg_match_all('(\r\n|\r|\n)', $from, $fromMatches);
|
Chris@0
|
138 preg_match_all('(\r\n|\r|\n)', $to, $toMatches);
|
Chris@0
|
139
|
Chris@0
|
140 if (is_string($from)) {
|
Chris@0
|
141 $from = preg_split('(\r\n|\r|\n)', $from);
|
Chris@0
|
142 }
|
Chris@0
|
143
|
Chris@0
|
144 if (is_string($to)) {
|
Chris@0
|
145 $to = preg_split('(\r\n|\r|\n)', $to);
|
Chris@0
|
146 }
|
Chris@0
|
147
|
Chris@0
|
148 $start = array();
|
Chris@0
|
149 $end = array();
|
Chris@0
|
150 $fromLength = count($from);
|
Chris@0
|
151 $toLength = count($to);
|
Chris@0
|
152 $length = min($fromLength, $toLength);
|
Chris@0
|
153
|
Chris@0
|
154 for ($i = 0; $i < $length; ++$i) {
|
Chris@0
|
155 if ($from[$i] === $to[$i]) {
|
Chris@0
|
156 $start[] = $from[$i];
|
Chris@0
|
157 unset($from[$i], $to[$i]);
|
Chris@0
|
158 } else {
|
Chris@0
|
159 break;
|
Chris@0
|
160 }
|
Chris@0
|
161 }
|
Chris@0
|
162
|
Chris@0
|
163 $length -= $i;
|
Chris@0
|
164
|
Chris@0
|
165 for ($i = 1; $i < $length; ++$i) {
|
Chris@0
|
166 if ($from[$fromLength - $i] === $to[$toLength - $i]) {
|
Chris@0
|
167 array_unshift($end, $from[$fromLength - $i]);
|
Chris@0
|
168 unset($from[$fromLength - $i], $to[$toLength - $i]);
|
Chris@0
|
169 } else {
|
Chris@0
|
170 break;
|
Chris@0
|
171 }
|
Chris@0
|
172 }
|
Chris@0
|
173
|
Chris@0
|
174 if ($lcs === null) {
|
Chris@0
|
175 $lcs = $this->selectLcsImplementation($from, $to);
|
Chris@0
|
176 }
|
Chris@0
|
177
|
Chris@0
|
178 $common = $lcs->calculate(array_values($from), array_values($to));
|
Chris@0
|
179 $diff = array();
|
Chris@0
|
180
|
Chris@0
|
181 if (isset($fromMatches[0]) && $toMatches[0] &&
|
Chris@0
|
182 count($fromMatches[0]) === count($toMatches[0]) &&
|
Chris@0
|
183 $fromMatches[0] !== $toMatches[0]) {
|
Chris@0
|
184 $diff[] = array(
|
Chris@0
|
185 '#Warning: Strings contain different line endings!', 0
|
Chris@0
|
186 );
|
Chris@0
|
187 }
|
Chris@0
|
188
|
Chris@0
|
189 foreach ($start as $token) {
|
Chris@0
|
190 $diff[] = array($token, 0 /* OLD */);
|
Chris@0
|
191 }
|
Chris@0
|
192
|
Chris@0
|
193 reset($from);
|
Chris@0
|
194 reset($to);
|
Chris@0
|
195
|
Chris@0
|
196 foreach ($common as $token) {
|
Chris@0
|
197 while ((($fromToken = reset($from)) !== $token)) {
|
Chris@0
|
198 $diff[] = array(array_shift($from), 2 /* REMOVED */);
|
Chris@0
|
199 }
|
Chris@0
|
200
|
Chris@0
|
201 while ((($toToken = reset($to)) !== $token)) {
|
Chris@0
|
202 $diff[] = array(array_shift($to), 1 /* ADDED */);
|
Chris@0
|
203 }
|
Chris@0
|
204
|
Chris@0
|
205 $diff[] = array($token, 0 /* OLD */);
|
Chris@0
|
206
|
Chris@0
|
207 array_shift($from);
|
Chris@0
|
208 array_shift($to);
|
Chris@0
|
209 }
|
Chris@0
|
210
|
Chris@0
|
211 while (($token = array_shift($from)) !== null) {
|
Chris@0
|
212 $diff[] = array($token, 2 /* REMOVED */);
|
Chris@0
|
213 }
|
Chris@0
|
214
|
Chris@0
|
215 while (($token = array_shift($to)) !== null) {
|
Chris@0
|
216 $diff[] = array($token, 1 /* ADDED */);
|
Chris@0
|
217 }
|
Chris@0
|
218
|
Chris@0
|
219 foreach ($end as $token) {
|
Chris@0
|
220 $diff[] = array($token, 0 /* OLD */);
|
Chris@0
|
221 }
|
Chris@0
|
222
|
Chris@0
|
223 return $diff;
|
Chris@0
|
224 }
|
Chris@0
|
225
|
Chris@0
|
226 /**
|
Chris@0
|
227 * @param array $from
|
Chris@0
|
228 * @param array $to
|
Chris@0
|
229 *
|
Chris@0
|
230 * @return LongestCommonSubsequence
|
Chris@0
|
231 */
|
Chris@0
|
232 private function selectLcsImplementation(array $from, array $to)
|
Chris@0
|
233 {
|
Chris@0
|
234 // We do not want to use the time-efficient implementation if its memory
|
Chris@0
|
235 // footprint will probably exceed this value. Note that the footprint
|
Chris@0
|
236 // calculation is only an estimation for the matrix and the LCS method
|
Chris@0
|
237 // will typically allocate a bit more memory than this.
|
Chris@0
|
238 $memoryLimit = 100 * 1024 * 1024;
|
Chris@0
|
239
|
Chris@0
|
240 if ($this->calculateEstimatedFootprint($from, $to) > $memoryLimit) {
|
Chris@0
|
241 return new MemoryEfficientImplementation;
|
Chris@0
|
242 }
|
Chris@0
|
243
|
Chris@0
|
244 return new TimeEfficientImplementation;
|
Chris@0
|
245 }
|
Chris@0
|
246
|
Chris@0
|
247 /**
|
Chris@0
|
248 * Calculates the estimated memory footprint for the DP-based method.
|
Chris@0
|
249 *
|
Chris@0
|
250 * @param array $from
|
Chris@0
|
251 * @param array $to
|
Chris@0
|
252 *
|
Chris@0
|
253 * @return int
|
Chris@0
|
254 */
|
Chris@0
|
255 private function calculateEstimatedFootprint(array $from, array $to)
|
Chris@0
|
256 {
|
Chris@0
|
257 $itemSize = PHP_INT_SIZE == 4 ? 76 : 144;
|
Chris@0
|
258
|
Chris@0
|
259 return $itemSize * pow(min(count($from), count($to)), 2);
|
Chris@0
|
260 }
|
Chris@0
|
261 }
|