Chris@0
|
1 <?php
|
Chris@0
|
2 /*
|
Chris@12
|
3 * This file is part of sebastian/diff.
|
Chris@0
|
4 *
|
Chris@0
|
5 * (c) Sebastian Bergmann <sebastian@phpunit.de>
|
Chris@0
|
6 *
|
Chris@0
|
7 * For the full copyright and license information, please view the LICENSE
|
Chris@0
|
8 * file that was distributed with this source code.
|
Chris@0
|
9 */
|
Chris@0
|
10
|
Chris@0
|
11 namespace SebastianBergmann\Diff;
|
Chris@0
|
12
|
Chris@0
|
13 use SebastianBergmann\Diff\LCS\LongestCommonSubsequence;
|
Chris@0
|
14 use SebastianBergmann\Diff\LCS\TimeEfficientImplementation;
|
Chris@0
|
15 use SebastianBergmann\Diff\LCS\MemoryEfficientImplementation;
|
Chris@0
|
16
|
/**
 * Diff implementation.
 *
 * Compares two inputs (strings are split into lines, arrays are compared
 * element by element) and reports the result either as a structured array
 * (see diffToArray()) or as a patch-like string (see diff()).
 */
class Differ
{
    /**
     * Text written at the very beginning of the string returned by diff().
     *
     * @var string
     */
    private $header;

    /**
     * Whether unchanged lines and the "@@ @@" markers are written by diff().
     *
     * @var bool
     */
    private $showNonDiffLines;

    /**
     * @param string $header           text prepended to the output of diff()
     * @param bool   $showNonDiffLines when true, unchanged lines and "@@ @@" markers are part of the output
     */
    public function __construct($header = "--- Original\n+++ New\n", $showNonDiffLines = true)
    {
        $this->header           = $header;
        $this->showNonDiffLines = $showNonDiffLines;
    }

    /**
     * Returns the diff between two arrays or strings as string.
     *
     * Values that are neither array nor string are cast to string first.
     *
     * @param array|string             $from
     * @param array|string             $to
     * @param LongestCommonSubsequence $lcs  implementation to use; selected automatically when null
     *
     * @return string
     */
    public function diff($from, $to, LongestCommonSubsequence $lcs = null)
    {
        $from  = $this->validateDiffInput($from);
        $to    = $this->validateDiffInput($to);
        $diff  = $this->diffToArray($from, $to, $lcs);
        $old   = $this->checkIfDiffInOld($diff);
        $start = isset($old[0]) ? $old[0] : 0;
        $end   = \count($diff);

        // Strict search and an explicit comparison with false, so that a
        // match stored under key 0 is not mistaken for "not found".
        // NOTE(review): checkIfDiffInOld() only stores indexes smaller than
        // count($diff), so this lookup does not appear to match in practice
        // - confirm before relying on it.
        $tmp = \array_search($end, $old, true);

        if ($tmp !== false) {
            $end = $tmp;
        }

        return $this->getBuffer($diff, $old, $start, $end);
    }

    /**
     * Casts variable to string if it is not a string or array.
     *
     * Arrays and strings are returned unchanged.
     *
     * @param mixed $input
     *
     * @return array|string
     */
    private function validateDiffInput($input)
    {
        if (!\is_array($input) && !\is_string($input)) {
            return (string) $input;
        }

        return $input;
    }

    /**
     * Locates the long runs of unchanged entries in a diff array.
     *
     * Iterates through the diff entry by entry. Every run of more than five
     * consecutive OLD entries that is followed by a non-OLD entry is recorded
     * as "index of the first entry of the run" => "index of the last entry of
     * the run". A run that reaches the end of the diff is not recorded.
     *
     * @param array $diff
     *
     * @return array
     */
    private function checkIfDiffInOld(array $diff)
    {
        $inOld = false;
        $i     = 0;
        $old   = array();

        foreach ($diff as $line) {
            if ($line[1] === 0 /* OLD */) {
                if ($inOld === false) {
                    // First entry of a new run of unchanged entries.
                    $inOld = $i;
                }
            } elseif ($inOld !== false) {
                if (($i - $inOld) > 5) {
                    $old[$inOld] = $i - 1;
                }

                $inOld = false;
            }

            ++$i;
        }

        return $old;
    }

    /**
     * Generates buffer in string format, returning the patch.
     *
     * Starts with the configured header. Whenever the current index is the
     * start of a run recorded in $old, the loop jumps to the last entry of
     * that run and renders it through getDiffBufferElementNew(); every other
     * entry is rendered through getDiffBufferElement().
     *
     * @param array $diff  diff as returned by diffToArray()
     * @param array $old   runs as returned by checkIfDiffInOld()
     * @param int   $start index of the first entry to render
     * @param int   $end   index at which rendering stops (exclusive)
     *
     * @return string
     */
    private function getBuffer(array $diff, array $old, $start, $end)
    {
        $buffer = $this->header;

        if (!isset($old[$start])) {
            $buffer = $this->getDiffBufferElementNew($diff, $buffer, $start);
            ++$start;
        }

        for ($i = $start; $i < $end; $i++) {
            if (isset($old[$i])) {
                // Skip the recorded run, keeping only its last entry.
                $i      = $old[$i];
                $buffer = $this->getDiffBufferElementNew($diff, $buffer, $i);
            } else {
                $buffer = $this->getDiffBufferElement($diff, $buffer, $i);
            }
        }

        return $buffer;
    }

    /**
     * Gets individual buffer element.
     *
     * Appends the entry at $diffIndex to $buffer, prefixed with "+" (ADDED),
     * "-" (REMOVED) or " " (any other entry, only when non-diff lines are
     * shown). The buffer is returned unchanged when $diff has no entry at
     * $diffIndex, which happens when two empty arrays are compared.
     *
     * @param array  $diff
     * @param string $buffer
     * @param int    $diffIndex
     *
     * @return string
     */
    private function getDiffBufferElement(array $diff, $buffer, $diffIndex)
    {
        if (!isset($diff[$diffIndex])) {
            // Nothing to render; avoids undefined-offset notices and a
            // spurious empty context line.
            return $buffer;
        }

        $line = $diff[$diffIndex];

        if ($line[1] === 1 /* ADDED */) {
            $buffer .= '+' . $line[0] . "\n";
        } elseif ($line[1] === 2 /* REMOVED */) {
            $buffer .= '-' . $line[0] . "\n";
        } elseif ($this->showNonDiffLines === true) {
            $buffer .= ' ' . $line[0] . "\n";
        }

        return $buffer;
    }

    /**
     * Gets individual buffer element with opening.
     *
     * Same as getDiffBufferElement(), but writes an "@@ @@" marker line first
     * when non-diff lines are shown.
     *
     * @param array  $diff
     * @param string $buffer
     * @param int    $diffIndex
     *
     * @return string
     */
    private function getDiffBufferElementNew(array $diff, $buffer, $diffIndex)
    {
        if ($this->showNonDiffLines === true) {
            $buffer .= "@@ @@\n";
        }

        return $this->getDiffBufferElement($diff, $buffer, $diffIndex);
    }

    /**
     * Returns the diff between two arrays or strings as array.
     *
     * Each array element contains two elements:
     *   - [0] => mixed $token
     *   - [1] => 2|1|0
     *
     * - 2: REMOVED: $token is only present in $from
     * - 1: ADDED: $token is only present in $to
     * - 0: OLD: $token is present in both $from and $to
     *
     * When both inputs are strings that contain the same number of line
     * endings but not the same line endings, a warning entry (marked OLD) is
     * placed in front of the result.
     *
     * @param array|string             $from
     * @param array|string             $to
     * @param LongestCommonSubsequence $lcs  implementation to use; selected automatically when null
     *
     * @throws \InvalidArgumentException when $from or $to is neither an array nor a string
     *
     * @return array
     */
    public function diffToArray($from, $to, LongestCommonSubsequence $lcs = null)
    {
        if (\is_string($from)) {
            $fromMatches = $this->getNewLineMatches($from);
            $from        = $this->splitStringByLines($from);
        } elseif (\is_array($from)) {
            $fromMatches = array();
        } else {
            throw new \InvalidArgumentException('"from" must be an array or string.');
        }

        if (\is_string($to)) {
            $toMatches = $this->getNewLineMatches($to);
            $to        = $this->splitStringByLines($to);
        } elseif (\is_array($to)) {
            $toMatches = array();
        } else {
            throw new \InvalidArgumentException('"to" must be an array or string.');
        }

        // Strip the common prefix and suffix so that the LCS only has to work
        // on the part of the inputs that actually differs.
        list($from, $to, $start, $end) = self::getArrayDiffParted($from, $to);

        if ($lcs === null) {
            $lcs = $this->selectLcsImplementation($from, $to);
        }

        $common = $lcs->calculate(\array_values($from), \array_values($to));
        $diff   = array();

        if ($this->detectUnmatchedLineEndings($fromMatches, $toMatches)) {
            $diff[] = array(
                '#Warning: Strings contain different line endings!',
                0
            );
        }

        foreach ($start as $token) {
            $diff[] = array($token, 0 /* OLD */);
        }

        foreach ($common as $token) {
            // Everything in front of the next common token is a removal
            // (in $from) or an addition (in $to). The emptiness checks keep
            // the loops finite even if $common contains a token that is
            // missing from one of the inputs.
            while (!empty($from) && \reset($from) !== $token) {
                $diff[] = array(\array_shift($from), 2 /* REMOVED */);
            }

            while (!empty($to) && \reset($to) !== $token) {
                $diff[] = array(\array_shift($to), 1 /* ADDED */);
            }

            $diff[] = array($token, 0 /* OLD */);

            \array_shift($from);
            \array_shift($to);
        }

        // Whatever is left after the last common token. Iterating instead of
        // shifting until null keeps tokens that are themselves null.
        foreach ($from as $token) {
            $diff[] = array($token, 2 /* REMOVED */);
        }

        foreach ($to as $token) {
            $diff[] = array($token, 1 /* ADDED */);
        }

        foreach ($end as $token) {
            $diff[] = array($token, 0 /* OLD */);
        }

        return $diff;
    }

    /**
     * Collects the line endings (\r\n, \r or \n) that occur in a string.
     *
     * @param string $string
     *
     * @return array match array as filled by preg_match_all(); index 0 holds the line endings found
     */
    private function getNewLineMatches($string)
    {
        \preg_match_all('(\r\n|\r|\n)', $string, $stringMatches);

        return $stringMatches;
    }

    /**
     * Splits a string into lines at \r\n, \r or \n.
     *
     * @param string $input
     *
     * @return array
     */
    private function splitStringByLines($input)
    {
        return \preg_split('(\r\n|\r|\n)', $input);
    }

    /**
     * Picks the LCS implementation based on the estimated memory footprint.
     *
     * @param array $from
     * @param array $to
     *
     * @return LongestCommonSubsequence
     */
    private function selectLcsImplementation(array $from, array $to)
    {
        // We do not want to use the time-efficient implementation if its memory
        // footprint will probably exceed this value. Note that the footprint
        // calculation is only an estimation for the matrix and the LCS method
        // will typically allocate a bit more memory than this.
        $memoryLimit = 100 * 1024 * 1024;

        if ($this->calculateEstimatedFootprint($from, $to) > $memoryLimit) {
            return new MemoryEfficientImplementation;
        }

        return new TimeEfficientImplementation;
    }

    /**
     * Calculates the estimated memory footprint for the DP-based method.
     *
     * The estimate is a per-item size (76 when PHP_INT_SIZE is 4, otherwise
     * 144) multiplied by the square of the size of the smaller input.
     *
     * @param array $from
     * @param array $to
     *
     * @return int|float
     */
    private function calculateEstimatedFootprint(array $from, array $to)
    {
        $itemSize = PHP_INT_SIZE === 4 ? 76 : 144;

        return $itemSize * \pow(\min(\count($from), \count($to)), 2);
    }

    /**
     * Returns true if line ends don't match on fromMatches and toMatches.
     *
     * Only reports a mismatch when both inputs produced matches and contain
     * the same number of line endings.
     *
     * @param array $fromMatches
     * @param array $toMatches
     *
     * @return bool
     */
    private function detectUnmatchedLineEndings(array $fromMatches, array $toMatches)
    {
        return isset($fromMatches[0], $toMatches[0]) &&
               \count($fromMatches[0]) === \count($toMatches[0]) &&
               $fromMatches[0] !== $toMatches[0];
    }

    /**
     * Splits two arrays into common prefix, differing middle parts and common suffix.
     *
     * Both arguments are taken by reference and are reduced to their
     * differing middle part; keys are not renumbered.
     *
     * @param array $from
     * @param array $to
     *
     * @return array array($from, $to, $start, $end) where $start is the common prefix and $end the common suffix
     */
    private static function getArrayDiffParted(array &$from, array &$to)
    {
        $start = array();
        $end   = array();

        \reset($to);

        // Common prefix: same key and identical value at the front of both arrays.
        foreach ($from as $k => $v) {
            $toK = \key($to);

            if ($toK === $k && $v === $to[$k]) {
                $start[$k] = $v;

                unset($from[$k], $to[$k]);
            } else {
                break;
            }
        }

        \end($from);
        \end($to);

        // Common suffix: walk both arrays backwards while the values are identical.
        do {
            $fromK = \key($from);
            $toK   = \key($to);

            if (null === $fromK || null === $toK || \current($from) !== \current($to)) {
                break;
            }

            // Move the pointers before the current elements are removed.
            \prev($from);
            \prev($to);

            $end = array($fromK => $from[$fromK]) + $end;
            unset($from[$fromK], $to[$toK]);
        } while (true);

        return array($from, $to, $start, $end);
    }
}