Chris@14
|
1 <?php declare(strict_types=1);
|
Chris@0
|
2 /*
|
Chris@12
|
3 * This file is part of sebastian/diff.
|
Chris@0
|
4 *
|
Chris@0
|
5 * (c) Sebastian Bergmann <sebastian@phpunit.de>
|
Chris@0
|
6 *
|
Chris@0
|
7 * For the full copyright and license information, please view the LICENSE
|
Chris@0
|
8 * file that was distributed with this source code.
|
Chris@0
|
9 */
|
Chris@0
|
10
|
Chris@0
|
11 namespace SebastianBergmann\Diff;
|
Chris@0
|
12
|
Chris@14
|
13 use SebastianBergmann\Diff\Output\DiffOutputBuilderInterface;
|
Chris@14
|
14 use SebastianBergmann\Diff\Output\UnifiedDiffOutputBuilder;
|
Chris@0
|
15
|
Chris@0
|
/**
 * Diff implementation.
 *
 * Computes the difference between two token sequences (arrays, or strings
 * that are split into lines) and renders it through a diff output builder.
 */
final class Differ
{
    /**
     * Builder that turns the diff array into its string representation.
     *
     * @var DiffOutputBuilderInterface
     */
    private $outputBuilder;

    /**
     * @param null|DiffOutputBuilderInterface|string $outputBuilder builder to render diffs with;
     *                                                              null selects a default UnifiedDiffOutputBuilder,
     *                                                              a string is forwarded as the first constructor
     *                                                              argument of UnifiedDiffOutputBuilder (deprecated)
     *
     * @throws InvalidArgumentException when $outputBuilder is of any other type
     */
    public function __construct($outputBuilder = null)
    {
        if ($outputBuilder instanceof DiffOutputBuilderInterface) {
            $this->outputBuilder = $outputBuilder;

            return;
        }

        if (null === $outputBuilder) {
            $this->outputBuilder = new UnifiedDiffOutputBuilder;

            return;
        }

        if (\is_string($outputBuilder)) {
            // PHPUnit 6.1.4, 6.2.0, 6.2.1, 6.2.2, and 6.2.3 support
            // @see https://github.com/sebastianbergmann/phpunit/issues/2734#issuecomment-314514056
            // @deprecated
            $this->outputBuilder = new UnifiedDiffOutputBuilder($outputBuilder);

            return;
        }

        if (\is_object($outputBuilder)) {
            $actual = 'instance of "' . \get_class($outputBuilder) . '"';
        } elseif (\is_array($outputBuilder)) {
            // Concatenating an array would emit an "Array to string conversion"
            // notice/warning, which can mask the exception thrown below.
            $actual = 'array';
        } else {
            $actual = \gettype($outputBuilder) . ' "' . $outputBuilder . '"';
        }

        throw new InvalidArgumentException(
            \sprintf(
                'Expected builder to be an instance of DiffOutputBuilderInterface, <null> or a string, got %s.',
                $actual
            )
        );
    }

    /**
     * Returns the diff between two arrays or strings as string.
     *
     * Values that are neither array nor string are cast to string first.
     *
     * @param array|string                            $from
     * @param array|string                            $to
     * @param null|LongestCommonSubsequenceCalculator $lcs  calculator to use; null lets diffToArray() pick one
     *
     * @return string
     */
    public function diff($from, $to, LongestCommonSubsequenceCalculator $lcs = null): string
    {
        $diff = $this->diffToArray(
            $this->validateDiffInput($from),
            $this->validateDiffInput($to),
            $lcs
        );

        return $this->outputBuilder->getDiff($diff);
    }

    /**
     * Casts variable to string if it is not a string or array.
     *
     * Arrays and strings are returned unchanged, which is why this method
     * must not declare a "string" return type: doing so raises a TypeError
     * for array input.
     *
     * @param mixed $input
     *
     * @return array|string
     */
    private function validateDiffInput($input)
    {
        if (\is_array($input) || \is_string($input)) {
            return $input;
        }

        return (string) $input;
    }

    /**
     * Returns the diff between two arrays or strings as array.
     *
     * Each array element contains two elements:
     *   - [0] => mixed $token
     *   - [1] => 3|2|1|0
     *
     * - 3: WARNING: informational entry, prepended when the line endings of both sides differ
     * - 2: REMOVED: $token was removed from $from
     * - 1: ADDED: $token was added in $to
     * - 0: OLD: $token is not changed in $to
     *
     * @param array|string                            $from
     * @param array|string                            $to
     * @param null|LongestCommonSubsequenceCalculator $lcs  calculator to use; null selects one based on the
     *                                                      estimated memory footprint
     *
     * @throws \InvalidArgumentException when $from or $to is neither array nor string
     *
     * @return array
     */
    public function diffToArray($from, $to, LongestCommonSubsequenceCalculator $lcs = null): array
    {
        if (\is_string($from)) {
            $from = $this->splitStringByLines($from);
        } elseif (!\is_array($from)) {
            throw new \InvalidArgumentException('"from" must be an array or string.');
        }

        if (\is_string($to)) {
            $to = $this->splitStringByLines($to);
        } elseif (!\is_array($to)) {
            throw new \InvalidArgumentException('"to" must be an array or string.');
        }

        // Strip the common prefix/suffix so the LCS only runs on the part that differs.
        list($from, $to, $start, $end) = self::getArrayDiffParted($from, $to);

        if ($lcs === null) {
            $lcs = $this->selectLcsImplementation($from, $to);
        }

        // Re-index once so both sides can be walked with plain integer cursors
        // instead of repeatedly shifting (and re-indexing) the arrays.
        $from   = \array_values($from);
        $to     = \array_values($to);
        $common = $lcs->calculate($from, $to);

        $diff = [];

        foreach ($start as $token) {
            $diff[] = [$token, 0 /* OLD */];
        }

        $fromCount = \count($from);
        $toCount   = \count($to);
        $fromIndex = 0;
        $toIndex   = 0;

        foreach ($common as $token) {
            // Everything before the next common token only exists on one side.
            // The bounds checks keep the loops finite even if $common contains
            // a token that is not present in the input.
            while ($fromIndex < $fromCount && $from[$fromIndex] !== $token) {
                $diff[] = [$from[$fromIndex++], 2 /* REMOVED */];
            }

            while ($toIndex < $toCount && $to[$toIndex] !== $token) {
                $diff[] = [$to[$toIndex++], 1 /* ADDED */];
            }

            $diff[] = [$token, 0 /* OLD */];

            ++$fromIndex;
            ++$toIndex;
        }

        // Leftovers after the last common token; iterating by index (rather
        // than comparing shifted values against null) keeps null tokens intact.
        for (; $fromIndex < $fromCount; ++$fromIndex) {
            $diff[] = [$from[$fromIndex], 2 /* REMOVED */];
        }

        for (; $toIndex < $toCount; ++$toIndex) {
            $diff[] = [$to[$toIndex], 1 /* ADDED */];
        }

        foreach ($end as $token) {
            $diff[] = [$token, 0 /* OLD */];
        }

        if ($this->detectUnmatchedLineEndings($diff)) {
            \array_unshift($diff, ["#Warning: Strings contain different line endings!\n", 3]);
        }

        return $diff;
    }

    /**
     * Splits a string into its lines; every line keeps its trailing line break.
     *
     * @param string $input
     *
     * @return array
     */
    private function splitStringByLines(string $input): array
    {
        return \preg_split('/(.*\R)/', $input, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Picks the LCS calculator based on the estimated memory footprint.
     *
     * @param array $from
     * @param array $to
     *
     * @return LongestCommonSubsequenceCalculator
     */
    private function selectLcsImplementation(array $from, array $to): LongestCommonSubsequenceCalculator
    {
        // We do not want to use the time-efficient implementation if its memory
        // footprint will probably exceed this value. Note that the footprint
        // calculation is only an estimation for the matrix and the LCS method
        // will typically allocate a bit more memory than this.
        $memoryLimit = 100 * 1024 * 1024;

        if ($this->calculateEstimatedFootprint($from, $to) <= $memoryLimit) {
            return new TimeEfficientLongestCommonSubsequenceCalculator;
        }

        return new MemoryEfficientLongestCommonSubsequenceCalculator;
    }

    /**
     * Calculates the estimated memory footprint for the DP-based method.
     *
     * The estimate is the per-item size (76 when PHP_INT_SIZE is 4, 144
     * otherwise) multiplied by the square of the shorter input's length.
     *
     * @param array $from
     * @param array $to
     *
     * @return float|int
     */
    private function calculateEstimatedFootprint(array $from, array $to)
    {
        $itemSize = PHP_INT_SIZE === 4 ? 76 : 144;
        $shortest = \min(\count($from), \count($to));

        return $itemSize * ($shortest ** 2);
    }

    /**
     * Returns true if line ends don't match in a diff.
     *
     * Collects the set of line-break styles seen on the old side (OLD and
     * REMOVED entries) and on the new side (OLD and ADDED entries) and
     * reports whether those two sets differ.
     *
     * @param array $diff list of [token, code] entries as built by diffToArray()
     *
     * @return bool
     */
    private function detectUnmatchedLineEndings(array $diff): bool
    {
        $newLineBreaks = ['' => true];
        $oldLineBreaks = ['' => true];

        foreach ($diff as $entry) {
            if (0 === $entry[1]) { /* OLD */
                $ln                 = $this->getLinebreak($entry[0]);
                $oldLineBreaks[$ln] = true;
                $newLineBreaks[$ln] = true;
            } elseif (1 === $entry[1]) { /* ADDED */
                $newLineBreaks[$this->getLinebreak($entry[0])] = true;
            } elseif (2 === $entry[1]) { /* REMOVED */
                $oldLineBreaks[$this->getLinebreak($entry[0])] = true;
            }
        }

        // if either input or output is a single line without breaks then no warning should be raised
        if (['' => true] === $newLineBreaks || ['' => true] === $oldLineBreaks) {
            return false;
        }

        // two way compare: a break style that occurs on one side only is a mismatch
        return [] !== \array_diff_key($newLineBreaks, $oldLineBreaks)
            || [] !== \array_diff_key($oldLineBreaks, $newLineBreaks);
    }

    /**
     * Returns the line break ("\r", "\n" or "\r\n") that terminates the given
     * token, or an empty string when the token is not a string or does not
     * end with a line break.
     *
     * @param mixed $line
     *
     * @return string
     */
    private function getLinebreak($line): string
    {
        if (!\is_string($line)) {
            return '';
        }

        $lc = \substr($line, -1);

        if ("\r" === $lc) {
            return "\r";
        }

        if ("\n" !== $lc) {
            return '';
        }

        // The token ends with "\n": tell "\r\n" apart from a bare "\n".
        return "\r\n" === \substr($line, -2) ? "\r\n" : "\n";
    }

    /**
     * Splits off the leading and trailing parts that both arrays have in common.
     *
     * Both arguments are taken by reference and are reduced to their differing
     * middle part. Leading elements count as common only while key and value
     * are identical on both sides; trailing elements are compared by value.
     *
     * @param array $from
     * @param array $to
     *
     * @return array [remaining $from, remaining $to, common start, common end]
     */
    private static function getArrayDiffParted(array &$from, array &$to): array
    {
        $start = [];
        $end   = [];

        \reset($to);

        // Walk forward while both arrays agree on key and value.
        foreach ($from as $k => $v) {
            $toK = \key($to);

            if ($toK === $k && $v === $to[$k]) {
                $start[$k] = $v;

                unset($from[$k], $to[$k]);
            } else {
                break;
            }
        }

        \end($from);
        \end($to);

        // Walk backward from the ends while the current values are identical.
        do {
            $fromK = \key($from);
            $toK   = \key($to);

            if (null === $fromK || null === $toK || \current($from) !== \current($to)) {
                break;
            }

            // Move both pointers before unsetting the elements they point at.
            \prev($from);
            \prev($to);

            // Prepend, so that $end keeps the original order of the elements.
            $end = [$fromK => $from[$fromK]] + $end;
            unset($from[$fromK], $to[$toK]);
        } while (true);

        return [$from, $to, $start, $end];
    }
}
|