Mercurial > hg > isophonics-drupal-site
comparison vendor/sebastian/diff/src/Differ.php @ 14:1fec387a4317
Update Drupal core to 8.5.2 via Composer
author | Chris Cannam |
---|---|
date | Mon, 23 Apr 2018 09:46:53 +0100 |
parents | 7a779792577d |
children |
comparison
equal
deleted
inserted
replaced
13:5fb285c0d0e3 | 14:1fec387a4317 |
---|---|
1 <?php | 1 <?php declare(strict_types=1); |
2 /* | 2 /* |
3 * This file is part of sebastian/diff. | 3 * This file is part of sebastian/diff. |
4 * | 4 * |
5 * (c) Sebastian Bergmann <sebastian@phpunit.de> | 5 * (c) Sebastian Bergmann <sebastian@phpunit.de> |
6 * | 6 * |
8 * file that was distributed with this source code. | 8 * file that was distributed with this source code. |
9 */ | 9 */ |
10 | 10 |
11 namespace SebastianBergmann\Diff; | 11 namespace SebastianBergmann\Diff; |
12 | 12 |
13 use SebastianBergmann\Diff\LCS\LongestCommonSubsequence; | 13 use SebastianBergmann\Diff\Output\DiffOutputBuilderInterface; |
14 use SebastianBergmann\Diff\LCS\TimeEfficientImplementation; | 14 use SebastianBergmann\Diff\Output\UnifiedDiffOutputBuilder; |
15 use SebastianBergmann\Diff\LCS\MemoryEfficientImplementation; | |
16 | 15 |
17 /** | 16 /** |
18 * Diff implementation. | 17 * Diff implementation. |
19 */ | 18 */ |
20 class Differ | 19 final class Differ |
21 { | 20 { |
22 /** | 21 /** |
23 * @var string | 22 * @var DiffOutputBuilderInterface |
24 */ | 23 */ |
25 private $header; | 24 private $outputBuilder; |
26 | 25 |
27 /** | 26 /** |
28 * @var bool | 27 * @param DiffOutputBuilderInterface $outputBuilder |
29 */ | 28 * |
30 private $showNonDiffLines; | 29 * @throws InvalidArgumentException |
31 | 30 */ |
32 /** | 31 public function __construct($outputBuilder = null) |
33 * @param string $header | 32 { |
34 * @param bool $showNonDiffLines | 33 if ($outputBuilder instanceof DiffOutputBuilderInterface) { |
35 */ | 34 $this->outputBuilder = $outputBuilder; |
36 public function __construct($header = "--- Original\n+++ New\n", $showNonDiffLines = true) | 35 } elseif (null === $outputBuilder) { |
37 { | 36 $this->outputBuilder = new UnifiedDiffOutputBuilder; |
38 $this->header = $header; | 37 } elseif (\is_string($outputBuilder)) { |
39 $this->showNonDiffLines = $showNonDiffLines; | 38 // PHPUnit 6.1.4, 6.2.0, 6.2.1, 6.2.2, and 6.2.3 support |
39 // @see https://github.com/sebastianbergmann/phpunit/issues/2734#issuecomment-314514056 | |
40 // @deprecated | |
41 $this->outputBuilder = new UnifiedDiffOutputBuilder($outputBuilder); | |
42 } else { | |
43 throw new InvalidArgumentException( | |
44 \sprintf( | |
45 'Expected builder to be an instance of DiffOutputBuilderInterface, <null> or a string, got %s.', | |
46 \is_object($outputBuilder) ? 'instance of "' . \get_class($outputBuilder) . '"' : \gettype($outputBuilder) . ' "' . $outputBuilder . '"' | |
47 ) | |
48 ); | |
49 } | |
40 } | 50 } |
41 | 51 |
42 /** | 52 /** |
43 * Returns the diff between two arrays or strings as string. | 53 * Returns the diff between two arrays or strings as string. |
44 * | 54 * |
45 * @param array|string $from | 55 * @param array|string $from |
46 * @param array|string $to | 56 * @param array|string $to |
47 * @param LongestCommonSubsequence $lcs | 57 * @param LongestCommonSubsequenceCalculator|null $lcs |
48 * | 58 * |
49 * @return string | 59 * @return string |
50 */ | 60 */ |
51 public function diff($from, $to, LongestCommonSubsequence $lcs = null) | 61 public function diff($from, $to, LongestCommonSubsequenceCalculator $lcs = null): string |
52 { | 62 { |
53 $from = $this->validateDiffInput($from); | 63 $from = $this->validateDiffInput($from); |
54 $to = $this->validateDiffInput($to); | 64 $to = $this->validateDiffInput($to); |
55 $diff = $this->diffToArray($from, $to, $lcs); | 65 $diff = $this->diffToArray($from, $to, $lcs); |
56 $old = $this->checkIfDiffInOld($diff); | 66 |
57 $start = isset($old[0]) ? $old[0] : 0; | 67 return $this->outputBuilder->getDiff($diff); |
58 $end = \count($diff); | |
59 | |
60 if ($tmp = \array_search($end, $old)) { | |
61 $end = $tmp; | |
62 } | |
63 | |
64 return $this->getBuffer($diff, $old, $start, $end); | |
65 } | 68 } |
66 | 69 |
67 /** | 70 /** |
68 * Casts variable to string if it is not a string or array. | 71 * Casts variable to string if it is not a string or array. |
69 * | 72 * |
70 * @param mixed $input | 73 * @param mixed $input |
71 * | 74 * |
72 * @return string | 75 * @return string |
73 */ | 76 */ |
74 private function validateDiffInput($input) | 77 private function validateDiffInput($input): string |
75 { | 78 { |
76 if (!\is_array($input) && !\is_string($input)) { | 79 if (!\is_array($input) && !\is_string($input)) { |
77 return (string) $input; | 80 return (string) $input; |
78 } | 81 } |
79 | 82 |
80 return $input; | 83 return $input; |
81 } | |
82 | |
83 /** | |
84 * Takes input of the diff array and returns the old array. | |
85 * Iterates through diff line by line. | |
86 * | |
87 * @param array $diff | |
88 * | |
89 * @return array | |
90 */ | |
91 private function checkIfDiffInOld(array $diff) | |
92 { | |
93 $inOld = false; | |
94 $i = 0; | |
95 $old = array(); | |
96 | |
97 foreach ($diff as $line) { | |
98 if ($line[1] === 0 /* OLD */) { | |
99 if ($inOld === false) { | |
100 $inOld = $i; | |
101 } | |
102 } elseif ($inOld !== false) { | |
103 if (($i - $inOld) > 5) { | |
104 $old[$inOld] = $i - 1; | |
105 } | |
106 | |
107 $inOld = false; | |
108 } | |
109 | |
110 ++$i; | |
111 } | |
112 | |
113 return $old; | |
114 } | |
115 | |
116 /** | |
117 * Generates buffer in string format, returning the patch. | |
118 * | |
119 * @param array $diff | |
120 * @param array $old | |
121 * @param int $start | |
122 * @param int $end | |
123 * | |
124 * @return string | |
125 */ | |
126 private function getBuffer(array $diff, array $old, $start, $end) | |
127 { | |
128 $buffer = $this->header; | |
129 | |
130 if (!isset($old[$start])) { | |
131 $buffer = $this->getDiffBufferElementNew($diff, $buffer, $start); | |
132 ++$start; | |
133 } | |
134 | |
135 for ($i = $start; $i < $end; $i++) { | |
136 if (isset($old[$i])) { | |
137 $i = $old[$i]; | |
138 $buffer = $this->getDiffBufferElementNew($diff, $buffer, $i); | |
139 } else { | |
140 $buffer = $this->getDiffBufferElement($diff, $buffer, $i); | |
141 } | |
142 } | |
143 | |
144 return $buffer; | |
145 } | |
146 | |
147 /** | |
148 * Gets individual buffer element. | |
149 * | |
150 * @param array $diff | |
151 * @param string $buffer | |
152 * @param int $diffIndex | |
153 * | |
154 * @return string | |
155 */ | |
156 private function getDiffBufferElement(array $diff, $buffer, $diffIndex) | |
157 { | |
158 if ($diff[$diffIndex][1] === 1 /* ADDED */) { | |
159 $buffer .= '+' . $diff[$diffIndex][0] . "\n"; | |
160 } elseif ($diff[$diffIndex][1] === 2 /* REMOVED */) { | |
161 $buffer .= '-' . $diff[$diffIndex][0] . "\n"; | |
162 } elseif ($this->showNonDiffLines === true) { | |
163 $buffer .= ' ' . $diff[$diffIndex][0] . "\n"; | |
164 } | |
165 | |
166 return $buffer; | |
167 } | |
168 | |
169 /** | |
170 * Gets individual buffer element with opening. | |
171 * | |
172 * @param array $diff | |
173 * @param string $buffer | |
174 * @param int $diffIndex | |
175 * | |
176 * @return string | |
177 */ | |
178 private function getDiffBufferElementNew(array $diff, $buffer, $diffIndex) | |
179 { | |
180 if ($this->showNonDiffLines === true) { | |
181 $buffer .= "@@ @@\n"; | |
182 } | |
183 | |
184 return $this->getDiffBufferElement($diff, $buffer, $diffIndex); | |
185 } | 84 } |
186 | 85 |
187 /** | 86 /** |
188 * Returns the diff between two arrays or strings as array. | 87 * Returns the diff between two arrays or strings as array. |
189 * | 88 * |
193 * | 92 * |
194 * - 2: REMOVED: $token was removed from $from | 93 * - 2: REMOVED: $token was removed from $from |
195 * - 1: ADDED: $token was added to $from | 94 * - 1: ADDED: $token was added to $from |
196 * - 0: OLD: $token is not changed in $to | 95 * - 0: OLD: $token is not changed in $to |
197 * | 96 * |
198 * @param array|string $from | 97 * @param array|string $from |
199 * @param array|string $to | 98 * @param array|string $to |
200 * @param LongestCommonSubsequence $lcs | 99 * @param LongestCommonSubsequenceCalculator $lcs |
201 * | 100 * |
202 * @return array | 101 * @return array |
203 */ | 102 */ |
204 public function diffToArray($from, $to, LongestCommonSubsequence $lcs = null) | 103 public function diffToArray($from, $to, LongestCommonSubsequenceCalculator $lcs = null): array |
205 { | 104 { |
206 if (\is_string($from)) { | 105 if (\is_string($from)) { |
207 $fromMatches = $this->getNewLineMatches($from); | 106 $from = $this->splitStringByLines($from); |
208 $from = $this->splitStringByLines($from); | 107 } elseif (!\is_array($from)) { |
209 } elseif (\is_array($from)) { | |
210 $fromMatches = array(); | |
211 } else { | |
212 throw new \InvalidArgumentException('"from" must be an array or string.'); | 108 throw new \InvalidArgumentException('"from" must be an array or string.'); |
213 } | 109 } |
214 | 110 |
215 if (\is_string($to)) { | 111 if (\is_string($to)) { |
216 $toMatches = $this->getNewLineMatches($to); | 112 $to = $this->splitStringByLines($to); |
217 $to = $this->splitStringByLines($to); | 113 } elseif (!\is_array($to)) { |
218 } elseif (\is_array($to)) { | |
219 $toMatches = array(); | |
220 } else { | |
221 throw new \InvalidArgumentException('"to" must be an array or string.'); | 114 throw new \InvalidArgumentException('"to" must be an array or string.'); |
222 } | 115 } |
223 | 116 |
224 list($from, $to, $start, $end) = self::getArrayDiffParted($from, $to); | 117 list($from, $to, $start, $end) = self::getArrayDiffParted($from, $to); |
225 | 118 |
226 if ($lcs === null) { | 119 if ($lcs === null) { |
227 $lcs = $this->selectLcsImplementation($from, $to); | 120 $lcs = $this->selectLcsImplementation($from, $to); |
228 } | 121 } |
229 | 122 |
230 $common = $lcs->calculate(\array_values($from), \array_values($to)); | 123 $common = $lcs->calculate(\array_values($from), \array_values($to)); |
231 $diff = array(); | 124 $diff = []; |
232 | |
233 if ($this->detectUnmatchedLineEndings($fromMatches, $toMatches)) { | |
234 $diff[] = array( | |
235 '#Warning: Strings contain different line endings!', | |
236 0 | |
237 ); | |
238 } | |
239 | 125 |
240 foreach ($start as $token) { | 126 foreach ($start as $token) { |
241 $diff[] = array($token, 0 /* OLD */); | 127 $diff[] = [$token, 0 /* OLD */]; |
242 } | 128 } |
243 | 129 |
244 \reset($from); | 130 \reset($from); |
245 \reset($to); | 131 \reset($to); |
246 | 132 |
247 foreach ($common as $token) { | 133 foreach ($common as $token) { |
248 while (($fromToken = \reset($from)) !== $token) { | 134 while (($fromToken = \reset($from)) !== $token) { |
249 $diff[] = array(\array_shift($from), 2 /* REMOVED */); | 135 $diff[] = [\array_shift($from), 2 /* REMOVED */]; |
250 } | 136 } |
251 | 137 |
252 while (($toToken = \reset($to)) !== $token) { | 138 while (($toToken = \reset($to)) !== $token) { |
253 $diff[] = array(\array_shift($to), 1 /* ADDED */); | 139 $diff[] = [\array_shift($to), 1 /* ADDED */]; |
254 } | 140 } |
255 | 141 |
256 $diff[] = array($token, 0 /* OLD */); | 142 $diff[] = [$token, 0 /* OLD */]; |
257 | 143 |
258 \array_shift($from); | 144 \array_shift($from); |
259 \array_shift($to); | 145 \array_shift($to); |
260 } | 146 } |
261 | 147 |
262 while (($token = \array_shift($from)) !== null) { | 148 while (($token = \array_shift($from)) !== null) { |
263 $diff[] = array($token, 2 /* REMOVED */); | 149 $diff[] = [$token, 2 /* REMOVED */]; |
264 } | 150 } |
265 | 151 |
266 while (($token = \array_shift($to)) !== null) { | 152 while (($token = \array_shift($to)) !== null) { |
267 $diff[] = array($token, 1 /* ADDED */); | 153 $diff[] = [$token, 1 /* ADDED */]; |
268 } | 154 } |
269 | 155 |
270 foreach ($end as $token) { | 156 foreach ($end as $token) { |
271 $diff[] = array($token, 0 /* OLD */); | 157 $diff[] = [$token, 0 /* OLD */]; |
158 } | |
159 | |
160 if ($this->detectUnmatchedLineEndings($diff)) { | |
161 \array_unshift($diff, ["#Warning: Strings contain different line endings!\n", 3]); | |
272 } | 162 } |
273 | 163 |
274 return $diff; | 164 return $diff; |
275 } | 165 } |
276 | 166 |
277 /** | 167 /** |
278 * Get new strings denoting new lines from a given string. | 168 * Checks if input is string, if so it will split it line-by-line. |
279 * | 169 * |
280 * @param string $string | 170 * @param string $input |
281 * | 171 * |
282 * @return array | 172 * @return array |
283 */ | 173 */ |
284 private function getNewLineMatches($string) | 174 private function splitStringByLines(string $input): array |
285 { | 175 { |
286 \preg_match_all('(\r\n|\r|\n)', $string, $stringMatches); | 176 return \preg_split('/(.*\R)/', $input, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY); |
287 | |
288 return $stringMatches; | |
289 } | |
290 | |
291 /** | |
292 * Checks if input is string, if so it will split it line-by-line. | |
293 * | |
294 * @param string $input | |
295 * | |
296 * @return array | |
297 */ | |
298 private function splitStringByLines($input) | |
299 { | |
300 return \preg_split('(\r\n|\r|\n)', $input); | |
301 } | 177 } |
302 | 178 |
303 /** | 179 /** |
304 * @param array $from | 180 * @param array $from |
305 * @param array $to | 181 * @param array $to |
306 * | 182 * |
307 * @return LongestCommonSubsequence | 183 * @return LongestCommonSubsequenceCalculator |
308 */ | 184 */ |
309 private function selectLcsImplementation(array $from, array $to) | 185 private function selectLcsImplementation(array $from, array $to): LongestCommonSubsequenceCalculator |
310 { | 186 { |
311 // We do not want to use the time-efficient implementation if its memory | 187 // We do not want to use the time-efficient implementation if its memory |
312 // footprint will probably exceed this value. Note that the footprint | 188 // footprint will probably exceed this value. Note that the footprint |
313 // calculation is only an estimation for the matrix and the LCS method | 189 // calculation is only an estimation for the matrix and the LCS method |
314 // will typically allocate a bit more memory than this. | 190 // will typically allocate a bit more memory than this. |
315 $memoryLimit = 100 * 1024 * 1024; | 191 $memoryLimit = 100 * 1024 * 1024; |
316 | 192 |
317 if ($this->calculateEstimatedFootprint($from, $to) > $memoryLimit) { | 193 if ($this->calculateEstimatedFootprint($from, $to) > $memoryLimit) { |
318 return new MemoryEfficientImplementation; | 194 return new MemoryEfficientLongestCommonSubsequenceCalculator; |
319 } | 195 } |
320 | 196 |
321 return new TimeEfficientImplementation; | 197 return new TimeEfficientLongestCommonSubsequenceCalculator; |
322 } | 198 } |
323 | 199 |
324 /** | 200 /** |
325 * Calculates the estimated memory footprint for the DP-based method. | 201 * Calculates the estimated memory footprint for the DP-based method. |
326 * | 202 * |
331 */ | 207 */ |
332 private function calculateEstimatedFootprint(array $from, array $to) | 208 private function calculateEstimatedFootprint(array $from, array $to) |
333 { | 209 { |
334 $itemSize = PHP_INT_SIZE === 4 ? 76 : 144; | 210 $itemSize = PHP_INT_SIZE === 4 ? 76 : 144; |
335 | 211 |
336 return $itemSize * \pow(\min(\count($from), \count($to)), 2); | 212 return $itemSize * \min(\count($from), \count($to)) ** 2; |
337 } | 213 } |
338 | 214 |
339 /** | 215 /** |
340 * Returns true if line ends don't match on fromMatches and toMatches. | 216 * Returns true if line ends don't match in a diff. |
341 * | 217 * |
342 * @param array $fromMatches | 218 * @param array $diff |
343 * @param array $toMatches | |
344 * | 219 * |
345 * @return bool | 220 * @return bool |
346 */ | 221 */ |
347 private function detectUnmatchedLineEndings(array $fromMatches, array $toMatches) | 222 private function detectUnmatchedLineEndings(array $diff): bool |
348 { | 223 { |
349 return isset($fromMatches[0], $toMatches[0]) && | 224 $newLineBreaks = ['' => true]; |
350 \count($fromMatches[0]) === \count($toMatches[0]) && | 225 $oldLineBreaks = ['' => true]; |
351 $fromMatches[0] !== $toMatches[0]; | 226 |
352 } | 227 foreach ($diff as $entry) { |
353 | 228 if (0 === $entry[1]) { /* OLD */ |
354 /** | 229 $ln = $this->getLinebreak($entry[0]); |
355 * @param array $from | 230 $oldLineBreaks[$ln] = true; |
356 * @param array $to | 231 $newLineBreaks[$ln] = true; |
357 * | 232 } elseif (1 === $entry[1]) { /* ADDED */ |
358 * @return array | 233 $newLineBreaks[$this->getLinebreak($entry[0])] = true; |
359 */ | 234 } elseif (2 === $entry[1]) { /* REMOVED */ |
360 private static function getArrayDiffParted(array &$from, array &$to) | 235 $oldLineBreaks[$this->getLinebreak($entry[0])] = true; |
361 { | 236 } |
362 $start = array(); | 237 } |
363 $end = array(); | 238 |
239 // if either input or output is a single line without breaks then no warning should be raised | |
240 if (['' => true] === $newLineBreaks || ['' => true] === $oldLineBreaks) { | |
241 return false; | |
242 } | |
243 | |
244 // two way compare | |
245 foreach ($newLineBreaks as $break => $set) { | |
246 if (!isset($oldLineBreaks[$break])) { | |
247 return true; | |
248 } | |
249 } | |
250 | |
251 foreach ($oldLineBreaks as $break => $set) { | |
252 if (!isset($newLineBreaks[$break])) { | |
253 return true; | |
254 } | |
255 } | |
256 | |
257 return false; | |
258 } | |
259 | |
260 private function getLinebreak($line): string | |
261 { | |
262 if (!\is_string($line)) { | |
263 return ''; | |
264 } | |
265 | |
266 $lc = \substr($line, -1); | |
267 if ("\r" === $lc) { | |
268 return "\r"; | |
269 } | |
270 | |
271 if ("\n" !== $lc) { | |
272 return ''; | |
273 } | |
274 | |
275 if ("\r\n" === \substr($line, -2)) { | |
276 return "\r\n"; | |
277 } | |
278 | |
279 return "\n"; | |
280 } | |
281 | |
282 private static function getArrayDiffParted(array &$from, array &$to): array | |
283 { | |
284 $start = []; | |
285 $end = []; | |
364 | 286 |
365 \reset($to); | 287 \reset($to); |
366 | 288 |
367 foreach ($from as $k => $v) { | 289 foreach ($from as $k => $v) { |
368 $toK = \key($to); | 290 $toK = \key($to); |
388 } | 310 } |
389 | 311 |
390 \prev($from); | 312 \prev($from); |
391 \prev($to); | 313 \prev($to); |
392 | 314 |
393 $end = array($fromK => $from[$fromK]) + $end; | 315 $end = [$fromK => $from[$fromK]] + $end; |
394 unset($from[$fromK], $to[$toK]); | 316 unset($from[$fromK], $to[$toK]); |
395 } while (true); | 317 } while (true); |
396 | 318 |
397 return array($from, $to, $start, $end); | 319 return [$from, $to, $start, $end]; |
398 } | 320 } |
399 } | 321 } |