annotate vendor/sebastian/diff/src/Differ.php @ 5:12f9dff5fda9 tip

Update to Drupal core 8.7.1
author Chris Cannam
date Thu, 09 May 2019 15:34:47 +0100
parents 5311817fb629
children
rev   line source
Chris@2 1 <?php declare(strict_types=1);
Chris@0 2 /*
Chris@2 3 * This file is part of sebastian/diff.
Chris@0 4 *
Chris@0 5 * (c) Sebastian Bergmann <sebastian@phpunit.de>
Chris@0 6 *
Chris@0 7 * For the full copyright and license information, please view the LICENSE
Chris@0 8 * file that was distributed with this source code.
Chris@0 9 */
Chris@0 10
Chris@0 11 namespace SebastianBergmann\Diff;
Chris@0 12
Chris@2 13 use SebastianBergmann\Diff\Output\DiffOutputBuilderInterface;
Chris@2 14 use SebastianBergmann\Diff\Output\UnifiedDiffOutputBuilder;
Chris@0 15
/**
 * Diff implementation.
 *
 * Computes the difference between two strings (compared line by line) or two
 * arrays (compared element by element) using a longest common subsequence
 * calculator, and renders the result through a DiffOutputBuilderInterface.
 */
final class Differ
{
    /**
     * Builder that turns the diff array produced by diffToArray() into a string.
     *
     * @var DiffOutputBuilderInterface
     */
    private $outputBuilder;

    /**
     * @param DiffOutputBuilderInterface|string|null $outputBuilder builder to use; null selects a default
     *                                                              UnifiedDiffOutputBuilder, a string is
     *                                                              forwarded as the constructor argument of
     *                                                              UnifiedDiffOutputBuilder (deprecated)
     *
     * @throws InvalidArgumentException when $outputBuilder is of any other type
     */
    public function __construct($outputBuilder = null)
    {
        if ($outputBuilder instanceof DiffOutputBuilderInterface) {
            $this->outputBuilder = $outputBuilder;
        } elseif (null === $outputBuilder) {
            $this->outputBuilder = new UnifiedDiffOutputBuilder;
        } elseif (\is_string($outputBuilder)) {
            // PHPUnit 6.1.4, 6.2.0, 6.2.1, 6.2.2, and 6.2.3 support
            // @see https://github.com/sebastianbergmann/phpunit/issues/2734#issuecomment-314514056
            // @deprecated
            $this->outputBuilder = new UnifiedDiffOutputBuilder($outputBuilder);
        } else {
            if (\is_object($outputBuilder)) {
                $actual = 'instance of "' . \get_class($outputBuilder) . '"';
            } elseif (\is_array($outputBuilder)) {
                // Arrays cannot be cast to string without raising "Array to string conversion",
                // which would mask the exception below when errors are converted to exceptions.
                $actual = 'array';
            } else {
                $actual = \gettype($outputBuilder) . ' "' . $outputBuilder . '"';
            }

            throw new InvalidArgumentException(
                \sprintf(
                    'Expected builder to be an instance of DiffOutputBuilderInterface, <null> or a string, got %s.',
                    $actual
                )
            );
        }
    }

    /**
     * Returns the diff between two arrays or strings as string.
     *
     * Values that are neither array nor string are cast to string first.
     *
     * @param array|string                            $from
     * @param array|string                            $to
     * @param LongestCommonSubsequenceCalculator|null $lcs  calculator to use; null lets the differ pick one
     *
     * @return string
     */
    public function diff($from, $to, LongestCommonSubsequenceCalculator $lcs = null): string
    {
        $from = $this->validateDiffInput($from);
        $to   = $this->validateDiffInput($to);
        $diff = $this->diffToArray($from, $to, $lcs);

        return $this->outputBuilder->getDiff($diff);
    }

    /**
     * Casts variable to string if it is not a string or array.
     *
     * Arrays and strings are returned unchanged. No return type is declared
     * because a "string" return type would raise a TypeError for array input
     * under strict_types.
     *
     * @param mixed $input
     *
     * @return array|string
     */
    private function validateDiffInput($input)
    {
        if (!\is_array($input) && !\is_string($input)) {
            return (string) $input;
        }

        return $input;
    }

    /**
     * Returns the diff between two arrays or strings as array.
     *
     * Each array element contains two elements:
     *   - [0] => mixed $token
     *   - [1] => 3|2|1|0
     *
     * - 3: WARNING: prepended once when the line endings of both sides differ
     * - 2: REMOVED: $token was removed from $from
     * - 1: ADDED: $token was added to $from
     * - 0: OLD: $token is not changed in $to
     *
     * @param array|string                            $from
     * @param array|string                            $to
     * @param LongestCommonSubsequenceCalculator|null $lcs  calculator to use; null lets the differ pick one
     *
     * @throws \InvalidArgumentException when $from or $to is neither array nor string
     *
     * @return array
     */
    public function diffToArray($from, $to, LongestCommonSubsequenceCalculator $lcs = null): array
    {
        if (\is_string($from)) {
            $from = $this->splitStringByLines($from);
        } elseif (!\is_array($from)) {
            throw new \InvalidArgumentException('"from" must be an array or string.');
        }

        if (\is_string($to)) {
            $to = $this->splitStringByLines($to);
        } elseif (!\is_array($to)) {
            throw new \InvalidArgumentException('"to" must be an array or string.');
        }

        // Strip the common head and tail so the LCS only has to look at the differing middle part.
        list($from, $to, $start, $end) = self::getArrayDiffParted($from, $to);

        if ($lcs === null) {
            $lcs = $this->selectLcsImplementation($from, $to);
        }

        $common = $lcs->calculate(\array_values($from), \array_values($to));
        $diff   = [];

        foreach ($start as $token) {
            $diff[] = [$token, 0 /* OLD */];
        }

        foreach ($common as $token) {
            // Everything in front of the next common token exists on one side only.
            while (\reset($from) !== $token) {
                $diff[] = [\array_shift($from), 2 /* REMOVED */];
            }

            while (\reset($to) !== $token) {
                $diff[] = [\array_shift($to), 1 /* ADDED */];
            }

            $diff[] = [$token, 0 /* OLD */];

            \array_shift($from);
            \array_shift($to);
        }

        // Whatever is left after the last common token; iterating (instead of shifting until
        // null) keeps tokens that follow a literal null element.
        foreach ($from as $token) {
            $diff[] = [$token, 2 /* REMOVED */];
        }

        foreach ($to as $token) {
            $diff[] = [$token, 1 /* ADDED */];
        }

        foreach ($end as $token) {
            $diff[] = [$token, 0 /* OLD */];
        }

        if ($this->detectUnmatchedLineEndings($diff)) {
            \array_unshift($diff, ["#Warning: Strings contain different line endings!\n", 3]);
        }

        return $diff;
    }

    /**
     * Splits a string into its lines; each line keeps its trailing line break.
     *
     * @param string $input
     *
     * @return array
     */
    private function splitStringByLines(string $input): array
    {
        return \preg_split('/(.*\R)/', $input, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Picks the LCS calculator based on the estimated memory footprint of the inputs.
     *
     * @param array $from
     * @param array $to
     *
     * @return LongestCommonSubsequenceCalculator
     */
    private function selectLcsImplementation(array $from, array $to): LongestCommonSubsequenceCalculator
    {
        // We do not want to use the time-efficient implementation if its memory
        // footprint will probably exceed this value. Note that the footprint
        // calculation is only an estimation for the matrix and the LCS method
        // will typically allocate a bit more memory than this.
        $memoryLimit = 100 * 1024 * 1024;

        if ($this->calculateEstimatedFootprint($from, $to) > $memoryLimit) {
            return new MemoryEfficientLongestCommonSubsequenceCalculator;
        }

        return new TimeEfficientLongestCommonSubsequenceCalculator;
    }

    /**
     * Calculates the estimated memory footprint for the DP-based method.
     *
     * The estimate is a per-item size (76 when PHP_INT_SIZE is 4, 144 otherwise)
     * multiplied by the square of the smaller input's element count.
     *
     * @param array $from
     * @param array $to
     *
     * @return float|int
     */
    private function calculateEstimatedFootprint(array $from, array $to)
    {
        $itemSize = PHP_INT_SIZE === 4 ? 76 : 144;

        return $itemSize * \min(\count($from), \count($to)) ** 2;
    }

    /**
     * Returns true if line ends don't match in a diff.
     *
     * Collects the set of line breaks seen on the old side (OLD + REMOVED entries)
     * and on the new side (OLD + ADDED entries) and reports whether the sets differ.
     *
     * @param array $diff
     *
     * @return bool
     */
    private function detectUnmatchedLineEndings(array $diff): bool
    {
        $newLineBreaks = ['' => true];
        $oldLineBreaks = ['' => true];

        foreach ($diff as $entry) {
            if (0 === $entry[1]) { /* OLD */
                $ln                 = $this->getLinebreak($entry[0]);
                $oldLineBreaks[$ln] = true;
                $newLineBreaks[$ln] = true;
            } elseif (1 === $entry[1]) { /* ADDED */
                $newLineBreaks[$this->getLinebreak($entry[0])] = true;
            } elseif (2 === $entry[1]) { /* REMOVED */
                $oldLineBreaks[$this->getLinebreak($entry[0])] = true;
            }
        }

        // if either input or output is a single line without breaks then no warning should be raised
        if (['' => true] === $newLineBreaks || ['' => true] === $oldLineBreaks) {
            return false;
        }

        // two way compare
        foreach ($newLineBreaks as $break => $set) {
            if (!isset($oldLineBreaks[$break])) {
                return true;
            }
        }

        foreach ($oldLineBreaks as $break => $set) {
            if (!isset($newLineBreaks[$break])) {
                return true;
            }
        }

        return false;
    }

    /**
     * Returns the line break ("\r\n", "\n" or "\r") a line ends with.
     *
     * @param mixed $line
     *
     * @return string the trailing line break, or '' for non-strings and lines without one
     */
    private function getLinebreak($line): string
    {
        if (!\is_string($line)) {
            return '';
        }

        $lc = \substr($line, -1);
        if ("\r" === $lc) {
            return "\r";
        }

        if ("\n" !== $lc) {
            return '';
        }

        if ("\r\n" === \substr($line, -2)) {
            return "\r\n";
        }

        return "\n";
    }

    /**
     * Splits off the common leading and trailing parts of two arrays.
     *
     * Leading entries are common when key and value are identical on both sides;
     * trailing entries are common when their values are identical. The common
     * entries are removed from $from and $to (both passed by reference).
     *
     * @param array $from
     * @param array $to
     *
     * @return array [remaining $from, remaining $to, common start, common end]
     */
    private static function getArrayDiffParted(array &$from, array &$to): array
    {
        $start = [];
        $end   = [];

        \reset($to);

        foreach ($from as $k => $v) {
            $toK = \key($to);

            if ($toK === $k && $v === $to[$k]) {
                $start[$k] = $v;

                unset($from[$k], $to[$k]);
            } else {
                break;
            }
        }

        \end($from);
        \end($to);

        do {
            $fromK = \key($from);
            $toK   = \key($to);

            // Stop as soon as either side is exhausted or the current tail values differ.
            if (null === $fromK || null === $toK || \current($from) !== \current($to)) {
                break;
            }

            // Step back before unsetting so the internal pointers stay on valid elements.
            \prev($from);
            \prev($to);

            // Prepend to keep the common tail in its original order.
            $end = [$fromK => $from[$fromK]] + $end;
            unset($from[$fromK], $to[$toK]);
        } while (true);

        return [$from, $to, $start, $end];
    }
}