annotate vendor/sebastian/diff/src/Differ.php @ 19:fa3358dc1485 tip

Add ndrum files
author Chris Cannam
date Wed, 28 Aug 2019 13:14:47 +0100
parents 1fec387a4317
children
rev   line source
Chris@14 1 <?php declare(strict_types=1);
Chris@0 2 /*
Chris@12 3 * This file is part of sebastian/diff.
Chris@0 4 *
Chris@0 5 * (c) Sebastian Bergmann <sebastian@phpunit.de>
Chris@0 6 *
Chris@0 7 * For the full copyright and license information, please view the LICENSE
Chris@0 8 * file that was distributed with this source code.
Chris@0 9 */
Chris@0 10
Chris@0 11 namespace SebastianBergmann\Diff;
Chris@0 12
Chris@14 13 use SebastianBergmann\Diff\Output\DiffOutputBuilderInterface;
Chris@14 14 use SebastianBergmann\Diff\Output\UnifiedDiffOutputBuilder;
Chris@0 15
/**
 * Diff implementation.
 *
 * Computes the difference between two strings (split into lines) or two
 * arrays of tokens and hands the result to an output builder for rendering.
 */
final class Differ
{
    /**
     * Diff entry type: the token is unchanged.
     */
    const OLD = 0;

    /**
     * Diff entry type: the token only exists in "to".
     */
    const ADDED = 1;

    /**
     * Diff entry type: the token only exists in "from".
     */
    const REMOVED = 2;

    /**
     * Diff entry type: warning that the inputs use different line endings.
     */
    const DIFF_LINE_END_WARNING = 3;

    /**
     * @var DiffOutputBuilderInterface
     */
    private $outputBuilder;

    /**
     * @param DiffOutputBuilderInterface|string|null $outputBuilder builder instance, null for the
     *                                                              default unified builder, or
     *                                                              (deprecated) a string passed to
     *                                                              the unified builder's constructor
     *
     * @throws InvalidArgumentException when $outputBuilder is none of the accepted kinds
     */
    public function __construct($outputBuilder = null)
    {
        if ($outputBuilder instanceof DiffOutputBuilderInterface) {
            $this->outputBuilder = $outputBuilder;
        } elseif (null === $outputBuilder) {
            $this->outputBuilder = new UnifiedDiffOutputBuilder;
        } elseif (\is_string($outputBuilder)) {
            // PHPUnit 6.1.4, 6.2.0, 6.2.1, 6.2.2, and 6.2.3 support
            // @see https://github.com/sebastianbergmann/phpunit/issues/2734#issuecomment-314514056
            // @deprecated
            $this->outputBuilder = new UnifiedDiffOutputBuilder($outputBuilder);
        } else {
            if (\is_object($outputBuilder)) {
                $description = 'instance of "' . \get_class($outputBuilder) . '"';
            } else {
                // Casting an array to string raises an "Array to string conversion"
                // diagnostic, which can pre-empt the exception below when warnings are
                // converted to exceptions. Spell out the text the cast would produce.
                $printable   = \is_array($outputBuilder) ? 'Array' : (string) $outputBuilder;
                $description = \gettype($outputBuilder) . ' "' . $printable . '"';
            }

            throw new InvalidArgumentException(
                \sprintf(
                    'Expected builder to be an instance of DiffOutputBuilderInterface, <null> or a string, got %s.',
                    $description
                )
            );
        }
    }

    /**
     * Returns the diff between two arrays or strings as string.
     *
     * Values that are neither array nor string are cast to string first.
     *
     * @param array|string                            $from
     * @param array|string                            $to
     * @param LongestCommonSubsequenceCalculator|null $lcs  calculator to use; selected automatically when null
     *
     * @return string
     */
    public function diff($from, $to, LongestCommonSubsequenceCalculator $lcs = null): string
    {
        $diff = $this->diffToArray(
            $this->validateDiffInput($from),
            $this->validateDiffInput($to),
            $lcs
        );

        return $this->outputBuilder->getDiff($diff);
    }

    /**
     * Casts variable to string if it is not a string or array.
     *
     * No return type is declared on purpose: arrays are passed through
     * unchanged, so a "string" return type would raise a TypeError for
     * array input.
     *
     * @param mixed $input
     *
     * @return array|string
     */
    private function validateDiffInput($input)
    {
        if (\is_array($input) || \is_string($input)) {
            return $input;
        }

        return (string) $input;
    }

    /**
     * Returns the diff between two arrays or strings as array.
     *
     * Each array element contains two elements:
     *   - [0] => mixed $token
     *   - [1] => 3|2|1|0
     *
     * - 3: WARNING: inputs contain different line endings (only ever the first element)
     * - 2: REMOVED: $token was removed from $from
     * - 1: ADDED: $token was added in $to
     * - 0: OLD: $token is not changed in $to
     *
     * @param array|string                            $from
     * @param array|string                            $to
     * @param LongestCommonSubsequenceCalculator|null $lcs  calculator to use; selected automatically when null
     *
     * @throws \InvalidArgumentException when $from or $to is neither array nor string
     *
     * @return array
     */
    public function diffToArray($from, $to, LongestCommonSubsequenceCalculator $lcs = null): array
    {
        if (\is_string($from)) {
            $from = $this->splitStringByLines($from);
        } elseif (!\is_array($from)) {
            throw new \InvalidArgumentException('"from" must be an array or string.');
        }

        if (\is_string($to)) {
            $to = $this->splitStringByLines($to);
        } elseif (!\is_array($to)) {
            throw new \InvalidArgumentException('"to" must be an array or string.');
        }

        // Strip the common prefix ($start) and suffix ($end) so that the LCS
        // calculation only has to look at the part that actually differs.
        list($from, $to, $start, $end) = self::getArrayDiffParted($from, $to);

        if ($lcs === null) {
            $lcs = $this->selectLcsImplementation($from, $to);
        }

        $common = $lcs->calculate(\array_values($from), \array_values($to));
        $diff   = [];

        foreach ($start as $token) {
            $diff[] = [$token, self::OLD];
        }

        foreach ($common as $token) {
            // Everything in front of the next common token was removed ...
            while (\reset($from) !== $token) {
                $diff[] = [\array_shift($from), self::REMOVED];
            }

            // ... respectively added.
            while (\reset($to) !== $token) {
                $diff[] = [\array_shift($to), self::ADDED];
            }

            $diff[] = [$token, self::OLD];

            \array_shift($from);
            \array_shift($to);
        }

        // Whatever is left after the last common token. Iterating with foreach
        // (instead of shifting until null) keeps tokens that are themselves null.
        foreach ($from as $token) {
            $diff[] = [$token, self::REMOVED];
        }

        foreach ($to as $token) {
            $diff[] = [$token, self::ADDED];
        }

        foreach ($end as $token) {
            $diff[] = [$token, self::OLD];
        }

        if ($this->detectUnmatchedLineEndings($diff)) {
            \array_unshift($diff, ["#Warning: Strings contain different line endings!\n", self::DIFF_LINE_END_WARNING]);
        }

        return $diff;
    }

    /**
     * Splits a string into lines; every line keeps its trailing line break.
     *
     * @param string $input
     *
     * @return array
     */
    private function splitStringByLines(string $input): array
    {
        return \preg_split('/(.*\R)/', $input, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Picks the LCS calculator based on the estimated memory footprint.
     *
     * @param array $from
     * @param array $to
     *
     * @return LongestCommonSubsequenceCalculator
     */
    private function selectLcsImplementation(array $from, array $to): LongestCommonSubsequenceCalculator
    {
        // We do not want to use the time-efficient implementation if its memory
        // footprint will probably exceed this value. Note that the footprint
        // calculation is only an estimation for the matrix and the LCS method
        // will typically allocate a bit more memory than this.
        $memoryLimit = 100 * 1024 * 1024;

        if ($this->calculateEstimatedFootprint($from, $to) > $memoryLimit) {
            return new MemoryEfficientLongestCommonSubsequenceCalculator;
        }

        return new TimeEfficientLongestCommonSubsequenceCalculator;
    }

    /**
     * Calculates the estimated memory footprint for the DP-based method.
     *
     * The estimate is a per-item size (76 when PHP_INT_SIZE is 4, otherwise
     * 144) multiplied by the square of the shorter input's length.
     *
     * @param array $from
     * @param array $to
     *
     * @return int|float
     */
    private function calculateEstimatedFootprint(array $from, array $to)
    {
        $itemSize = PHP_INT_SIZE === 4 ? 76 : 144;

        return $itemSize * \min(\count($from), \count($to)) ** 2;
    }

    /**
     * Returns true if line ends don't match in a diff.
     *
     * @param array $diff list of [token, type] entries as built by diffToArray()
     *
     * @return bool
     */
    private function detectUnmatchedLineEndings(array $diff): bool
    {
        // Sets (line break => true) of the line breaks seen on each side. The
        // empty string stands for "no line break" and is always allowed.
        $newLineBreaks = ['' => true];
        $oldLineBreaks = ['' => true];

        foreach ($diff as $entry) {
            if (self::OLD === $entry[1]) {
                $ln                 = $this->getLinebreak($entry[0]);
                $oldLineBreaks[$ln] = true;
                $newLineBreaks[$ln] = true;
            } elseif (self::ADDED === $entry[1]) {
                $newLineBreaks[$this->getLinebreak($entry[0])] = true;
            } elseif (self::REMOVED === $entry[1]) {
                $oldLineBreaks[$this->getLinebreak($entry[0])] = true;
            }
        }

        // if either input or output is a single line without breaks then no warning should be raised
        if (['' => true] === $newLineBreaks || ['' => true] === $oldLineBreaks) {
            return false;
        }

        // two way compare
        foreach ($newLineBreaks as $break => $set) {
            if (!isset($oldLineBreaks[$break])) {
                return true;
            }
        }

        foreach ($oldLineBreaks as $break => $set) {
            if (!isset($newLineBreaks[$break])) {
                return true;
            }
        }

        return false;
    }

    /**
     * Returns the line break ("\r\n", "\n" or "\r") a line ends with.
     *
     * @param mixed $line
     *
     * @return string the line break, or '' when $line is not a string or does not end with one
     */
    private function getLinebreak($line): string
    {
        if (!\is_string($line)) {
            return '';
        }

        $lc = \substr($line, -1);

        if ("\r" === $lc) {
            return "\r";
        }

        if ("\n" !== $lc) {
            return '';
        }

        if ("\r\n" === \substr($line, -2)) {
            return "\r\n";
        }

        return "\n";
    }

    /**
     * Strips the common leading and trailing elements off both arrays.
     *
     * Both arguments are taken by reference and are modified in place; the
     * remaining (differing) parts are returned as well.
     *
     * @param array $from
     * @param array $to
     *
     * @return array [$from, $to, $start, $end] where $start / $end hold the common prefix / suffix
     */
    private static function getArrayDiffParted(array &$from, array &$to): array
    {
        $start = [];
        $end   = [];

        \reset($to);

        // Common prefix: key and value must be identical on both sides.
        foreach ($from as $k => $v) {
            $toK = \key($to);

            if ($toK === $k && $v === $to[$k]) {
                $start[$k] = $v;

                unset($from[$k], $to[$k]);
            } else {
                break;
            }
        }

        \end($from);
        \end($to);

        // Common suffix: walk backwards from the end of both arrays while the
        // values are identical (keys may differ here).
        do {
            $fromK = \key($from);
            $toK   = \key($to);

            if (null === $fromK || null === $toK || \current($from) !== \current($to)) {
                break;
            }

            // Step back before unsetting the elements the pointers are on.
            \prev($from);
            \prev($to);

            $end = [$fromK => $from[$fromK]] + $end;
            unset($from[$fromK], $to[$toK]);
        } while (true);

        return [$from, $to, $start, $end];
    }
}