<?php declare(strict_types=1);
/*
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace SebastianBergmann\Diff;

use SebastianBergmann\Diff\Output\DiffOutputBuilderInterface;
use SebastianBergmann\Diff\Output\UnifiedDiffOutputBuilder;

/**
 * Diff implementation.
 *
 * Computes the difference between two strings (compared line by line) or two
 * arrays (compared element by element) and renders it through an output
 * builder.
 */
final class Differ
{
    /**
     * Builder used by diff() to turn the diff array into a string.
     *
     * @var DiffOutputBuilderInterface
     */
    private $outputBuilder;

    /**
     * @param DiffOutputBuilderInterface|string|null $outputBuilder builder instance; null selects a
     *                                                              default UnifiedDiffOutputBuilder; a
     *                                                              string is passed to the
     *                                                              UnifiedDiffOutputBuilder constructor
     *                                                              (deprecated)
     *
     * @throws InvalidArgumentException when $outputBuilder is neither a builder, a string, nor null
     */
    public function __construct($outputBuilder = null)
    {
        if ($outputBuilder instanceof DiffOutputBuilderInterface) {
            $this->outputBuilder = $outputBuilder;
        } elseif (null === $outputBuilder) {
            $this->outputBuilder = new UnifiedDiffOutputBuilder;
        } elseif (\is_string($outputBuilder)) {
            // PHPUnit 6.1.4, 6.2.0, 6.2.1, 6.2.2, and 6.2.3 support
            // @see https://github.com/sebastianbergmann/phpunit/issues/2734#issuecomment-314514056
            // @deprecated
            $this->outputBuilder = new UnifiedDiffOutputBuilder($outputBuilder);
        } else {
            throw new InvalidArgumentException(
                \sprintf(
                    'Expected builder to be an instance of DiffOutputBuilderInterface, or a string, got %s.',
                    self::describeInvalidBuilder($outputBuilder)
                )
            );
        }
    }

    /**
     * Returns the diff between two arrays or strings as string.
     *
     * Values that are neither array nor string are cast to string first.
     *
     * @param array|string                            $from
     * @param array|string                            $to
     * @param LongestCommonSubsequenceCalculator|null $lcs  calculator to use; null lets diffToArray() pick one
     *
     * @return string
     */
    public function diff($from, $to, LongestCommonSubsequenceCalculator $lcs = null): string
    {
        $from = $this->validateDiffInput($from);
        $to   = $this->validateDiffInput($to);
        $diff = $this->diffToArray($from, $to, $lcs);

        return $this->outputBuilder->getDiff($diff);
    }

    /**
     * Returns the diff between two arrays or strings as array.
     *
     * Each array element contains two elements:
     *   - [0] => mixed $token
     *   - [1] => 3|2|1|0
     *
     * - 3: WARNING: prepended once when the two sides use different line endings
     * - 2: REMOVED: $token is present in $from but not in $to
     * - 1: ADDED: $token is present in $to but not in $from
     * - 0: OLD: $token is present in both $from and $to
     *
     * @param array|string                            $from
     * @param array|string                            $to
     * @param LongestCommonSubsequenceCalculator|null $lcs  calculator to use; null selects one based on
     *                                                      the estimated memory footprint
     *
     * @throws \InvalidArgumentException when $from or $to is neither an array nor a string
     *
     * @return array
     */
    public function diffToArray($from, $to, LongestCommonSubsequenceCalculator $lcs = null): array
    {
        if (\is_string($from)) {
            $from = $this->splitStringByLines($from);
        } elseif (!\is_array($from)) {
            throw new \InvalidArgumentException('"from" must be an array or string.');
        }

        if (\is_string($to)) {
            $to = $this->splitStringByLines($to);
        } elseif (!\is_array($to)) {
            throw new \InvalidArgumentException('"to" must be an array or string.');
        }

        // Peel off the identical head and tail so the LCS only sees the part that differs.
        list($from, $to, $start, $end) = self::getArrayDiffParted($from, $to);

        if ($lcs === null) {
            $lcs = $this->selectLcsImplementation($from, $to);
        }

        $common = $lcs->calculate(\array_values($from), \array_values($to));
        $diff   = [];

        foreach ($start as $token) {
            $diff[] = [$token, 0 /* OLD */];
        }

        foreach ($common as $token) {
            // Everything in front of the next common token exists on one side only.
            while (\reset($from) !== $token) {
                $diff[] = [\array_shift($from), 2 /* REMOVED */];
            }

            while (\reset($to) !== $token) {
                $diff[] = [\array_shift($to), 1 /* ADDED */];
            }

            $diff[] = [$token, 0 /* OLD */];

            \array_shift($from);
            \array_shift($to);
        }

        // Whatever is left after the last common token is one-sided. Iterating
        // (instead of shifting until null) keeps tokens that are themselves null.
        foreach ($from as $token) {
            $diff[] = [$token, 2 /* REMOVED */];
        }

        foreach ($to as $token) {
            $diff[] = [$token, 1 /* ADDED */];
        }

        foreach ($end as $token) {
            $diff[] = [$token, 0 /* OLD */];
        }

        if ($this->detectUnmatchedLineEndings($diff)) {
            \array_unshift($diff, ["#Warning: Strings contain different line endings!\n", 3]);
        }

        return $diff;
    }

    /**
     * Describes a rejected constructor argument for the exception message.
     *
     * @param mixed $outputBuilder
     *
     * @return string
     */
    private static function describeInvalidBuilder($outputBuilder): string
    {
        if (\is_object($outputBuilder)) {
            return 'instance of "' . \get_class($outputBuilder) . '"';
        }

        // Casting an array to string raises an "Array to string conversion"
        // notice; spell out the text the cast would have produced instead.
        $printable = \is_array($outputBuilder) ? 'Array' : (string) $outputBuilder;

        return \gettype($outputBuilder) . ' "' . $printable . '"';
    }

    /**
     * Casts variable to string if it is not a string or array.
     *
     * No return type is declared on purpose: arrays are handed back unchanged,
     * so a "string" return type would raise a TypeError for array input.
     *
     * @param mixed $input
     *
     * @return array|string
     */
    private function validateDiffInput($input)
    {
        if (!\is_array($input) && !\is_string($input)) {
            return (string) $input;
        }

        return $input;
    }

    /**
     * Splits a string into lines, keeping each line's terminator attached.
     *
     * @param string $input
     *
     * @return array
     */
    private function splitStringByLines(string $input): array
    {
        return \preg_split('/(.*\R)/', $input, -1, \PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Picks the LCS calculator according to the estimated memory footprint.
     *
     * @param array $from
     * @param array $to
     *
     * @return LongestCommonSubsequenceCalculator
     */
    private function selectLcsImplementation(array $from, array $to): LongestCommonSubsequenceCalculator
    {
        // We do not want to use the time-efficient implementation if its memory
        // footprint will probably exceed this value. Note that the footprint
        // calculation is only an estimation for the matrix and the LCS method
        // will typically allocate a bit more memory than this.
        $memoryLimit = 100 * 1024 * 1024;

        if ($this->calculateEstimatedFootprint($from, $to) > $memoryLimit) {
            return new MemoryEfficientLongestCommonSubsequenceCalculator;
        }

        return new TimeEfficientLongestCommonSubsequenceCalculator;
    }

    /**
     * Calculates the estimated memory footprint for the DP-based method.
     *
     * The estimate is a per-item size (chosen by PHP_INT_SIZE) multiplied by
     * the square of the shorter input's length.
     *
     * @param array $from
     * @param array $to
     *
     * @return float|int
     */
    private function calculateEstimatedFootprint(array $from, array $to)
    {
        $itemSize = \PHP_INT_SIZE === 4 ? 76 : 144;

        return $itemSize * \min(\count($from), \count($to)) ** 2;
    }

    /**
     * Returns true if line ends don't match in a diff.
     *
     * @param array $diff entries in the format produced by diffToArray()
     *
     * @return bool
     */
    private function detectUnmatchedLineEndings(array $diff): bool
    {
        // Sets of line terminators seen on each side; '' ("no terminator") is
        // always present so a missing final newline never counts as a mismatch.
        $newLineBreaks = ['' => true];
        $oldLineBreaks = ['' => true];

        foreach ($diff as $entry) {
            if (0 === $entry[1]) { /* OLD */
                $ln                 = $this->getLinebreak($entry[0]);
                $oldLineBreaks[$ln] = true;
                $newLineBreaks[$ln] = true;
            } elseif (1 === $entry[1]) { /* ADDED */
                $newLineBreaks[$this->getLinebreak($entry[0])] = true;
            } elseif (2 === $entry[1]) { /* REMOVED */
                $oldLineBreaks[$this->getLinebreak($entry[0])] = true;
            }
        }

        // if either input or output is a single line without breaks then no warning should be raised
        if (['' => true] === $newLineBreaks || ['' => true] === $oldLineBreaks) {
            return false;
        }

        // two way compare
        foreach ($newLineBreaks as $break => $set) {
            if (!isset($oldLineBreaks[$break])) {
                return true;
            }
        }

        foreach ($oldLineBreaks as $break => $set) {
            if (!isset($newLineBreaks[$break])) {
                return true;
            }
        }

        return false;
    }

    /**
     * Returns the terminator a line ends with: "\r\n", "\n", "\r", or ''
     * when there is none or the value is not a string.
     *
     * @param mixed $line
     *
     * @return string
     */
    private function getLinebreak($line): string
    {
        if (!\is_string($line)) {
            return '';
        }

        $lc = \substr($line, -1);

        if ("\r" === $lc) {
            return "\r";
        }

        if ("\n" !== $lc) {
            return '';
        }

        if ("\r\n" === \substr($line, -2)) {
            return "\r\n";
        }

        return "\n";
    }

    /**
     * Separates the entries both arrays share at their beginning and end.
     *
     * Both arguments are taken by reference and the shared entries are removed
     * from them in place.
     *
     * @param array $from
     * @param array $to
     *
     * @return array [remaining $from, remaining $to, shared leading entries, shared trailing entries]
     */
    private static function getArrayDiffParted(array &$from, array &$to): array
    {
        $start = [];
        $end   = [];

        \reset($to);

        // Leading run: entries count as shared only while key and value both match.
        foreach ($from as $k => $v) {
            $toK = \key($to);

            if ($toK === $k && $v === $to[$k]) {
                $start[$k] = $v;

                unset($from[$k], $to[$k]);
            } else {
                break;
            }
        }

        \end($from);
        \end($to);

        // Trailing run: walk both arrays backwards while the values are identical
        // (keys may differ here, since the arrays can have different lengths).
        do {
            $fromK = \key($from);
            $toK   = \key($to);

            if (null === $fromK || null === $toK || \current($from) !== \current($to)) {
                break;
            }

            // Step back before unsetting so the pointers stay on live elements.
            \prev($from);
            \prev($to);

            // Prepend so $end keeps the original order.
            $end = [$fromK => $from[$fromK]] + $end;
            unset($from[$fromK], $to[$toK]);
        } while (true);

        return [$from, $to, $start, $end];
    }
}