<?php
/*
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace SebastianBergmann\Diff;

use SebastianBergmann\Diff\LCS\LongestCommonSubsequence;
use SebastianBergmann\Diff\LCS\TimeEfficientImplementation;
use SebastianBergmann\Diff\LCS\MemoryEfficientImplementation;

/**
 * Diff implementation.
 *
 * Computes a token-based (for strings: line-based) diff between two inputs
 * using a longest-common-subsequence strategy and renders it either as an
 * array of [token, marker] pairs or as a patch-like string.
 */
class Differ
{
    /**
     * Text that is placed at the very beginning of every rendered diff.
     *
     * @var string
     */
    private $header;

    /**
     * Whether unchanged lines and the "@@ @@" hunk markers are rendered.
     *
     * @var bool
     */
    private $showNonDiffLines;

    /**
     * @param string $header           text emitted before the diff body
     * @param bool   $showNonDiffLines when false, only added/removed lines are rendered
     */
    public function __construct($header = "--- Original\n+++ New\n", $showNonDiffLines = true)
    {
        $this->header           = $header;
        $this->showNonDiffLines = $showNonDiffLines;
    }

    /**
     * Returns the diff between two arrays or strings as string.
     *
     * Values that are neither array nor string are cast to string first.
     * When the computed diff is empty (both inputs are empty arrays) only the
     * header is returned.
     *
     * @param array|string             $from
     * @param array|string             $to
     * @param LongestCommonSubsequence $lcs  optional LCS strategy; chosen automatically when null
     *
     * @return string
     */
    public function diff($from, $to, LongestCommonSubsequence $lcs = null)
    {
        $from = $this->validateDiffInput($from);
        $to   = $this->validateDiffInput($to);
        $diff = $this->diffToArray($from, $to, $lcs);
        $old  = $this->checkIfDiffInOld($diff);

        // A long unchanged run at the very top is collapsed: rendering then
        // starts at the last line of that run.
        $start = isset($old[0]) ? $old[0] : 0;
        $end   = \count($diff);

        // Strict search with an explicit false check, so that a match on key 0
        // could not be mistaken for "not found".
        // NOTE: checkIfDiffInOld() only records runs that are followed by a
        // changed line, so every recorded value is smaller than count($diff)
        // and this branch currently never changes $end.
        $tmp = \array_search($end, $old, true);

        if ($tmp !== false) {
            $end = $tmp;
        }

        return $this->getBuffer($diff, $old, $start, $end);
    }

    /**
     * Casts variable to string if it is not a string or array.
     *
     * @param mixed $input
     *
     * @return array|string the input itself when it already is an array or string
     */
    private function validateDiffInput($input)
    {
        if (!\is_array($input) && !\is_string($input)) {
            return (string) $input;
        }

        return $input;
    }

    /**
     * Locates long runs of unchanged lines inside a diff array.
     *
     * Iterates through the diff line by line and records every run of more
     * than five consecutive unchanged (OLD) lines that is followed by a
     * changed line. A run that reaches the end of the diff is not recorded.
     *
     * @param array $diff list of [token, marker] pairs as built by diffToArray()
     *
     * @return array map of "index of the first line of the run" => "index of the last line of the run"
     */
    private function checkIfDiffInOld(array $diff)
    {
        $inOld = false;
        $i     = 0;
        $old   = array();

        foreach ($diff as $line) {
            if ($line[1] === 0 /* OLD */) {
                // Remember where the current unchanged run began.
                if ($inOld === false) {
                    $inOld = $i;
                }
            } elseif ($inOld !== false) {
                // The run ended at $i - 1; keep it only when it is long enough.
                if (($i - $inOld) > 5) {
                    $old[$inOld] = $i - 1;
                }

                $inOld = false;
            }

            ++$i;
        }

        return $old;
    }

    /**
     * Generates buffer in string format, returning the patch.
     *
     * @param array $diff  list of [token, marker] pairs
     * @param array $old   long unchanged runs as returned by checkIfDiffInOld()
     * @param int   $start index of the first diff line to render
     * @param int   $end   index one past the last diff line to render
     *
     * @return string the header followed by the rendered diff lines
     */
    private function getBuffer(array $diff, array $old, $start, $end)
    {
        $buffer = $this->header;

        // Nothing to render: without this guard the code below would read the
        // non-existent $diff[$start] and emit a bogus hunk with an empty line.
        if ($diff === array()) {
            return $buffer;
        }

        // Open the first hunk unless the loop below is going to open it.
        if (!isset($old[$start])) {
            $buffer = $this->getDiffBufferElementNew($diff, $buffer, $start);
            ++$start;
        }

        for ($i = $start; $i < $end; $i++) {
            if (isset($old[$i])) {
                // Jump over the long unchanged run and start a new hunk with
                // the last line of that run.
                $i      = $old[$i];
                $buffer = $this->getDiffBufferElementNew($diff, $buffer, $i);
            } else {
                $buffer = $this->getDiffBufferElement($diff, $buffer, $i);
            }
        }

        return $buffer;
    }

    /**
     * Gets individual buffer element.
     *
     * Appends the diff line at $diffIndex to the buffer, prefixed with "+"
     * (added), "-" (removed) or " " (unchanged). Unchanged lines are only
     * appended when non-diff lines are shown.
     *
     * @param array  $diff
     * @param string $buffer
     * @param int    $diffIndex
     *
     * @return string the extended buffer
     */
    private function getDiffBufferElement(array $diff, $buffer, $diffIndex)
    {
        if ($diff[$diffIndex][1] === 1 /* ADDED */) {
            $buffer .= '+' . $diff[$diffIndex][0] . "\n";
        } elseif ($diff[$diffIndex][1] === 2 /* REMOVED */) {
            $buffer .= '-' . $diff[$diffIndex][0] . "\n";
        } elseif ($this->showNonDiffLines === true) {
            $buffer .= ' ' . $diff[$diffIndex][0] . "\n";
        }

        return $buffer;
    }

    /**
     * Gets individual buffer element with opening.
     *
     * Same as getDiffBufferElement(), but first appends the "@@ @@" hunk
     * marker when non-diff lines are shown.
     *
     * @param array  $diff
     * @param string $buffer
     * @param int    $diffIndex
     *
     * @return string the extended buffer
     */
    private function getDiffBufferElementNew(array $diff, $buffer, $diffIndex)
    {
        if ($this->showNonDiffLines === true) {
            $buffer .= "@@ @@\n";
        }

        return $this->getDiffBufferElement($diff, $buffer, $diffIndex);
    }

    /**
     * Returns the diff between two arrays or strings as array.
     *
     * Each array element contains two elements:
     *   - [0] => mixed $token
     *   - [1] => 2|1|0
     *
     * - 2: REMOVED: $token is present in $from but not in $to
     * - 1: ADDED: $token is present in $to but not in $from
     * - 0: OLD: $token is unchanged
     *
     * Strings are split into lines first. When both inputs are strings with
     * the same number of line breaks but different line-break characters, a
     * warning entry marked as OLD is placed at the beginning of the result.
     *
     * @param array|string             $from
     * @param array|string             $to
     * @param LongestCommonSubsequence $lcs  optional LCS strategy; chosen automatically when null
     *
     * @throws \InvalidArgumentException when $from or $to is neither array nor string
     *
     * @return array
     */
    public function diffToArray($from, $to, LongestCommonSubsequence $lcs = null)
    {
        if (\is_string($from)) {
            $fromMatches = $this->getNewLineMatches($from);
            $from        = $this->splitStringByLines($from);
        } elseif (\is_array($from)) {
            $fromMatches = array();
        } else {
            throw new \InvalidArgumentException('"from" must be an array or string.');
        }

        if (\is_string($to)) {
            $toMatches = $this->getNewLineMatches($to);
            $to        = $this->splitStringByLines($to);
        } elseif (\is_array($to)) {
            $toMatches = array();
        } else {
            throw new \InvalidArgumentException('"to" must be an array or string.');
        }

        // Strip the common prefix and suffix so that the LCS computation only
        // has to deal with the part that actually differs.
        list($from, $to, $start, $end) = self::getArrayDiffParted($from, $to);

        if ($lcs === null) {
            $lcs = $this->selectLcsImplementation($from, $to);
        }

        $common = $lcs->calculate(\array_values($from), \array_values($to));
        $diff   = array();

        if ($this->detectUnmatchedLineEndings($fromMatches, $toMatches)) {
            $diff[] = array(
                '#Warning: Strings contain different line endings!',
                0
            );
        }

        foreach ($start as $token) {
            $diff[] = array($token, 0 /* OLD */);
        }

        \reset($from);
        \reset($to);

        foreach ($common as $token) {
            // Everything in front of the next common token was removed or
            // added. The emptiness guard keeps the loops finite even when a
            // custom LCS strategy returns a token that is not present in the
            // remaining input (reset() would then return false forever).
            while ($from !== array() && \reset($from) !== $token) {
                $diff[] = array(\array_shift($from), 2 /* REMOVED */);
            }

            while ($to !== array() && \reset($to) !== $token) {
                $diff[] = array(\array_shift($to), 1 /* ADDED */);
            }

            $diff[] = array($token, 0 /* OLD */);

            \array_shift($from);
            \array_shift($to);
        }

        // Whatever is left after the last common token. foreach is used
        // instead of comparing array_shift() against null so that null tokens
        // do not cut the remainder short.
        foreach ($from as $token) {
            $diff[] = array($token, 2 /* REMOVED */);
        }

        foreach ($to as $token) {
            $diff[] = array($token, 1 /* ADDED */);
        }

        foreach ($end as $token) {
            $diff[] = array($token, 0 /* OLD */);
        }

        return $diff;
    }

    /**
     * Collects the line-break sequences ("\r\n", "\r" or "\n") of a string.
     *
     * @param string $string
     *
     * @return array match array as filled by preg_match_all(); element 0 lists every line break found
     */
    private function getNewLineMatches($string)
    {
        \preg_match_all('(\r\n|\r|\n)', $string, $stringMatches);

        return $stringMatches;
    }

    /**
     * Splits a string into lines at "\r\n", "\r" or "\n".
     *
     * @param string $input
     *
     * @return array
     */
    private function splitStringByLines($input)
    {
        return \preg_split('(\r\n|\r|\n)', $input);
    }

    /**
     * Picks the LCS strategy based on the estimated memory footprint.
     *
     * @param array $from
     * @param array $to
     *
     * @return LongestCommonSubsequence
     */
    private function selectLcsImplementation(array $from, array $to)
    {
        // We do not want to use the time-efficient implementation if its memory
        // footprint will probably exceed this value. Note that the footprint
        // calculation is only an estimation for the matrix and the LCS method
        // will typically allocate a bit more memory than this.
        $memoryLimit = 100 * 1024 * 1024;

        if ($this->calculateEstimatedFootprint($from, $to) > $memoryLimit) {
            return new MemoryEfficientImplementation;
        }

        return new TimeEfficientImplementation;
    }

    /**
     * Calculates the estimated memory footprint for the DP-based method.
     *
     * The estimate is a per-item size (76 when PHP_INT_SIZE is 4, otherwise
     * 144 - presumably bytes per matrix cell, not verifiable from this file)
     * multiplied by the square of the shorter input's length.
     *
     * @param array $from
     * @param array $to
     *
     * @return int|float
     */
    private function calculateEstimatedFootprint(array $from, array $to)
    {
        $itemSize = PHP_INT_SIZE === 4 ? 76 : 144;

        return $itemSize * \pow(\min(\count($from), \count($to)), 2);
    }

    /**
     * Returns true if line ends don't match on fromMatches and toMatches.
     *
     * A mismatch is only reported when both inputs contain the same number of
     * line breaks but the sequences of line-break characters are not
     * identical; inputs with a different number of line breaks yield false.
     *
     * @param array $fromMatches result of getNewLineMatches() for "from", or an empty array
     * @param array $toMatches   result of getNewLineMatches() for "to", or an empty array
     *
     * @return bool
     */
    private function detectUnmatchedLineEndings(array $fromMatches, array $toMatches)
    {
        return isset($fromMatches[0], $toMatches[0]) &&
               \count($fromMatches[0]) === \count($toMatches[0]) &&
               $fromMatches[0] !== $toMatches[0];
    }

    /**
     * Splits off the identical leading and trailing parts of two arrays.
     *
     * Both arrays are passed by reference and are reduced to their differing
     * middle part; the reduced arrays keep their original keys.
     *
     * @param array $from
     * @param array $to
     *
     * @return array [$from, $to, $start, $end] - the reduced inputs, the common prefix and the common suffix
     */
    private static function getArrayDiffParted(array &$from, array &$to)
    {
        $start = array();
        $end   = array();

        \reset($to);

        // Common prefix: walk $from and compare against the element the
        // internal pointer of $to currently designates.
        foreach ($from as $k => $v) {
            $toK = \key($to);

            if ($toK === $k && $v === $to[$k]) {
                $start[$k] = $v;

                unset($from[$k], $to[$k]);
            } else {
                break;
            }
        }

        \end($from);
        \end($to);

        // Common suffix: walk both arrays backwards from their last element.
        do {
            $fromK = \key($from);
            $toK   = \key($to);

            if (null === $fromK || null === $toK || \current($from) !== \current($to)) {
                break;
            }

            // Move the pointers before removing the elements they point at.
            \prev($from);
            \prev($to);

            // Prepend, so that $end stays in original order.
            $end = array($fromK => $from[$fromK]) + $end;
            unset($from[$fromK], $to[$toK]);
        } while (true);

        return array($from, $to, $start, $end);
    }
}