<?php
/*
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace SebastianBergmann\Diff;

use SebastianBergmann\Diff\LCS\LongestCommonSubsequence;
use SebastianBergmann\Diff\LCS\TimeEfficientImplementation;
use SebastianBergmann\Diff\LCS\MemoryEfficientImplementation;

/**
 * Diff implementation.
 *
 * Produces a line-based diff of two strings (or two arrays of tokens), either
 * as an array of (token, status) pairs or rendered as text.
 */
class Differ
{
    /**
     * Text that is written at the very beginning of every rendered diff.
     *
     * @var string
     */
    private $header;

    /**
     * Whether unchanged lines and the "@@ @@" chunk markers are rendered.
     *
     * @var bool
     */
    private $showNonDiffLines;

    /**
     * @param string $header           Text prepended to the output of diff()
     * @param bool   $showNonDiffLines When false, diff() renders only added and
     *                                 removed lines (no context, no "@@ @@")
     */
    public function __construct($header = "--- Original\n+++ New\n", $showNonDiffLines = true)
    {
        $this->header           = $header;
        $this->showNonDiffLines = $showNonDiffLines;
    }

    /**
     * Returns the diff between two arrays or strings as string.
     *
     * Values that are neither arrays nor strings are cast to string first.
     *
     * @param array|string             $from
     * @param array|string             $to
     * @param LongestCommonSubsequence $lcs  Implementation to use; chosen
     *                                       automatically when null
     *
     * @return string
     */
    public function diff($from, $to, LongestCommonSubsequence $lcs = null)
    {
        $buffer = $this->header;
        $diff   = $this->diffToArray($from, $to, $lcs);

        // Collect every run of unchanged lines that is longer than five lines
        // and is followed by a change: index of first line => index of last line.
        $inOld = false;
        $old   = array();

        foreach ($diff as $i => $line) {
            if ($line[1] === 0 /* OLD */) {
                if ($inOld === false) {
                    $inOld = $i;
                }
            } elseif ($inOld !== false) {
                if (($i - $inOld) > 5) {
                    $old[$inOld] = $i - 1;
                }

                $inOld = false;
            }
        }

        // A long unchanged run at the very top is skipped up to its last line.
        $start = isset($old[0]) ? $old[0] : 0;
        $end   = count($diff);

        // Strict search with an explicit "not found" check, so that a match
        // stored under key 0 is not mistaken for a miss. Note that the loop
        // above only records values up to count($diff) - 2, so with the current
        // bookkeeping this lookup does not find anything.
        $tmp = array_search($end, $old, true);

        if ($tmp !== false) {
            $end = $tmp;
        }

        $newChunk = true;

        for ($i = $start; $i < $end; $i++) {
            if (isset($old[$i])) {
                // Jump over a long unchanged run; only its last line is kept
                // as context for the chunk that follows.
                $buffer  .= "\n";
                $newChunk = true;
                $i        = $old[$i];
            }

            if ($newChunk) {
                if ($this->showNonDiffLines === true) {
                    $buffer .= "@@ @@\n";
                }

                $newChunk = false;
            }

            if ($diff[$i][1] === 1 /* ADDED */) {
                $buffer .= '+' . $diff[$i][0] . "\n";
            } elseif ($diff[$i][1] === 2 /* REMOVED */) {
                $buffer .= '-' . $diff[$i][0] . "\n";
            } elseif ($this->showNonDiffLines === true) {
                $buffer .= ' ' . $diff[$i][0] . "\n";
            }
        }

        return $buffer;
    }

    /**
     * Returns the diff between two arrays or strings as array.
     *
     * Each array element contains two elements:
     *   - [0] => string $token
     *   - [1] => 2|1|0
     *
     * - 2: REMOVED: $token was removed from $from
     * - 1: ADDED: $token was added to $from
     * - 0: OLD: $token is not changed in $to
     *
     * Strings are split into lines on "\r\n", "\r" or "\n". Arrays are used as
     * token lists (their keys are ignored). Any other value is cast to string.
     * When both inputs are strings with the same number of line breaks but
     * different line-break sequences, a warning entry is put first.
     *
     * @param array|string             $from
     * @param array|string             $to
     * @param LongestCommonSubsequence $lcs  Implementation to use; chosen
     *                                       automatically when null
     *
     * @return array
     */
    public function diffToArray($from, $to, LongestCommonSubsequence $lcs = null)
    {
        list($from, $fromLineEndings) = $this->splitInput($from);
        list($to, $toLineEndings)     = $this->splitInput($to);

        $start      = array();
        $end        = array();
        $fromLength = count($from);
        $toLength   = count($to);
        $length     = min($fromLength, $toLength);

        // Strip the common prefix so the LCS only sees the differing middle.
        for ($i = 0; $i < $length; ++$i) {
            if ($from[$i] === $to[$i]) {
                $start[] = $from[$i];
                unset($from[$i], $to[$i]);
            } else {
                break;
            }
        }

        $length -= $i;

        // Strip the common suffix (counted from the end of each input).
        for ($i = 1; $i < $length; ++$i) {
            if ($from[$fromLength - $i] === $to[$toLength - $i]) {
                array_unshift($end, $from[$fromLength - $i]);
                unset($from[$fromLength - $i], $to[$toLength - $i]);
            } else {
                break;
            }
        }

        if ($lcs === null) {
            $lcs = $this->selectLcsImplementation($from, $to);
        }

        $common = $lcs->calculate(array_values($from), array_values($to));
        $diff   = array();

        // Same number of line breaks but not the same sequences, e.g. "\n"
        // in one input and "\r\n" in the other.
        if (count($fromLineEndings) === count($toLineEndings) &&
            $fromLineEndings !== $toLineEndings) {
            $diff[] = array(
                '#Warning: Strings contain different line endings!', 0
            );
        }

        foreach ($start as $token) {
            $diff[] = array($token, 0 /* OLD */);
        }

        foreach ($common as $token) {
            // Everything in front of the next common token was removed/added.
            // The emptiness guards stop the loops if $lcs hands back a token
            // that is not present in the remaining input.
            while ($from && reset($from) !== $token) {
                $diff[] = array(array_shift($from), 2 /* REMOVED */);
            }

            while ($to && reset($to) !== $token) {
                $diff[] = array(array_shift($to), 1 /* ADDED */);
            }

            $diff[] = array($token, 0 /* OLD */);

            array_shift($from);
            array_shift($to);
        }

        // Whatever is left after the last common token.
        foreach ($from as $token) {
            $diff[] = array($token, 2 /* REMOVED */);
        }

        foreach ($to as $token) {
            $diff[] = array($token, 1 /* ADDED */);
        }

        foreach ($end as $token) {
            $diff[] = array($token, 0 /* OLD */);
        }

        return $diff;
    }

    /**
     * Normalizes one diff input into a list of tokens plus its line endings.
     *
     * @param mixed $input Array of tokens, string, or value castable to string
     *
     * @return array Two elements: [0] the zero-indexed list of tokens/lines,
     *               [1] the list of line-break sequences found (always empty
     *               for array input)
     */
    private function splitInput($input)
    {
        if (is_array($input)) {
            // Re-index so positional access works for sparse/associative input.
            return array(array_values($input), array());
        }

        if (!is_string($input)) {
            $input = (string) $input;
        }

        preg_match_all('(\r\n|\r|\n)', $input, $matches);

        return array(preg_split('(\r\n|\r|\n)', $input), $matches[0]);
    }

    /**
     * Picks the LCS implementation based on the estimated memory footprint.
     *
     * @param array $from
     * @param array $to
     *
     * @return LongestCommonSubsequence
     */
    private function selectLcsImplementation(array $from, array $to)
    {
        // We do not want to use the time-efficient implementation if its memory
        // footprint will probably exceed this value. Note that the footprint
        // calculation is only an estimation for the matrix and the LCS method
        // will typically allocate a bit more memory than this.
        $memoryLimit = 100 * 1024 * 1024;

        if ($this->calculateEstimatedFootprint($from, $to) > $memoryLimit) {
            return new MemoryEfficientImplementation;
        }

        return new TimeEfficientImplementation;
    }

    /**
     * Calculates the estimated memory footprint for the DP-based method.
     *
     * The estimate is a per-item size (76 when PHP_INT_SIZE is 4, otherwise
     * 144) multiplied by the square of the shorter input's length.
     *
     * @param array $from
     * @param array $to
     *
     * @return int
     */
    private function calculateEstimatedFootprint(array $from, array $to)
    {
        $itemSize = PHP_INT_SIZE === 4 ? 76 : 144;

        return $itemSize * pow(min(count($from), count($to)), 2);
    }
}