_split($orig_lines);
    list($closing_words, $closing_stripped) = $this->_split($closing_lines);

    parent::__construct($orig_words, $closing_words, $orig_stripped, $closing_stripped);
  }

  /**
   * Tokenizes a list of lines into word-level tokens.
   *
   * Two parallel lists are built:
   * - "words": each token as matched, including at most one trailing
   *   non-newline whitespace character.
   * - "stripped": the same token without that trailing whitespace character.
   *
   * A "\n" token is inserted into both lists between consecutive lines (not
   * before the first line and not after the last one).
   *
   * @param array $lines
   *   The lines to tokenize.
   *
   * @return array
   *   A two-element array: the list of words and the list of stripped words.
   */
  protected function _split($lines) {
    $words = [];
    $stripped = [];
    $first = TRUE;
    foreach ($lines as $line) {
      // Separate consecutive lines with an explicit newline token.
      if ($first) {
        $first = FALSE;
      }
      else {
        $words[] = "\n";
        $stripped[] = "\n";
      }

      // If the line is too long, just pretend the entire line is one big word.
      // This prevents resource exhaustion problems.
      if (mb_strlen($line) > $this::MAX_LINE_LENGTH) {
        $words[] = $line;
        $stripped[] = $line;
        continue;
      }

      // Group 1 is a run of non-newline whitespace, a run of "word" bytes, or
      // any single character; it may be followed by one non-newline whitespace
      // character, which is part of the full match but not of group 1.
      //
      // NOTE(review): with the /x flag "(?!< \n)" is a negative lookahead for
      // "<" followed by a newline. Because the next character must be
      // whitespace for the optional group to match at all, that assertion can
      // never fail. It looks like a typo for the lookbehind "(?<! \n)"; the
      // pattern is deliberately left untouched here so that tokenization does
      // not change - confirm the intent before correcting it.
      if (preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs', $line, $m)) {
        // Append in place instead of array_merge(): merging inside the loop
        // copies the whole accumulated list once per line (quadratic growth).
        foreach ($m[0] as $index => $word) {
          $words[] = $word;
          $stripped[] = $m[1][$index];
        }
      }
    }
    return [$words, $stripped];
  }

  /**
   * Builds the "orig" side of the word-level diff.
   *
   * Words of 'copy' edits are added unmarked; the non-empty orig words of any
   * other edit are added with the 'mark' tag.
   *
   * @return mixed
   *   Whatever HWLDFWordAccumulator::getLines() returns for the accumulated
   *   words (presumably a list of lines - confirm against that class).
   */
  public function orig() {
    $orig = new HWLDFWordAccumulator();

    foreach ($this->edits as $edit) {
      if ($edit->type == 'copy') {
        $orig->addWords($edit->orig);
      }
      elseif ($edit->orig) {
        $orig->addWords($edit->orig, 'mark');
      }
    }
    return $orig->getLines();
  }

  /**
   * Builds the "closing" side of the word-level diff.
   *
   * Words of 'copy' edits are added unmarked; the non-empty closing words of
   * any other edit are added with the 'mark' tag.
   *
   * @return mixed
   *   Whatever HWLDFWordAccumulator::getLines() returns for the accumulated
   *   words (presumably a list of lines - confirm against that class).
   */
  public function closing() {
    $closing = new HWLDFWordAccumulator();

    foreach ($this->edits as $edit) {
      if ($edit->type == 'copy') {
        $closing->addWords($edit->closing);
      }
      elseif ($edit->closing) {
        $closing->addWords($edit->closing, 'mark');
      }
    }
    return $closing->getLines();
  }

}