#!/usr/bin/env php
<?php

/**
 * Builds a SQLite database of plain-text PHP manual entries.
 *
 * Usage: build_manual path/to/manual output_filename.db
 *
 * Walks the DocBook XML sources below the first argument, renders every
 * refentry into a text block (description, parameters, return value) and
 * stores it in table `php_manual (id, doc)` of the SQLite file named by the
 * second argument. An existing output file is replaced.
 *
 * NOTE(review): the reviewed copy of this file was a VCS-annotate dump that
 * had also been through HTML extraction. Everything between the shebang and
 * the '>' case of htmlwrap() was missing, `&amp;` had been decoded to `&`,
 * the strip_tags() allow-list was empty and runs of spaces inside string
 * literals were collapsed. The WRAP_WIDTH value, the argument check and the
 * first half of htmlwrap() are reconstructed from the surviving code and its
 * call sites -- confirm them against the canonical source.
 */

// NOTE(review): value reconstructed, not visible in the reviewed copy -- TODO confirm.
define('WRAP_WIDTH', 100);

if (count($argv) !== 3 || !is_dir($argv[1])) {
    fwrite(STDERR, "usage: build_manual path/to/manual output_filename.db\n");
    exit(1);
}

/**
 * Word-wraps text while ignoring the width of inline tags.
 *
 * Characters between '<' and '>' are tracked in $tagWidth and do not count
 * towards the line width, so markup such as <strong>..</strong> does not
 * cause early wrapping. Existing newlines are kept as hard breaks.
 *
 * @param string   $text  Text to wrap; may contain inline tags.
 * @param int|null $width Maximum visible width; defaults to WRAP_WIDTH.
 *
 * @return string Wrapped text, lines joined with "\n".
 */
function htmlwrap($text, $width = null)
{
    if ($width === null) {
        $width = WRAP_WIDTH;
    }

    $len = strlen($text);

    $return = array();
    $lastSpace = null;
    $inTag = false;
    $i = $tagWidth = 0;

    do {
        switch (substr($text, $i, 1)) {
            case "\n":
                // Hard break: flush what precedes it and restart counting.
                $return[] = trim(substr($text, 0, $i));
                $text = substr($text, $i);
                $len = strlen($text);

                $i = $lastSpace = 0;
                break;

            case ' ':
                $lastSpace = $i;
                break;

            case '<':
                $inTag = true;
                break;

            case '>':
                $inTag = false;
                break;

            default:
        }

        if ($inTag) {
            $tagWidth++;
        }

        $i++;

        if (!$inTag && ($i - $tagWidth > $width)) {
            // Break at the last space seen; with none (or only a leading
            // one) fall back to a hard cut at $width.
            $lastSpace = $lastSpace ?: $width;

            $return[] = trim(substr($text, 0, $lastSpace));
            $text = substr($text, $lastSpace);
            $len = strlen($text);

            $i = $tagWidth = 0;
        }
    } while ($i < $len);

    $return[] = trim($text);

    return implode("\n", $return);
}

/**
 * Collects the text of every <para> descendant of an element.
 *
 * Child nodes are serialized with saveXML(), so inline markup is kept and
 * special characters stay XML-escaped. Tables and children consisting solely
 * of an entity reference (e.g. "&reftitle.description;") are skipped.
 *
 * @param DOMElement $element Element to search.
 *
 * @return string Paragraphs separated by a blank line.
 */
function extract_paragraphs($element)
{
    $paragraphs = array();
    foreach ($element->getElementsByTagName('para') as $p) {
        $text = '';
        foreach ($p->childNodes as $child) {
            // @todo figure out if there's something we can do with tables.
            if ($child instanceof DOMElement && $child->tagName === 'table') {
                continue;
            }

            // Skip children that are nothing but an entity reference.
            if (preg_match('{^\s*&[a-z][a-z\.]+;\s*$}', $child->textContent)) {
                continue;
            }

            $text .= $child->ownerDocument->saveXML($child);
        }

        // Fold source indentation after line breaks into single spaces.
        $text = trim(preg_replace('{\n[ \t]+}', ' ', $text));
        if ($text !== '') {
            $paragraphs[] = $text;
        }
    }

    return implode("\n\n", $paragraphs);
}

/**
 * Renders a parsed doc array as a plain-text block.
 *
 * @param array $doc Optional keys: 'description' (string), 'params' (list of
 *                   arrays with 'type', 'name', 'description'), 'return'
 *                   (string) and 'return_type' (string).
 *
 * @return string "Description:", "Param:" and "Return:" sections separated by
 *                blank lines; empty string when $doc has none of them.
 */
function format_doc($doc)
{
    $chunks = array();

    if (!empty($doc['description'])) {
        $chunks[] = 'Description:';
        $chunks[] = indent_text(htmlwrap(thunk_tags($doc['description']), WRAP_WIDTH - 2));
        $chunks[] = '';
    }

    if (!empty($doc['params'])) {
        $chunks[] = 'Param:';

        $typeMax = max(array_map(function ($param) {
            return strlen((string) $param['type']);
        }, $doc['params']));

        $max = max(array_map(function ($param) {
            return strlen($param['name']);
        }, $doc['params']));

        // Three two-space gutters plus both columns: $typeMax + $max + 6.
        $template = '  %-' . $typeMax . 's  %-' . $max . 's  %s';
        $indent = str_repeat(' ', $typeMax + $max + 6);
        $wrapWidth = WRAP_WIDTH - strlen($indent);

        foreach ($doc['params'] as $param) {
            $desc = indent_text(htmlwrap(thunk_tags($param['description']), $wrapWidth), $indent, false);
            $chunks[] = sprintf($template, $param['type'], $param['name'], $desc);
        }
        $chunks[] = '';
    }

    if (isset($doc['return']) || isset($doc['return_type'])) {
        $chunks[] = 'Return:';

        $type = isset($doc['return_type']) ? $doc['return_type'] : 'unknown';
        $desc = isset($doc['return']) ? $doc['return'] : '';

        // Two two-space gutters plus the type column.
        $indent = str_repeat(' ', strlen($type) + 4);
        $wrapWidth = WRAP_WIDTH - strlen($indent);

        if (!empty($desc)) {
            $desc = indent_text(htmlwrap(thunk_tags($desc), $wrapWidth), $indent, false);
        }

        $chunks[] = sprintf('  %s  %s', $type, $desc);
        $chunks[] = '';
    }

    array_pop($chunks); // get rid of the trailing newline

    return implode("\n", $chunks);
}

/**
 * Converts DocBook inline tags to output tags and undoes entity escaping.
 *
 * The tag map rewrites only the "name>" suffix, so opening and closing tags
 * are both covered. Every tag other than strong/return/info is stripped.
 *
 * @param string $text Serialized XML fragment.
 *
 * @return string
 */
function thunk_tags($text)
{
    $tagMap = array(
        'parameter>' => 'strong>',
        'function>'  => 'strong>',
        'literal>'   => 'return>',
        'type>'      => 'info>',
        'constant>'  => 'info>',
    );

    // The driver escapes every '&' before parsing, so serialized text carries
    // '&amp;'. strtr() tries the longest key first, so '&amp;true;' is
    // replaced as a whole before the bare '&amp;' rule can apply.
    $andBack = array(
        '&amp;'       => '&',
        '&amp;true;'  => 'true',
        '&amp;false;' => 'false',
        '&amp;null;'  => 'null',
    );

    return strtr(strip_tags(strtr($text, $tagMap), '<strong><return><info>'), $andBack);
}

/**
 * Indents every line of a text block.
 *
 * @param string $text    Text to indent.
 * @param string $indent  Prefix inserted after each newline.
 * @param bool   $leading Whether the first line is prefixed as well.
 *
 * @return string
 */
function indent_text($text, $indent = '  ', $leading = true)
{
    return ($leading ? $indent : '') . str_replace("\n", "\n" . $indent, $text);
}

/**
 * Looks up the declared type of a parameter in the method synopsis.
 *
 * @param DOMDocument $xml       Parsed manual page.
 * @param string      $paramName Parameter name without the leading '$'.
 *
 * @return string|null Type text of the first matching <methodparam>, or null.
 */
function find_type($xml, $paramName)
{
    foreach ($xml->getElementsByTagName('methodparam') as $param) {
        $type = $param->getElementsByTagName('type')->item(0);
        $parameter = $param->getElementsByTagName('parameter')->item(0);

        if ($type && $parameter && $paramName === $parameter->textContent) {
            return $type->textContent;
        }
    }

    return null;
}

/**
 * Builds the documentation for a function reference page.
 *
 * @param DOMDocument $xml Parsed page containing a <refentry>.
 *
 * @return array Two elements: the list of <refname> texts the page documents,
 *               and the formatted documentation string.
 */
function format_function_doc($xml)
{
    $doc = array();
    $refsect1s = $xml->getElementsByTagName('refsect1');
    foreach ($refsect1s as $refsect1) {
        $role = $refsect1->getAttribute('role');
        switch ($role) {
            case 'description':
                $doc['description'] = extract_paragraphs($refsect1);

                // The return type is the first <type> directly under the synopsis.
                if ($synopsis = $refsect1->getElementsByTagName('methodsynopsis')->item(0)) {
                    foreach ($synopsis->childNodes as $node) {
                        if ($node instanceof DOMElement && $node->tagName === 'type') {
                            $doc['return_type'] = $node->textContent;
                            break;
                        }
                    }
                }
                break;

            case 'returnvalues':
                $doc['return'] = extract_paragraphs($refsect1);
                break;

            case 'parameters':
                $params = array();
                $vars = $refsect1->getElementsByTagName('varlistentry');
                foreach ($vars as $var) {
                    if ($name = $var->getElementsByTagName('parameter')->item(0)) {
                        $params[] = array(
                            'name'        => '$' . $name->textContent,
                            // Cast: find_type() returns null when the synopsis lacks the parameter.
                            'type'        => (string) find_type($xml, $name->textContent),
                            'description' => extract_paragraphs($var),
                        );
                    }
                }

                $doc['params'] = $params;
                break;
        }
    }

    // Prepend the one-line purpose to the description.
    if ($purpose = $xml->getElementsByTagName('refpurpose')->item(0)) {
        $desc = htmlwrap($purpose->textContent);
        if (isset($doc['description'])) {
            $desc .= "\n\n" . $doc['description'];
        }

        $doc['description'] = trim($desc);
    }

    $ids = array();
    foreach ($xml->getElementsByTagName('refname') as $ref) {
        $ids[] = $ref->textContent;
    }

    return array($ids, format_doc($doc));
}

/**
 * Placeholder for class reference pages.
 *
 * @param DOMDocument $xml Parsed page containing a <classref>.
 *
 * @return array Always array(array(), null): no ids, no documentation.
 */
function format_class_doc($xml)
{
    // @todo implement this
    return array(array(), null);
}

$dir = new RecursiveDirectoryIterator($argv[1]);
$filter = new RecursiveCallbackFilterIterator($dir, function ($current, $key, $iterator) {
    return $current->getFilename()[0] !== '.' &&
        ($current->isDir() || $current->getExtension() === 'xml') &&
        strpos($current->getFilename(), 'entities.') !== 0 &&
        $current->getFilename() !== 'pdo_4d'; // Temporarily blacklist this one, the docs are weird.
});
$iterator = new RecursiveIteratorIterator($filter);

// Collect parse errors internally instead of silencing them with '@'.
libxml_use_internal_errors(true);

$docs = array();
foreach ($iterator as $file) {
    $contents = file_get_contents($file->getPathname());
    if ($contents === false) {
        echo "Unable to read: $file\n";
        continue;
    }

    // The manual uses entities that are defined elsewhere; escaping every '&'
    // turns them into literal text so each file parses on its own.
    $xmlstr = str_replace('&', '&amp;', $contents);

    $xml = new DOMDocument();
    $xml->preserveWhiteSpace = false;

    if (!$xml->loadXML($xmlstr)) {
        libxml_clear_errors();
        echo "XML Parse Error: $file\n";
        continue;
    }

    if ($xml->getElementsByTagName('refentry')->length !== 0) {
        list($ids, $doc) = format_function_doc($xml);
    } elseif ($xml->getElementsByTagName('classref')->length !== 0) {
        list($ids, $doc) = format_class_doc($xml);
    } else {
        $ids = array();
        $doc = null;
    }

    foreach ($ids as $id) {
        $docs[$id] = $doc;
    }
}

if (is_file($argv[2])) {
    unlink($argv[2]);
}

$db = new PDO('sqlite:' . $argv[2]);
$db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

$db->exec('CREATE TABLE php_manual (id char(256) PRIMARY KEY, doc TEXT)');
$cmd = $db->prepare('INSERT INTO php_manual (id, doc) VALUES (?, ?)');

// One transaction for all rows: otherwise SQLite commits after every INSERT.
$db->beginTransaction();
try {
    foreach ($docs as $id => $doc) {
        $cmd->execute(array($id, $doc));
    }
    $db->commit();
} catch (Exception $e) {
    $db->rollBack();
    throw $e;
}