Chris@0
|
1 <?php
|
Chris@17
|
2
|
Chris@0
|
3 namespace Masterminds\HTML5\Parser;
|
Chris@0
|
4
|
/**
 * Standard events for HTML5.
 *
 * This is roughly analogous to a SAX2 or expat-style interface.
 * However, it is tuned specifically for HTML5, according to section 8
 * of the HTML5 specification.
 *
 * An event handler receives parser events. For a concrete
 * implementation, see DOMTreeBuilder.
 *
 * Quirks support in the parser is limited to close-in syntax (malformed
 * tags or attributes). Higher order syntax and semantic issues with a
 * document (e.g. mismatched tags, illegal nesting, etc.) are the
 * responsibility of the event handler implementation.
 *
 * See HTML5 spec section 8.2.4
 */
interface EventHandler
{
    /** Identifier type: the doctype carries no identifier. */
    const DOCTYPE_NONE = 0;

    /** Identifier type: the doctype identifier is a public ID. */
    const DOCTYPE_PUBLIC = 1;

    /** Identifier type: the doctype identifier is a system ID. */
    const DOCTYPE_SYSTEM = 2;

    /**
     * A doctype declaration.
     *
     * @param string $name   The name of the root element.
     * @param int    $idType One of DOCTYPE_NONE, DOCTYPE_PUBLIC, or DOCTYPE_SYSTEM.
     *                       Defaults to DOCTYPE_NONE.
     * @param string $id     The identifier. For DOCTYPE_PUBLIC, this is the public ID. If DOCTYPE_SYSTEM,
     *                       then this is a system ID.
     * @param bool   $quirks Indicates whether the builder should enter quirks mode.
     */
    public function doctype($name, $idType = self::DOCTYPE_NONE, $id = null, $quirks = false);

    /**
     * A start tag.
     *
     * IMPORTANT: The parser watches the return value of this event. If this returns
     * an integer, the parser will switch TEXTMODE patterns according to the int.
     *
     * This is how the Tree Builder can tell the Tokenizer when a certain tag should
     * cause the parser to go into RAW text mode.
     *
     * The HTML5 standard requires that the builder is the one that initiates this
     * step, and this is the only way short of a circular reference that we can
     * do that.
     *
     * Example: if a startTag event for a `script` name is fired, and the startTag()
     * implementation returns Tokenizer::TEXTMODE_RAW, then the tokenizer will
     * switch into RAW text mode and consume data until it reaches a closing
     * `script` tag.
     *
     * The textmode is automatically reset to Tokenizer::TEXTMODE_NORMAL when the
     * closing tag is encountered. **This behavior may change.**
     *
     * @param string $name        The tag name.
     * @param array  $attributes  An array with all of the tag's attributes.
     * @param bool   $selfClosing An indicator of whether or not this tag is self-closing (<foo/>).
     *
     * @return int one of the Tokenizer::TEXTMODE_* constants
     */
    public function startTag($name, $attributes = array(), $selfClosing = false);

    /**
     * An end-tag.
     *
     * @param string $name The name of the tag being closed.
     */
    public function endTag($name);

    /**
     * A comment section (unparsed character data).
     *
     * @param string $cdata The unparsed character data of the comment.
     */
    public function comment($cdata);

    /**
     * A unit of parsed character data.
     *
     * Entities in this text are *already decoded*.
     *
     * @param string $cdata The parsed character data.
     */
    public function text($cdata);

    /**
     * Indicates that the document has been entirely processed.
     */
    public function eof();

    /**
     * Emitted when the parser encounters an error condition.
     *
     * NOTE(review): the parameter types and meanings below are inferred from
     * the parameter names only; confirm against the code that emits this event.
     *
     * @param string $msg  Presumably a description of the error.
     * @param int    $line Presumably the input line associated with the error.
     * @param int    $col  Presumably the input column associated with the error.
     */
    public function parseError($msg, $line, $col);

    /**
     * A CDATA section.
     *
     * @param string $data The unparsed character data.
     */
    public function cdata($data);

    /**
     * This is a holdover from the XML spec.
     *
     * While user agents don't get PIs, server-side does.
     *
     * @param string $name The name of the processor (e.g. 'php').
     * @param string $data The unparsed data.
     */
    public function processingInstruction($name, $data = null);
}