Chris@5
|
1 <?php
|
Chris@5
|
2
|
Chris@5
|
3 namespace Drupal\migrate_plus\Plugin\migrate\process;
|
Chris@5
|
4
|
Chris@5
|
5 use Drupal\Component\Utility\Html;
|
Chris@5
|
6 use Drupal\migrate\MigrateException;
|
Chris@5
|
7 use Drupal\migrate\MigrateExecutableInterface;
|
Chris@5
|
8 use Drupal\migrate\Plugin\MigrationInterface;
|
Chris@5
|
9 use Drupal\migrate\ProcessPluginBase;
|
Chris@5
|
10 use Drupal\migrate\Row;
|
Chris@5
|
11
|
Chris@5
|
/**
 * Handles string to DOM and back conversions.
 *
 * Available configuration keys:
 * - method: Action to perform. Possible values:
 *   - import: string to DOMDocument.
 *   - export: DOMDocument to string.
 * - non_root: (optional) Assume the passed HTML is not a complete hierarchy,
 *   but only a subset inside body element. Defaults to true.
 *
 * The following keys are only used if the method is 'import':
 * - log_messages: (optional) When parsing HTML, libxml may trigger
 *   warnings. If this option is set to true, it will log them as migration
 *   messages. Otherwise, it will not handle it in a special way. Defaults to
 *   true.
 * - version: (optional) The version number of the document as part of the XML
 *   declaration. Defaults to '1.0'.
 * - encoding: (optional) The encoding of the document as part of the XML
 *   declaration. Defaults to 'UTF-8'.
 *
 * @codingStandardsIgnoreStart
 *
 * Examples:
 * @code
 * process:
 *   'body/value':
 *     -
 *       plugin: dom
 *       method: import
 *       source: 'body/0/value'
 *     -
 *       plugin: dom
 *       method: export
 * @endcode
 * This example above will convert the input string to a DOMDocument object and
 * back, with no explicit processing. It should have few noticeable effects.
 *
 * @code
 * process:
 *   'body/value':
 *     -
 *       plugin: dom
 *       method: import
 *       source: 'body/0/value'
 *       non_root: true
 *       log_messages: true
 *       version: '1.0'
 *       encoding: UTF-8
 *     -
 *       plugin: dom
 *       method: export
 *       non_root: true
 * @endcode
 * This example above will have the same effect as the previous example, since
 * it specifies the default values for all the optional parameters.
 *
 * @codingStandardsIgnoreEnd
 *
 * @MigrateProcessPlugin(
 *   id = "dom"
 * )
 */
class Dom extends ProcessPluginBase {

  /**
   * If parsing warnings should be logged as migrate messages.
   *
   * @var bool
   */
  protected $logMessages = TRUE;

  /**
   * The HTML contains only the piece inside the body element.
   *
   * @var bool
   */
  protected $nonRoot = TRUE;

  /**
   * {@inheritdoc}
   *
   * @throws \InvalidArgumentException
   *   When the "method" configuration key is missing or is not exactly the
   *   string "import" or "export".
   */
  public function __construct(array $configuration, $plugin_id, $plugin_definition) {
    if (!isset($configuration['method'])) {
      throw new \InvalidArgumentException('The "method" must be set.');
    }
    // Strict comparison: with loose comparison non-string values such as TRUE
    // (or 0 on PHP 7) would be accepted here even though they do not name one
    // of the two supported methods.
    if (!in_array($configuration['method'], ['import', 'export'], TRUE)) {
      throw new \InvalidArgumentException('The "method" must be "import" or "export".');
    }
    parent::__construct($configuration, $plugin_id, $plugin_definition);
    // Array union keeps every explicitly configured key and only fills in the
    // optional ones that were left out.
    $this->configuration += $this->defaultValues();
    $this->logMessages = (bool) $this->configuration['log_messages'];
    $this->nonRoot = (bool) $this->configuration['non_root'];
  }

  /**
   * Supply default values of all optional parameters.
   *
   * @return array
   *   An array with keys the optional parameters and values the corresponding
   *   defaults.
   */
  protected function defaultValues() {
    return [
      'non_root' => TRUE,
      'log_messages' => TRUE,
      'version' => '1.0',
      'encoding' => 'UTF-8',
    ];
  }

  /**
   * Converts a HTML string into a DOMDocument.
   *
   * It is not using \Drupal\Component\Utility\Html::load() because it ignores
   * all errors on import, and therefore incompatible with log_messages
   * option.
   *
   * @param mixed $value
   *   The string to be imported.
   * @param \Drupal\migrate\MigrateExecutableInterface $migrate_executable
   *   The migration in which this process is being executed.
   * @param \Drupal\migrate\Row $row
   *   The row from the source to process. Normally, just transforming the value
   *   is adequate but very rarely you might need to change two columns at the
   *   same time or something like that.
   * @param string $destination_property
   *   The destination property currently worked on. This is only used together
   *   with the $row above.
   *
   * @return \DOMDocument
   *   The document object based on the provided string.
   *
   * @throws \Drupal\migrate\MigrateException
   *   When the received $value is not a string.
   */
  public function import($value, MigrateExecutableInterface $migrate_executable, Row $row, $destination_property) {
    if (!is_string($value)) {
      throw new MigrateException('Cannot import a non-string value.');
    }

    if ($this->logMessages) {
      // Route PHP errors raised while parsing to the migration's message log.
      set_error_handler(function ($errno, $errstr) use ($migrate_executable) {
        $migrate_executable->saveMessage($errstr, MigrationInterface::MESSAGE_WARNING);
      });
    }

    try {
      $html = $this->nonRoot ? $this->getNonRootHtml($value) : $value;
      $document = new \DOMDocument($this->configuration['version'], $this->configuration['encoding']);
      $document->loadHTML($html);
    }
    finally {
      // Always pop the temporary handler, even when parsing throws, so it
      // cannot leak into code that runs after this plugin.
      if ($this->logMessages) {
        restore_error_handler();
      }
    }

    return $document;
  }

  /**
   * Converts a DOMDocument into a HTML string.
   *
   * @param mixed $value
   *   The document to be exported.
   * @param \Drupal\migrate\MigrateExecutableInterface $migrate_executable
   *   The migration in which this process is being executed.
   * @param \Drupal\migrate\Row $row
   *   The row from the source to process. Normally, just transforming the value
   *   is adequate but very rarely you might need to change two columns at the
   *   same time or something like that.
   * @param string $destination_property
   *   The destination property currently worked on. This is only used together
   *   with the $row above.
   *
   * @return string
   *   The HTML string corresponding to the provided document object.
   *
   * @throws \Drupal\migrate\MigrateException
   *   When the received $value is not a \DOMDocument.
   */
  public function export($value, MigrateExecutableInterface $migrate_executable, Row $row, $destination_property) {
    if (!$value instanceof \DOMDocument) {
      // Report the class name for objects and the PHP type name otherwise.
      $value_description = is_object($value) ? get_class($value) : gettype($value);
      throw new MigrateException(sprintf('Cannot export a "%s".', $value_description));
    }

    // For a partial document delegate to Html::serialize(); otherwise dump
    // the complete document.
    if ($this->nonRoot) {
      return Html::serialize($value);
    }
    return $value->saveHTML();
  }

  /**
   * Builds a full HTML string based on a partial.
   *
   * @param string $partial
   *   A subset of a full html string. For instance the contents of the body
   *   element.
   *
   * @return string
   *   A complete HTML document, with the template's line breaks removed, that
   *   declares the configured encoding and contains $partial unchanged inside
   *   its body element.
   */
  protected function getNonRootHtml($partial) {
    // strtr() does not rescan replaced text, so the newline removal applies to
    // the template only and never to the content of $partial.
    $replacements = [
      "\n" => '',
      '!encoding' => strtolower($this->configuration['encoding']),
      '!value' => $partial,
    ];
    // Prepend the html with a header using the configured source encoding.
    // By default, loadHTML() assumes ISO-8859-1.
    $html_template = <<<EOD
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="Content-Type" content="text/html; charset=!encoding" /></head>
<body>!value</body>
</html>
EOD;
    return strtr($html_template, $replacements);
  }

}
|