annotate modules/contrib/migrate_plus/src/Plugin/migrate/process/Dom.php @ 5:12f9dff5fda9 tip

Update to Drupal core 8.7.1
author Chris Cannam
date Thu, 09 May 2019 15:34:47 +0100
parents
children
rev   line source
Chris@5 1 <?php
Chris@5 2
Chris@5 3 namespace Drupal\migrate_plus\Plugin\migrate\process;
Chris@5 4
Chris@5 5 use Drupal\Component\Utility\Html;
Chris@5 6 use Drupal\migrate\MigrateException;
Chris@5 7 use Drupal\migrate\MigrateExecutableInterface;
Chris@5 8 use Drupal\migrate\Plugin\MigrationInterface;
Chris@5 9 use Drupal\migrate\ProcessPluginBase;
Chris@5 10 use Drupal\migrate\Row;
Chris@5 11
Chris@5 12 /**
Chris@5 13 * Handles string to DOM and back conversions.
Chris@5 14 *
Chris@5 15 * Available configuration keys:
Chris@5 16 * - method: Action to perform. Possible values:
Chris@5 17 * - import: string to DomDocument.
Chris@5 18 * - export: DomDocument to string.
Chris@5 19 * - non_root: (optional) Assume the passed HTML is not a complete hierarchy,
Chris@5 20 * but only a subset inside body element. Defaults to true.
Chris@5 21 *
Chris@5 22 * The following keys are only used if the method is 'import':
Chris@5 23 * - log_messages: (optional) When parsing HTML, libxml may trigger
Chris@5 24 * warnings. If this option is set to true, it will log them as migration
Chris@5 25 * messages. Otherwise, it will not handle it in a special way. Defaults to
Chris@5 26 * true.
Chris@5 27 * - version: (optional) The version number of the document as part of the XML
Chris@5 28 * declaration. Defaults to '1.0'.
Chris@5 29 * - encoding: (optional) The encoding of the document as part of the XML
Chris@5 30 * declaration. Defaults to 'UTF-8'.
Chris@5 31 *
Chris@5 32 * @codingStandardsIgnoreStart
Chris@5 33 *
Chris@5 34 * Examples:
Chris@5 35 * @code
Chris@5 36 * process:
Chris@5 37 * 'body/value':
Chris@5 38 * -
Chris@5 39 * plugin: dom
Chris@5 40 * method: import
Chris@5 41 * source: 'body/0/value'
Chris@5 42 * -
Chris@5 43 * plugin: dom
Chris@5 44 * method: export
Chris@5 45 * @endcode
Chris@5 46 * This example above will convert the input string to a DOMDocument object and
Chris@5 47 * back, with no explicit processing. It should have few noticeable effects.
Chris@5 48 *
Chris@5 49 * @code
Chris@5 50 * process:
Chris@5 51 * 'body/value':
Chris@5 52 * -
Chris@5 53 * plugin: dom
Chris@5 54 * method: import
Chris@5 55 * source: 'body/0/value'
Chris@5 56 * non_root: true
Chris@5 57 * log_messages: true
Chris@5 58 * version: '1.0'
Chris@5 59 * encoding: UTF-8
Chris@5 60 * -
Chris@5 61 * plugin: dom
Chris@5 62 * method: export
Chris@5 63 * non_root: true
Chris@5 64 * @endcode
Chris@5 65 * This example above will have the same effect as the previous example, since
Chris@5 66 * it specifies the default values for all the optional parameters.
Chris@5 67 *
Chris@5 68 * @codingStandardsIgnoreEnd
Chris@5 69 *
Chris@5 70 * @MigrateProcessPlugin(
Chris@5 71 * id = "dom"
Chris@5 72 * )
Chris@5 73 */
class Dom extends ProcessPluginBase {

  /**
   * If parsing warnings should be logged as migrate messages.
   *
   * @var bool
   */
  protected $logMessages = TRUE;

  /**
   * The HTML contains only the piece inside the body element.
   *
   * @var bool
   */
  protected $nonRoot = TRUE;

  /**
   * Constructs the plugin and validates its configuration.
   *
   * @param array $configuration
   *   The plugin configuration. The 'method' key is required and must be the
   *   string 'import' or 'export'. Any optional key that is missing is filled
   *   in from defaultValues().
   * @param string $plugin_id
   *   The plugin ID for the plugin instance.
   * @param mixed $plugin_definition
   *   The plugin implementation definition.
   *
   * @throws \InvalidArgumentException
   *   When 'method' is missing or is not exactly 'import' or 'export'.
   */
  public function __construct(array $configuration, $plugin_id, $plugin_definition) {
    if (!isset($configuration['method'])) {
      throw new \InvalidArgumentException('The "method" must be set.');
    }
    // Strict comparison: with loose comparison a non-string value such as TRUE
    // would be considered equal to 'import' and slip through validation.
    if (!in_array($configuration['method'], ['import', 'export'], TRUE)) {
      throw new \InvalidArgumentException('The "method" must be "import" or "export".');
    }
    parent::__construct($configuration, $plugin_id, $plugin_definition);
    // The union operator keeps explicitly configured values and only adds the
    // defaults for keys that are absent.
    $this->configuration += $this->defaultValues();
    $this->logMessages = (bool) $this->configuration['log_messages'];
    $this->nonRoot = (bool) $this->configuration['non_root'];
  }

  /**
   * Supply default values of all optional parameters.
   *
   * @return array
   *   An array with keys the optional parameters and values the corresponding
   *   defaults.
   */
  protected function defaultValues() {
    return [
      'non_root' => TRUE,
      'log_messages' => TRUE,
      'version' => '1.0',
      'encoding' => 'UTF-8',
    ];
  }

  /**
   * Converts a HTML string into a DOMDocument.
   *
   * It is not using \Drupal\Component\Utility\Html::load() because it ignores
   * all errors on import, and therefore incompatible with log_messages
   * option.
   *
   * @param mixed $value
   *   The string to be imported.
   * @param \Drupal\migrate\MigrateExecutableInterface $migrate_executable
   *   The migration in which this process is being executed.
   * @param \Drupal\migrate\Row $row
   *   The row from the source to process. Normally, just transforming the value
   *   is adequate but very rarely you might need to change two columns at the
   *   same time or something like that.
   * @param string $destination_property
   *   The destination property currently worked on. This is only used together
   *   with the $row above.
   *
   * @return \DOMDocument
   *   The document object based on the provided string.
   *
   * @throws \Drupal\migrate\MigrateException
   *   When the received $value is not a string.
   */
  public function import($value, MigrateExecutableInterface $migrate_executable, Row $row, $destination_property) {
    if (!is_string($value)) {
      throw new MigrateException('Cannot import a non-string value.');
    }

    if ($this->logMessages) {
      // Temporarily route PHP errors raised while parsing into migration
      // messages.
      set_error_handler(function ($errno, $errstr) use ($migrate_executable) {
        $migrate_executable->saveMessage($errstr, MigrationInterface::MESSAGE_WARNING);
      });
    }

    try {
      $html = $this->nonRoot ? $this->getNonRootHtml($value) : $value;

      $document = new \DOMDocument($this->configuration['version'], $this->configuration['encoding']);
      $document->loadHTML($html);
    }
    finally {
      // Always undo the temporary handler, even when parsing throws, so it
      // cannot leak into the processing of subsequent values.
      if ($this->logMessages) {
        restore_error_handler();
      }
    }

    return $document;
  }

  /**
   * Converts a DOMDocument into a HTML string.
   *
   * @param mixed $value
   *   The document to be exported.
   * @param \Drupal\migrate\MigrateExecutableInterface $migrate_executable
   *   The migration in which this process is being executed.
   * @param \Drupal\migrate\Row $row
   *   The row from the source to process. Normally, just transforming the value
   *   is adequate but very rarely you might need to change two columns at the
   *   same time or something like that.
   * @param string $destination_property
   *   The destination property currently worked on. This is only used together
   *   with the $row above.
   *
   * @return string
   *   The HTML string corresponding to the provided document object.
   *
   * @throws \Drupal\migrate\MigrateException
   *   When the received $value is not a \DOMDocument.
   */
  public function export($value, MigrateExecutableInterface $migrate_executable, Row $row, $destination_property) {
    if (!$value instanceof \DOMDocument) {
      // Report the class name for objects and the primitive type otherwise.
      $value_description = is_object($value) ? get_class($value) : gettype($value);
      throw new MigrateException(sprintf('Cannot export a "%s".', $value_description));
    }

    if ($this->nonRoot) {
      return Html::serialize($value);
    }
    return $value->saveHTML();
  }

  /**
   * Builds a full HTML string based on a partial.
   *
   * @param string $partial
   *   A subset of a full html string. For instance the contents of the body
   *   element.
   *
   * @return string
   *   The partial wrapped in a single-line HTML document skeleton whose meta
   *   element declares the configured encoding (lowercased).
   */
  protected function getNonRootHtml($partial) {
    $replacements = [
      // Collapses the template below onto one line. strtr() does not rescan
      // replacement text, so newlines inside $partial are left untouched.
      "\n" => '',
      '!encoding' => strtolower($this->configuration['encoding']),
      '!value' => $partial,
    ];
    // Prepend the html with a header using the configured source encoding.
    // By default, loadHTML() assumes ISO-8859-1.
    $html_template = <<<EOD
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="Content-Type" content="text/html; charset=!encoding" /></head>
<body>!value</body>
</html>
EOD;
    return strtr($html_template, $replacements);
  }

}