Mercurial > hg > isophonics-drupal-site
comparison core/lib/Drupal/Component/Gettext/PoStreamReader.php @ 0:4c8ae668cc8c
Initial import (non-working)
author | Chris Cannam |
---|---|
date | Wed, 29 Nov 2017 16:09:58 +0000 |
parents | |
children | 1fec387a4317 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4c8ae668cc8c |
---|---|
1 <?php | |
2 | |
3 namespace Drupal\Component\Gettext; | |
4 | |
5 use Drupal\Component\Utility\SafeMarkup; | |
6 | |
7 /** | |
8 * Implements Gettext PO stream reader. | |
9 * | |
10 * The PO file format parsing is implemented according to the documentation at | |
11 * http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files | |
12 */ | |
13 class PoStreamReader implements PoStreamInterface, PoReaderInterface { | |
14 | |
15 /** | |
16 * Source line number of the stream being parsed. | |
17 * | |
18 * @var int | |
19 */ | |
20 private $_line_number = 0; | |
21 | |
22 /** | |
23 * Parser context for the stream reader state machine. | |
24 * | |
25 * Possible contexts are: | |
26 * - 'COMMENT' (#) | |
27 * - 'MSGID' (msgid) | |
28 * - 'MSGID_PLURAL' (msgid_plural) | |
29 * - 'MSGCTXT' (msgctxt) | |
30 * - 'MSGSTR' (msgstr) | |
31 * - 'MSGSTR_ARR' (msgstr[]) | |
32 * | |
33 * @var string | |
34 */ | |
35 private $_context = 'COMMENT'; | |
36 | |
37 /** | |
38 * Current entry being read. Incomplete. | |
39 * | |
40 * @var array | |
41 */ | |
42 private $_current_item = []; | |
43 | |
44 /** | |
45 * Current plural index for plural translations. | |
46 * | |
47 * @var int | |
48 */ | |
49 private $_current_plural_index = 0; | |
50 | |
51 /** | |
52 * URI of the PO stream that is being read. | |
53 * | |
54 * @var string | |
55 */ | |
56 private $_uri = ''; | |
57 | |
58 /** | |
59 * Language code for the PO stream being read. | |
60 * | |
61 * @var string | |
62 */ | |
63 private $_langcode = NULL; | |
64 | |
65 /** | |
66 * File handle of the current PO stream. | |
67 * | |
68 * @var resource | |
69 */ | |
70 private $_fd; | |
71 | |
72 /** | |
73 * The PO stream header. | |
74 * | |
75 * @var \Drupal\Component\Gettext\PoHeader | |
76 */ | |
77 private $_header; | |
78 | |
79 /** | |
80 * Object wrapper for the last read source/translation pair. | |
81 * | |
82 * @var \Drupal\Component\Gettext\PoItem | |
83 */ | |
84 private $_last_item; | |
85 | |
86 /** | |
87 * Indicator of whether the stream reading is finished. | |
88 * | |
89 * @var bool | |
90 */ | |
91 private $_finished; | |
92 | |
93 /** | |
94 * Array of translated error strings recorded on reading this stream so far. | |
95 * | |
96 * @var array | |
97 */ | |
98 private $_errors; | |
99 | |
/**
 * {@inheritdoc}
 */
public function getLangcode() {
  // Whatever was last stored through setLangcode(); NULL until then.
  $langcode = $this->_langcode;
  return $langcode;
}
106 | |
/**
 * {@inheritdoc}
 */
public function setLangcode($langcode) {
  // Remembered so that every item built by setItemFromArray() carries it.
  $this->_langcode = $langcode;
}
113 | |
/**
 * {@inheritdoc}
 */
public function getHeader() {
  // Only populated by readHeader(), which runs as part of open(); it stays
  // unset when the stream did not yield a first item.
  $header = $this->_header;
  return $header;
}
120 | |
/**
 * Implements Drupal\Component\Gettext\PoMetadataInterface::setHeader().
 *
 * A stream reader takes its header from the stream itself, so there is
 * nothing to store here and the passed header is ignored.
 *
 * @param \Drupal\Component\Gettext\PoHeader $header
 *   Ignored.
 */
public function setHeader(PoHeader $header) {
  // Intentionally left empty: not applicable to stream reading.
}
128 | |
/**
 * {@inheritdoc}
 */
public function getURI() {
  // Empty string until setURI() has been called.
  $uri = $this->_uri;
  return $uri;
}
135 | |
/**
 * {@inheritdoc}
 */
public function setURI($uri) {
  // Only recorded here; the stream itself is opened later by open().
  $this->_uri = $uri;
}
142 | |
/**
 * Implements Drupal\Component\Gettext\PoStreamInterface::open().
 *
 * Opens the stream and reads the header. The stream is ready for reading
 * items after.
 *
 * @throws \Exception
 *   If the URI is not yet set, or if the stream cannot be opened.
 */
public function open() {
  if (empty($this->_uri)) {
    throw new \Exception('Cannot open stream without URI set.');
  }

  // fopen() returns FALSE on failure. Keeping that value as the file handle
  // would make every subsequent read operate on a non-resource, so fail
  // loudly here instead of starting to parse.
  $handle = fopen($this->_uri, 'rb');
  if ($handle === FALSE) {
    throw new \Exception('Cannot open stream for URI ' . $this->_uri . '.');
  }

  $this->_fd = $handle;
  $this->readHeader();
}
161 | |
/**
 * Implements Drupal\Component\Gettext\PoStreamInterface::close().
 *
 * @throws \Exception
 *   If the stream is not open.
 */
public function close() {
  if (!$this->_fd) {
    throw new \Exception('Cannot close stream that is not open.');
  }

  fclose($this->_fd);
  // Forget the handle: a closed resource is still truthy, so without this a
  // second close() would call fclose() on it again instead of throwing.
  $this->_fd = NULL;
}
176 | |
/**
 * {@inheritdoc}
 */
public function readItem() {
  // Start from a clean slate so that a stale item is never returned.
  $this->_last_item = NULL;

  // Consume lines one at a time. readLine() sets $this->_last_item once a
  // complete entry was recognised and $this->_finished once the stream ran
  // out of data.
  for (;;) {
    if ($this->_finished || $this->_last_item !== NULL) {
      break;
    }
    $this->readLine();
  }

  return $this->_last_item;
}
191 | |
/**
 * Sets the seek position for the current PO stream.
 *
 * The result of the underlying fseek() call is not checked.
 *
 * @param int $seek
 *   The new seek position to set, counted from the start of the stream.
 */
public function setSeek($seek) {
  fseek($this->_fd, $seek, SEEK_SET);
}
201 | |
/**
 * Gets the pointer position of the current PO stream.
 *
 * @return int|false
 *   Whatever ftell() reports for the current file handle.
 */
public function getSeek() {
  $position = ftell($this->_fd);
  return $position;
}
208 | |
/**
 * Read the header from the PO stream.
 *
 * The header is stored in the stream like any other entry: an empty source
 * string whose translation holds the key-value pairs. The regular item
 * reader is therefore reused, and the first item it yields is taken to be
 * the header.
 */
private function readHeader() {
  $first_item = $this->readItem();

  // An empty (0 bytes) .po file yields no item at all; in that case the
  // header is simply left unset.
  if ($first_item) {
    $po_header = new PoHeader();
    $raw_header = trim($first_item->getTranslation());
    $po_header->setFromString($raw_header);
    $this->_header = $po_header;
  }
}
226 | |
/**
 * Reads a line from the PO stream and stores data internally.
 *
 * Expands $this->_current_item based on new data for the current item. If
 * this line ends the current item, it is saved with setItemFromArray() with
 * data from $this->_current_item.
 *
 * An internal state machine is maintained in this reader using
 * $this->_context as the reading state. PO items are in between COMMENT
 * states (when items have at least one line or comment in between them) or
 * indicated by MSGSTR or MSGSTR_ARR followed immediately by an MSGID or
 * MSGCTXT (when items closely follow each other).
 *
 * @return bool|null
 *   FALSE if an error was logged, NULL otherwise. The errors are considered
 *   non-blocking, so reading can continue, while the errors are collected
 *   for later presentation.
 */
private function readLine() {
  // Read a line and set the stream finished indicator if it was not
  // possible anymore.
  $line = fgets($this->_fd);
  $this->_finished = ($line === FALSE);

  // Initialize common values for error logging. This happens before the
  // finished check because the end-of-stream handling at the bottom of this
  // method reports errors too, and it also runs when no line was read.
  $log_vars = [
    '%uri' => $this->getURI(),
    '%line' => $this->_line_number,
  ];

  if (!$this->_finished) {

    if ($this->_line_number == 0) {
      // The first line might come with a UTF-8 BOM, which should be removed.
      $line = str_replace("\xEF\xBB\xBF", '', $line);
      // Current plurality for 'msgstr[]'.
      $this->_current_plural_index = 0;
    }

    // Track the line number for error reporting.
    $this->_line_number++;
    $log_vars['%line'] = $this->_line_number;

    // Trim away the linefeed. \\n might appear at the end of the string if
    // another line continuing the same string follows. We can remove that.
    $line = trim(strtr($line, ["\\\n" => ""]));

    if (!strncmp('#', $line, 1)) {
      // Lines starting with '#' are comments.

      if ($this->_context == 'COMMENT') {
        // Already in comment context, add to current comment.
        $this->_current_item['#'][] = substr($line, 1);
      }
      elseif (($this->_context == 'MSGSTR') || ($this->_context == 'MSGSTR_ARR')) {
        // We are currently in string context, save current item.
        $this->setItemFromArray($this->_current_item);

        // Start a new entry for the comment.
        $this->_current_item = [];
        $this->_current_item['#'][] = substr($line, 1);

        $this->_context = 'COMMENT';
        return;
      }
      else {
        // A comment following any other context is a syntax error.
        $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: "msgstr" was expected but not found on line %line.', $log_vars);
        return FALSE;
      }
      return;
    }
    elseif (!strncmp('msgid_plural', $line, 12)) {
      // A plural form for the current source string.

      if ($this->_context != 'MSGID') {
        // A plural form can only be added to an msgid directly.
        $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: "msgid_plural" was expected but not found on line %line.', $log_vars);
        return FALSE;
      }

      // Remove 'msgid_plural' and trim away whitespace.
      $line = trim(substr($line, 12));

      // Only the plural source string is left, parse it.
      $quoted = $this->parseQuoted($line);
      if ($quoted === FALSE) {
        // The plural form must be wrapped in quotes.
        $this->_errors[] = SafeMarkup::format('The translation stream %uri contains a syntax error on line %line.', $log_vars);
        return FALSE;
      }

      // Append the plural source to the current entry.
      if (is_string($this->_current_item['msgid'])) {
        // The first value was stored as string. Now we know the context is
        // plural, it is converted to array.
        $this->_current_item['msgid'] = [$this->_current_item['msgid']];
      }
      $this->_current_item['msgid'][] = $quoted;

      $this->_context = 'MSGID_PLURAL';
      return;
    }
    elseif (!strncmp('msgid', $line, 5)) {
      // Starting a new message.

      if (($this->_context == 'MSGSTR') || ($this->_context == 'MSGSTR_ARR')) {
        // We are currently in string context, save current item.
        $this->setItemFromArray($this->_current_item);

        // Start a new context for the msgid.
        $this->_current_item = [];
      }
      elseif ($this->_context == 'MSGID') {
        // We are currently already in the context, meaning we passed an id
        // with no data.
        $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: "msgid" is unexpected on line %line.', $log_vars);
        return FALSE;
      }

      // Remove 'msgid' and trim away whitespace.
      $line = trim(substr($line, 5));

      // Only the message id string is left, parse it.
      $quoted = $this->parseQuoted($line);
      if ($quoted === FALSE) {
        // The message id must be wrapped in quotes.
        $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: invalid format for "msgid" on line %line.', $log_vars);
        return FALSE;
      }

      $this->_current_item['msgid'] = $quoted;
      $this->_context = 'MSGID';
      return;
    }
    elseif (!strncmp('msgctxt', $line, 7)) {
      // Starting a new context.

      if (($this->_context == 'MSGSTR') || ($this->_context == 'MSGSTR_ARR')) {
        // We are currently in string context, save current item.
        $this->setItemFromArray($this->_current_item);
        $this->_current_item = [];
      }
      elseif (!empty($this->_current_item['msgctxt'])) {
        // A context cannot apply to another context.
        $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: "msgctxt" is unexpected on line %line.', $log_vars);
        return FALSE;
      }

      // Remove 'msgctxt' and trim away whitespaces.
      $line = trim(substr($line, 7));

      // Only the msgctxt string is left, parse it.
      $quoted = $this->parseQuoted($line);
      if ($quoted === FALSE) {
        // The context string must be quoted.
        $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: invalid format for "msgctxt" on line %line.', $log_vars);
        return FALSE;
      }

      $this->_current_item['msgctxt'] = $quoted;

      $this->_context = 'MSGCTXT';
      return;
    }
    elseif (!strncmp('msgstr[', $line, 7)) {
      // A message string for a specific plurality.

      if (($this->_context != 'MSGID') &&
          ($this->_context != 'MSGCTXT') &&
          ($this->_context != 'MSGID_PLURAL') &&
          ($this->_context != 'MSGSTR_ARR')) {
        // Plural message strings must come after msgid, msgctxt,
        // msgid_plural, or other msgstr[] entries.
        $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: "msgstr[]" is unexpected on line %line.', $log_vars);
        return FALSE;
      }

      // Ensure the plurality is terminated.
      if (strpos($line, ']') === FALSE) {
        $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: invalid format for "msgstr[]" on line %line.', $log_vars);
        return FALSE;
      }

      // Extract the plurality.
      $frombracket = strstr($line, '[');
      $this->_current_plural_index = substr($frombracket, 1, strpos($frombracket, ']') - 1);

      // Skip to the next whitespace and trim away any further whitespace,
      // bringing $line to the message text only.
      $line = trim(strstr($line, " "));

      $quoted = $this->parseQuoted($line);
      if ($quoted === FALSE) {
        // The string must be quoted.
        $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: invalid format for "msgstr[]" on line %line.', $log_vars);
        return FALSE;
      }
      if (!isset($this->_current_item['msgstr']) || !is_array($this->_current_item['msgstr'])) {
        $this->_current_item['msgstr'] = [];
      }

      $this->_current_item['msgstr'][$this->_current_plural_index] = $quoted;

      $this->_context = 'MSGSTR_ARR';
      return;
    }
    elseif (!strncmp("msgstr", $line, 6)) {
      // A string pair for an msgid (with optional context).

      if (($this->_context != 'MSGID') && ($this->_context != 'MSGCTXT')) {
        // Strings are only valid within an id or context scope.
        $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: "msgstr" is unexpected on line %line.', $log_vars);
        return FALSE;
      }

      // Remove 'msgstr' and trim away whitespaces.
      $line = trim(substr($line, 6));

      // Only the msgstr string is left, parse it.
      $quoted = $this->parseQuoted($line);
      if ($quoted === FALSE) {
        // The string must be quoted.
        $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: invalid format for "msgstr" on line %line.', $log_vars);
        return FALSE;
      }

      $this->_current_item['msgstr'] = $quoted;

      $this->_context = 'MSGSTR';
      return;
    }
    elseif ($line != '') {
      // Anything that is not a token may be a continuation of a previous
      // token.

      $quoted = $this->parseQuoted($line);
      if ($quoted === FALSE) {
        // This string must be quoted.
        $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: string continuation expected on line %line.', $log_vars);
        return FALSE;
      }

      // Append the string to the current item.
      if (($this->_context == 'MSGID') || ($this->_context == 'MSGID_PLURAL')) {
        if (is_array($this->_current_item['msgid'])) {
          // Add string to last array element for plural sources.
          $last_index = count($this->_current_item['msgid']) - 1;
          $this->_current_item['msgid'][$last_index] .= $quoted;
        }
        else {
          // Singular source, just append the string.
          $this->_current_item['msgid'] .= $quoted;
        }
      }
      elseif ($this->_context == 'MSGCTXT') {
        // Multiline context name.
        $this->_current_item['msgctxt'] .= $quoted;
      }
      elseif ($this->_context == 'MSGSTR') {
        // Multiline translation string.
        $this->_current_item['msgstr'] .= $quoted;
      }
      elseif ($this->_context == 'MSGSTR_ARR') {
        // Multiline plural translation string.
        $this->_current_item['msgstr'][$this->_current_plural_index] .= $quoted;
      }
      else {
        // No valid context to append to.
        $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: unexpected string on line %line.', $log_vars);
        return FALSE;
      }
      return;
    }
  }

  // Empty line read or EOF of PO stream, close out the last entry.
  if (($this->_context == 'MSGSTR') || ($this->_context == 'MSGSTR_ARR')) {
    $this->setItemFromArray($this->_current_item);
    $this->_current_item = [];
  }
  elseif ($this->_context != 'COMMENT') {
    $this->_errors[] = SafeMarkup::format('The translation stream %uri ended unexpectedly at line %line.', $log_vars);
    return FALSE;
  }
}
510 | |
/**
 * Store the parsed values as a PoItem object.
 *
 * The resulting item becomes $this->_last_item and the parser context is
 * reset to 'COMMENT'.
 *
 * @param array $value
 *   Raw entry data. The keys read here are 'msgid', 'msgstr' (a string, or
 *   an array of plural variants keyed by form index) and, optionally,
 *   'msgctxt' and '#' (list of comment lines).
 */
public function setItemFromArray($value) {
  // Collapse the collected comment lines, if any, into one short string.
  $comment_text = isset($value['#']) ? $this->shortenComments($value['#']) : '';

  // An array of translations marks a plural entry; keep its variants ordered
  // by their form index.
  $is_plural = is_array($value['msgstr']);
  if ($is_plural) {
    ksort($value['msgstr']);
  }

  $context_name = isset($value['msgctxt']) ? $value['msgctxt'] : '';

  $po_item = new PoItem();
  $po_item->setContext($context_name);
  $po_item->setSource($value['msgid']);
  $po_item->setTranslation($value['msgstr']);
  $po_item->setPlural($is_plural);
  $po_item->setComment($comment_text);
  $po_item->setLangcode($this->_langcode);

  $this->_last_item = $po_item;

  // The entry is complete; the state machine starts over.
  $this->_context = 'COMMENT';
}
540 | |
/**
 * Parses a string in quotes.
 *
 * @param string $string
 *   A string specified with enclosing quotes.
 *
 * @return string|false
 *   The string parsed from inside the quotes, with backslash escapes
 *   resolved for double-quoted strings and taken as-is for single-quoted
 *   strings. FALSE if the input is not enclosed in a matching pair of
 *   single or double quotes.
 */
public function parseQuoted($string) {
  // A quoted string needs both an opening and a closing quote. Without this
  // check a lone quote character would pass as its own closing quote.
  if (strlen($string) < 2) {
    return FALSE;
  }

  $quote = substr($string, 0, 1);
  if ($quote !== substr($string, -1, 1)) {
    // Start and end quotes must be the same.
    return FALSE;
  }

  // Cast to string: substr() may yield FALSE rather than '' when nothing is
  // left between the quotes, which callers would mistake for a parse error.
  $inner = (string) substr($string, 1, -1);

  if ($quote === '"') {
    // Double quotes: strip slashes.
    return stripcslashes($inner);
  }
  if ($quote === "'") {
    // Simple quote: return as-is.
    return $inner;
  }

  // Unrecognized quote.
  return FALSE;
}
570 | |
/**
 * Generates a short, one-string version of the passed comment array.
 *
 * The first character of every comment line is dropped and the remainders
 * are joined with ', '. No further lines are added once the joined string
 * has reached 130 bytes, so the result may exceed that length by up to one
 * comment line.
 *
 * @param array $comment
 *   An array of strings containing a comment.
 *
 * @return string
 *   Short one-string version of the comment.
 */
private function shortenComments($comment) {
  $summary = '';
  foreach ($comment as $comment_line) {
    // The limit is checked against what was collected so far, not against
    // the string including the line that is about to be added.
    if (strlen($summary) >= 130) {
      break;
    }
    $summary .= substr($comment_line, 1) . ', ';
  }

  // Drop the trailing separator and any surrounding whitespace.
  return trim(substr($summary, 0, -2));
}
593 | |
594 } |