comparison core/lib/Drupal/Component/Gettext/PoStreamReader.php @ 0:4c8ae668cc8c

Initial import (non-working)
author Chris Cannam
date Wed, 29 Nov 2017 16:09:58 +0000
parents
children 1fec387a4317
comparison
equal deleted inserted replaced
-1:000000000000 0:4c8ae668cc8c
1 <?php
2
3 namespace Drupal\Component\Gettext;
4
5 use Drupal\Component\Utility\SafeMarkup;
6
/**
 * Implements Gettext PO stream reader.
 *
 * The PO file format parsing is implemented according to the documentation at
 * http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files
 *
 * The reader is a line-based state machine: readLine() consumes one line at a
 * time, collects the pieces of the entry being read in $_current_item and
 * passes each completed entry to setItemFromArray(), which makes it available
 * to readItem(). Syntax errors are appended to $_errors and do not stop
 * reading.
 */
class PoStreamReader implements PoStreamInterface, PoReaderInterface {

  /**
   * Source line number of the stream being parsed.
   *
   * @var int
   */
  private $_line_number = 0;

  /**
   * Parser context for the stream reader state machine.
   *
   * Possible contexts are:
   * - 'COMMENT' (#)
   * - 'MSGID' (msgid)
   * - 'MSGID_PLURAL' (msgid_plural)
   * - 'MSGCTXT' (msgctxt)
   * - 'MSGSTR' (msgstr)
   * - 'MSGSTR_ARR' (msgstr[])
   *
   * @var string
   */
  private $_context = 'COMMENT';

  /**
   * Current entry being read. Incomplete.
   *
   * Keys used by the parser are '#' (list of comment lines), 'msgctxt',
   * 'msgid' (string, or array once a msgid_plural was seen) and 'msgstr'
   * (string, or array keyed by plural index for msgstr[] lines).
   *
   * @var array
   */
  private $_current_item = [];

  /**
   * Current plural index for plural translations.
   *
   * @var int
   */
  private $_current_plural_index = 0;

  /**
   * URI of the PO stream that is being read.
   *
   * @var string
   */
  private $_uri = '';

  /**
   * Language code for the PO stream being read.
   *
   * @var string
   */
  private $_langcode = NULL;

  /**
   * File handle of the current PO stream.
   *
   * @var resource
   */
  private $_fd;

  /**
   * The PO stream header.
   *
   * @var \Drupal\Component\Gettext\PoHeader
   */
  private $_header;

  /**
   * Object wrapper for the last read source/translation pair.
   *
   * @var \Drupal\Component\Gettext\PoItem
   */
  private $_last_item;

  /**
   * Indicator of whether the stream reading is finished.
   *
   * @var bool
   */
  private $_finished = FALSE;

  /**
   * Array of translated error strings recorded on reading this stream so far.
   *
   * @var array
   */
  private $_errors = [];

  /**
   * {@inheritdoc}
   */
  public function getLangcode() {
    return $this->_langcode;
  }

  /**
   * {@inheritdoc}
   */
  public function setLangcode($langcode) {
    $this->_langcode = $langcode;
  }

  /**
   * {@inheritdoc}
   */
  public function getHeader() {
    return $this->_header;
  }

  /**
   * Implements Drupal\Component\Gettext\PoMetadataInterface::setHeader().
   *
   * Not applicable to stream reading and therefore not implemented.
   */
  public function setHeader(PoHeader $header) {
  }

  /**
   * {@inheritdoc}
   */
  public function getURI() {
    return $this->_uri;
  }

  /**
   * {@inheritdoc}
   */
  public function setURI($uri) {
    $this->_uri = $uri;
  }

  /**
   * Implements Drupal\Component\Gettext\PoStreamInterface::open().
   *
   * Opens the stream and reads the header. The stream is ready for reading
   * items after.
   *
   * @throws \Exception
   *   If the URI is not yet set, or if the stream cannot be opened.
   */
  public function open() {
    if (empty($this->_uri)) {
      throw new \Exception('Cannot open stream without URI set.');
    }

    // fopen() returns FALSE on failure. Storing that value would make the
    // following fgets() call fail on an invalid handle, so fail early with the
    // exception type callers of this method already have to handle.
    $fd = fopen($this->_uri, 'rb');
    if ($fd === FALSE) {
      throw new \Exception(sprintf('Cannot open stream %s.', $this->_uri));
    }

    $this->_fd = $fd;
    $this->readHeader();
  }

  /**
   * Implements Drupal\Component\Gettext\PoStreamInterface::close().
   *
   * @throws \Exception
   *   If the stream is not open.
   */
  public function close() {
    if (!$this->_fd) {
      throw new \Exception('Cannot close stream that is not open.');
    }

    fclose($this->_fd);
    // Forget the handle: a closed resource is still truthy, so without this a
    // second close() would call fclose() on it instead of throwing above.
    $this->_fd = NULL;
  }

  /**
   * {@inheritdoc}
   */
  public function readItem() {
    // Clear out the last item.
    $this->_last_item = NULL;

    // Read until finished with the stream or a complete item was identified.
    while (!$this->_finished && is_null($this->_last_item)) {
      $this->readLine();
    }

    return $this->_last_item;
  }

  /**
   * Sets the seek position for the current PO stream.
   *
   * Only the file pointer is moved; the parser state (context, current item,
   * finished flag) is left as it is.
   *
   * @param int $seek
   *   The new seek position to set.
   */
  public function setSeek($seek) {
    fseek($this->_fd, $seek);
  }

  /**
   * Gets the pointer position of the current PO stream.
   *
   * @return int|false
   *   The value reported by ftell() for the stream handle.
   */
  public function getSeek() {
    return ftell($this->_fd);
  }

  /**
   * Read the header from the PO stream.
   *
   * The header is a special case PoItem, using the empty string as source and
   * key-value pairs as translation. We just reuse the item reader logic to
   * read the header.
   */
  private function readHeader() {
    $item = $this->readItem();
    // Handle the case properly when the .po file is empty (0 bytes).
    if (!$item) {
      return;
    }
    $header = new PoHeader();
    $header->setFromString(trim($item->getTranslation()));
    $this->_header = $header;
  }

  /**
   * Reads a line from the PO stream and stores data internally.
   *
   * Expands $this->_current_item based on new data for the current item. If
   * this line ends the current item, it is saved with setItemFromArray() with
   * data from $this->_current_item.
   *
   * An internal state machine is maintained in this reader using
   * $this->_context as the reading state. PO items are in between COMMENT
   * states (when items have at least one line or comment in between them) or
   * indicated by MSGSTR or MSGSTR_ARR followed immediately by an MSGID or
   * MSGCTXT (when items closely follow each other).
   *
   * @return bool|null
   *   FALSE if an error was logged, NULL otherwise. The errors are considered
   *   non-blocking, so reading can continue, while the errors are collected
   *   for later presentation.
   */
  private function readLine() {
    // Read a line and set the stream finished indicator if it was not
    // possible anymore.
    $line = fgets($this->_fd);
    $this->_finished = ($line === FALSE);

    // Initialize common values for error logging. This is done before the
    // end-of-stream check because the "ended unexpectedly" error at the bottom
    // of this method is also reached when no line could be read.
    $log_vars = [
      '%uri' => $this->getURI(),
      '%line' => $this->_line_number,
    ];

    if (!$this->_finished) {

      if ($this->_line_number == 0) {
        // The first line might come with a UTF-8 BOM, which should be removed.
        $line = str_replace("\xEF\xBB\xBF", '', $line);
        // Current plurality for 'msgstr[]'.
        $this->_current_plural_index = 0;
      }

      // Track the line number for error reporting.
      $this->_line_number++;
      $log_vars['%line'] = $this->_line_number;

      // Trim away the linefeed. \\n might appear at the end of the string if
      // another line continuing the same string follows. We can remove that.
      $line = trim(strtr($line, ["\\\n" => ""]));

      if (!strncmp('#', $line, 1)) {
        // Lines starting with '#' are comments.

        if ($this->_context == 'COMMENT') {
          // Already in comment context, add to current comment.
          $this->_current_item['#'][] = substr($line, 1);
        }
        elseif (($this->_context == 'MSGSTR') || ($this->_context == 'MSGSTR_ARR')) {
          // We are currently in string context, save current item.
          $this->setItemFromArray($this->_current_item);

          // Start a new entry for the comment.
          $this->_current_item = [];
          $this->_current_item['#'][] = substr($line, 1);

          $this->_context = 'COMMENT';
          return;
        }
        else {
          // A comment following any other context is a syntax error.
          $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: "msgstr" was expected but not found on line %line.', $log_vars);
          return FALSE;
        }
        return;
      }
      elseif (!strncmp('msgid_plural', $line, 12)) {
        // A plural form for the current source string. This prefix has to be
        // tested before the shorter 'msgid' prefix below.

        if ($this->_context != 'MSGID') {
          // A plural form can only be added to an msgid directly.
          $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: "msgid_plural" was expected but not found on line %line.', $log_vars);
          return FALSE;
        }

        // Remove 'msgid_plural' and trim away whitespace.
        $line = trim(substr($line, 12));

        // Only the plural source string is left, parse it.
        $quoted = $this->parseQuoted($line);
        if ($quoted === FALSE) {
          // The plural form must be wrapped in quotes.
          $this->_errors[] = SafeMarkup::format('The translation stream %uri contains a syntax error on line %line.', $log_vars);
          return FALSE;
        }

        // Append the plural source to the current entry.
        if (is_string($this->_current_item['msgid'])) {
          // The first value was stored as string. Now we know the context is
          // plural, it is converted to array.
          $this->_current_item['msgid'] = [$this->_current_item['msgid']];
        }
        $this->_current_item['msgid'][] = $quoted;

        $this->_context = 'MSGID_PLURAL';
        return;
      }
      elseif (!strncmp('msgid', $line, 5)) {
        // Starting a new message.

        if (($this->_context == 'MSGSTR') || ($this->_context == 'MSGSTR_ARR')) {
          // We are currently in string context, save current item.
          $this->setItemFromArray($this->_current_item);

          // Start a new context for the msgid.
          $this->_current_item = [];
        }
        elseif ($this->_context == 'MSGID') {
          // We are currently already in the context, meaning we passed an id
          // with no data.
          $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: "msgid" is unexpected on line %line.', $log_vars);
          return FALSE;
        }

        // Remove 'msgid' and trim away whitespace.
        $line = trim(substr($line, 5));

        // Only the message id string is left, parse it.
        $quoted = $this->parseQuoted($line);
        if ($quoted === FALSE) {
          // The message id must be wrapped in quotes.
          $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: invalid format for "msgid" on line %line.', $log_vars);
          return FALSE;
        }

        $this->_current_item['msgid'] = $quoted;
        $this->_context = 'MSGID';
        return;
      }
      elseif (!strncmp('msgctxt', $line, 7)) {
        // Starting a new context.

        if (($this->_context == 'MSGSTR') || ($this->_context == 'MSGSTR_ARR')) {
          // We are currently in string context, save current item.
          $this->setItemFromArray($this->_current_item);
          $this->_current_item = [];
        }
        elseif (!empty($this->_current_item['msgctxt'])) {
          // A context cannot apply to another context.
          $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: "msgctxt" is unexpected on line %line.', $log_vars);
          return FALSE;
        }

        // Remove 'msgctxt' and trim away whitespaces.
        $line = trim(substr($line, 7));

        // Only the msgctxt string is left, parse it.
        $quoted = $this->parseQuoted($line);
        if ($quoted === FALSE) {
          // The context string must be quoted.
          $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: invalid format for "msgctxt" on line %line.', $log_vars);
          return FALSE;
        }

        $this->_current_item['msgctxt'] = $quoted;

        $this->_context = 'MSGCTXT';
        return;
      }
      elseif (!strncmp('msgstr[', $line, 7)) {
        // A message string for a specific plurality. This prefix has to be
        // tested before the shorter 'msgstr' prefix below.

        if (($this->_context != 'MSGID') &&
          ($this->_context != 'MSGCTXT') &&
          ($this->_context != 'MSGID_PLURAL') &&
          ($this->_context != 'MSGSTR_ARR')) {
          // Plural message strings must come after msgid, msgctxt,
          // msgid_plural, or other msgstr[] entries.
          $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: "msgstr[]" is unexpected on line %line.', $log_vars);
          return FALSE;
        }

        // Ensure the plurality is terminated.
        if (strpos($line, ']') === FALSE) {
          $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: invalid format for "msgstr[]" on line %line.', $log_vars);
          return FALSE;
        }

        // Extract the plurality: the text between '[' and the first ']'.
        $frombracket = strstr($line, '[');
        $this->_current_plural_index = substr($frombracket, 1, strpos($frombracket, ']') - 1);

        // Skip to the next whitespace and trim away any further whitespace,
        // bringing $line to the message text only.
        $line = trim(strstr($line, " "));

        $quoted = $this->parseQuoted($line);
        if ($quoted === FALSE) {
          // The string must be quoted.
          $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: invalid format for "msgstr[]" on line %line.', $log_vars);
          return FALSE;
        }
        if (!isset($this->_current_item['msgstr']) || !is_array($this->_current_item['msgstr'])) {
          $this->_current_item['msgstr'] = [];
        }

        $this->_current_item['msgstr'][$this->_current_plural_index] = $quoted;

        $this->_context = 'MSGSTR_ARR';
        return;
      }
      elseif (!strncmp("msgstr", $line, 6)) {
        // A string pair for an msgid (with optional context).

        if (($this->_context != 'MSGID') && ($this->_context != 'MSGCTXT')) {
          // Strings are only valid within an id or context scope.
          $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: "msgstr" is unexpected on line %line.', $log_vars);
          return FALSE;
        }

        // Remove 'msgstr' and trim away whitespaces.
        $line = trim(substr($line, 6));

        // Only the msgstr string is left, parse it.
        $quoted = $this->parseQuoted($line);
        if ($quoted === FALSE) {
          // The string must be quoted.
          $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: invalid format for "msgstr" on line %line.', $log_vars);
          return FALSE;
        }

        $this->_current_item['msgstr'] = $quoted;

        $this->_context = 'MSGSTR';
        return;
      }
      elseif ($line != '') {
        // Anything that is not a token may be a continuation of a previous
        // token.

        $quoted = $this->parseQuoted($line);
        if ($quoted === FALSE) {
          // This string must be quoted.
          $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: string continuation expected on line %line.', $log_vars);
          return FALSE;
        }

        // Append the string to the current item.
        if (($this->_context == 'MSGID') || ($this->_context == 'MSGID_PLURAL')) {
          if (is_array($this->_current_item['msgid'])) {
            // Add string to last array element for plural sources.
            $last_index = count($this->_current_item['msgid']) - 1;
            $this->_current_item['msgid'][$last_index] .= $quoted;
          }
          else {
            // Singular source, just append the string.
            $this->_current_item['msgid'] .= $quoted;
          }
        }
        elseif ($this->_context == 'MSGCTXT') {
          // Multiline context name.
          $this->_current_item['msgctxt'] .= $quoted;
        }
        elseif ($this->_context == 'MSGSTR') {
          // Multiline translation string.
          $this->_current_item['msgstr'] .= $quoted;
        }
        elseif ($this->_context == 'MSGSTR_ARR') {
          // Multiline plural translation string.
          $this->_current_item['msgstr'][$this->_current_plural_index] .= $quoted;
        }
        else {
          // No valid context to append to.
          $this->_errors[] = SafeMarkup::format('The translation stream %uri contains an error: unexpected string on line %line.', $log_vars);
          return FALSE;
        }
        return;
      }
    }

    // Empty line read or EOF of PO stream, close out the last entry.
    if (($this->_context == 'MSGSTR') || ($this->_context == 'MSGSTR_ARR')) {
      $this->setItemFromArray($this->_current_item);
      $this->_current_item = [];
    }
    elseif ($this->_context != 'COMMENT') {
      $this->_errors[] = SafeMarkup::format('The translation stream %uri ended unexpectedly at line %line.', $log_vars);
      return FALSE;
    }
  }

  /**
   * Store the parsed values as a PoItem object.
   *
   * The created item becomes the result of the current readItem() call and
   * the parser context is reset to 'COMMENT'.
   *
   * @param array $value
   *   The collected entry. Recognized keys are '#' (array of comment lines),
   *   'msgctxt', 'msgid' and 'msgstr'. An array 'msgstr' marks the item as
   *   plural and is sorted by plural index.
   */
  public function setItemFromArray($value) {
    // The state machine accepts a msgstr directly after a msgctxt, so 'msgid'
    // is not guaranteed to exist. Guard the reads to avoid undefined index
    // notices; the value handed to the item stays NULL in that case.
    $source = isset($value['msgid']) ? $value['msgid'] : NULL;
    $translation = isset($value['msgstr']) ? $value['msgstr'] : NULL;

    $plural = is_array($translation);
    if ($plural) {
      // Sort plural variants by their form index.
      ksort($translation);
    }

    $comments = '';
    if (isset($value['#'])) {
      $comments = $this->shortenComments($value['#']);
    }

    $item = new PoItem();
    $item->setContext(isset($value['msgctxt']) ? $value['msgctxt'] : '');
    $item->setSource($source);
    $item->setTranslation($translation);
    $item->setPlural($plural);
    $item->setComment($comments);
    $item->setLangcode($this->_langcode);

    $this->_last_item = $item;

    $this->_context = 'COMMENT';
  }

  /**
   * Parses a string in quotes.
   *
   * @param string $string
   *   A string specified with enclosing quotes.
   *
   * @return string|false
   *   The string parsed from inside the quotes: unescaped with stripcslashes()
   *   for double quotes, returned as-is for single quotes. FALSE if the string
   *   is not enclosed in a matching pair of single or double quotes.
   */
  public function parseQuoted($string) {
    $string = (string) $string;

    // An opening and a closing quote are both required. Without this check a
    // lone quote character would count as its own closing quote and be
    // accepted as an empty string.
    if (strlen($string) < 2) {
      return FALSE;
    }

    $quote = substr($string, 0, 1);
    if ($quote != substr($string, -1, 1)) {
      // Start and end quotes must be the same.
      return FALSE;
    }

    $inner = substr($string, 1, -1);
    if ($quote == '"') {
      // Double quotes: strip slashes.
      return stripcslashes($inner);
    }
    if ($quote == "'") {
      // Simple quote: return as-is.
      return $inner;
    }

    // Unrecognized quote.
    return FALSE;
  }

  /**
   * Generates a short, one-string version of the passed comment array.
   *
   * The first character of every comment line is dropped and the lines are
   * joined with ', '. Lines are no longer added once the joined text has
   * reached 130 characters.
   *
   * @param array $comment
   *   An array of strings containing a comment.
   *
   * @return string
   *   Short one-string version of the comment.
   */
  private function shortenComments($comment) {
    $summary = '';
    foreach ($comment as $entry) {
      // The limit is checked against the text collected so far, so the entry
      // that crosses the limit is still included in full.
      if (strlen($summary) >= 130) {
        break;
      }
      $summary .= substr($entry, 1) . ', ';
    }
    // Drop the trailing ', ' separator.
    return trim(substr($summary, 0, -2));
  }

}