comparison vendor/zendframework/zend-feed/src/Reader/Feed/Rss.php @ 0:c75dbcec494b

Initial commit from drush-created site
author Chris Cannam
date Thu, 05 Jul 2018 14:24:15 +0000
parents
children 5311817fb629
comparison
equal deleted inserted replaced
-1:000000000000 0:c75dbcec494b
1 <?php
2 /**
3 * Zend Framework (http://framework.zend.com/)
4 *
5 * @link http://github.com/zendframework/zf2 for the canonical source repository
6 * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
7 * @license http://framework.zend.com/license/new-bsd New BSD License
8 */
9
10 namespace Zend\Feed\Reader\Feed;
11
12 use DateTime;
13 use DOMDocument;
14 use Zend\Feed\Reader;
15 use Zend\Feed\Reader\Collection;
16 use Zend\Feed\Reader\Exception;
17
/**
 * Feed-level reader for RSS documents.
 *
 * Two XPath layouts are handled: the plain layout rooted at `/rss/channel`
 * and the RDF layout rooted at `/rdf:RDF/rss:channel`, which is selected when
 * the feed type is Reader::TYPE_RSS_10 or Reader::TYPE_RSS_090.
 *
 * Every getter caches its result in `$this->data` (including `null`, which is
 * why the cache is probed with array_key_exists() rather than isset()). Most
 * getters read the native RSS element first and then fall back to the
 * DublinCore and Atom feed extensions registered by the constructor.
 */
class Rss extends AbstractFeed
{
    /**
     * Constructor
     *
     * Registers the DublinCore and Atom feed extensions against the same
     * document, type and XPath object as this feed, then tells every loaded
     * extension which channel prefix to use for its own queries.
     *
     * @param DOMDocument $dom
     * @param string $type
     */
    public function __construct(DOMDocument $dom, $type = null)
    {
        parent::__construct($dom, $type);

        $manager = Reader\Reader::getExtensionManager();

        foreach (['DublinCore\Feed', 'Atom\Feed'] as $name) {
            $extension = $manager->get($name);
            $extension->setDomDocument($dom);
            $extension->setType($this->data['type']);
            $extension->setXpath($this->xpath);
            $this->extensions[$name] = $extension;
        }

        $xpathPrefix = $this->isRdfFeedType() ? '/rdf:RDF/rss:channel' : '/rss/channel';

        foreach ($this->extensions as $extension) {
            $extension->setXpathPrefix($xpathPrefix);
        }
    }

    /**
     * Get a single author
     *
     * @param int $index Position within the collection returned by getAuthors()
     * @return array|null The author entry at $index, or null when there is none
     */
    public function getAuthor($index = 0)
    {
        $authors = $this->getAuthors();

        return isset($authors[$index]) ? $authors[$index] : null;
    }

    /**
     * Get the feed authors
     *
     * Authors are gathered from the DublinCore extension (name only) and from
     * any author elements in the document. When neither source yields
     * anything, the Atom extension's authors are used instead.
     *
     * @return Collection\Author|null Null when no author could be found
     */
    public function getAuthors()
    {
        if (array_key_exists('authors', $this->data)) {
            return $this->data['authors'];
        }

        $authors = [];
        $authorsDc = $this->getExtension('DublinCore')->getAuthors();
        if (!empty($authorsDc)) {
            foreach ($authorsDc as $author) {
                $authors[] = ['name' => $author['name']];
            }
        }

        /**
         * Technically RSS doesn't specify author element use at the feed level
         * but it's supported on a "just in case" basis.
         */
        $list = $this->xpath->query($this->isRdfFeedType() ? '//rss:author' : '//author');

        foreach ($list as $author) {
            $string = trim($author->nodeValue);
            $data = [];
            // Pretty rough parsing - but it's a catchall.
            // Only values containing an "@" are kept; a trailing "(Name)"
            // suffix, when present, supplies the display name.
            if (preg_match("/^.*@[^ ]*/", $string, $matches)) {
                $data['email'] = trim($matches[0]);
                if (preg_match("/\((.*)\)$/", $string, $matches)) {
                    $data['name'] = $matches[1];
                }
                $authors[] = $data;
            }
        }

        if (count($authors) === 0) {
            $authors = $this->getExtension('Atom')->getAuthors();
        } else {
            $authors = new Collection\Author(
                Reader\Reader::arrayUnique($authors)
            );
        }

        // Guard against a null fallback before counting: count(null) warns on
        // PHP 7.2+ and throws a TypeError on PHP 8.
        if ($authors === null || count($authors) === 0) {
            $authors = null;
        }

        $this->data['authors'] = $authors;

        return $this->data['authors'];
    }

    /**
     * Get the copyright entry
     *
     * @return string|null
     */
    public function getCopyright()
    {
        if (array_key_exists('copyright', $this->data)) {
            return $this->data['copyright'];
        }

        $copyright = null;

        // Only the plain layout is queried for a native copyright element.
        if (!$this->isRdfFeedType()) {
            $copyright = $this->xpath->evaluate('string(/rss/channel/copyright)');
        }

        if (!$copyright && $this->getExtension('DublinCore') !== null) {
            $copyright = $this->getExtension('DublinCore')->getCopyright();
        }

        if (empty($copyright)) {
            $copyright = $this->getExtension('Atom')->getCopyright();
        }

        if (!$copyright) {
            $copyright = null;
        }

        $this->data['copyright'] = $copyright;

        return $this->data['copyright'];
    }

    /**
     * Get the feed creation date
     *
     * This is an alias of getDateModified().
     *
     * @return DateTime|null
     */
    public function getDateCreated()
    {
        return $this->getDateModified();
    }

    /**
     * Get the feed modification date
     *
     * For the plain layout the channel's pubDate is used, falling back to
     * lastBuildDate. If that yields no usable date (or the feed uses the RDF
     * layout), the DublinCore and then the Atom extension are consulted.
     *
     * @return DateTime|null Null when no date could be determined
     */
    public function getDateModified()
    {
        if (array_key_exists('datemodified', $this->data)) {
            return $this->data['datemodified'];
        }

        $date = null;

        if (!$this->isRdfFeedType()) {
            $dateModified = $this->xpath->evaluate('string(/rss/channel/pubDate)');
            if (!$dateModified) {
                $dateModified = $this->xpath->evaluate('string(/rss/channel/lastBuildDate)');
            }
            if ($dateModified) {
                $date = $this->parseRssDateString($dateModified);
            }
        }

        if (!$date) {
            $date = $this->getExtension('DublinCore')->getDate();
        }

        if (!$date) {
            $date = $this->getExtension('Atom')->getDateModified();
        }

        if (!$date) {
            $date = null;
        }

        $this->data['datemodified'] = $date;

        return $this->data['datemodified'];
    }

    /**
     * Get the feed lastBuild date
     *
     * Only the plain layout is queried; there is no extension fallback.
     *
     * @return DateTime|null Null when the element is missing or unparseable
     */
    public function getLastBuildDate()
    {
        if (array_key_exists('lastBuildDate', $this->data)) {
            return $this->data['lastBuildDate'];
        }

        $date = null;

        if (!$this->isRdfFeedType()) {
            $lastBuildDate = $this->xpath->evaluate('string(/rss/channel/lastBuildDate)');
            if ($lastBuildDate) {
                $date = $this->parseRssDateString($lastBuildDate);
            }
        }

        $this->data['lastBuildDate'] = $date;

        return $this->data['lastBuildDate'];
    }

    /**
     * Get the feed description
     *
     * @return string|null
     */
    public function getDescription()
    {
        if (array_key_exists('description', $this->data)) {
            return $this->data['description'];
        }

        if ($this->isRdfFeedType()) {
            $description = $this->xpath->evaluate('string(/rdf:RDF/rss:channel/rss:description)');
        } else {
            $description = $this->xpath->evaluate('string(/rss/channel/description)');
        }

        if (!$description && $this->getExtension('DublinCore') !== null) {
            $description = $this->getExtension('DublinCore')->getDescription();
        }

        if (empty($description)) {
            $description = $this->getExtension('Atom')->getDescription();
        }

        if (!$description) {
            $description = null;
        }

        $this->data['description'] = $description;

        return $this->data['description'];
    }

    /**
     * Get the feed ID
     *
     * Resolution order: channel guid (plain layout only), DublinCore id,
     * Atom id, then the feed link, then the feed title.
     *
     * @return string|null
     */
    public function getId()
    {
        if (array_key_exists('id', $this->data)) {
            return $this->data['id'];
        }

        $id = null;

        if (!$this->isRdfFeedType()) {
            $id = $this->xpath->evaluate('string(/rss/channel/guid)');
        }

        if (!$id && $this->getExtension('DublinCore') !== null) {
            $id = $this->getExtension('DublinCore')->getId();
        }

        if (empty($id)) {
            $id = $this->getExtension('Atom')->getId();
        }

        if (!$id) {
            // Last resort: identify the feed by its link, then by its title.
            $link = $this->getLink();
            if ($link) {
                $id = $link;
            } else {
                $title = $this->getTitle();
                $id = $title ? $title : null;
            }
        }

        $this->data['id'] = $id;

        return $this->data['id'];
    }

    /**
     * Get the feed image data
     *
     * The result may contain the keys uri, link, title, height, width and
     * description; a key is only present when its element is non-empty.
     *
     * @return array|null Null when the channel has no image element
     */
    public function getImage()
    {
        if (array_key_exists('image', $this->data)) {
            return $this->data['image'];
        }

        if ($this->isRdfFeedType()) {
            $list = $this->xpath->query('/rdf:RDF/rss:channel/rss:image');
            $prefix = '/rdf:RDF/rss:channel/rss:image[1]';
        } else {
            $list = $this->xpath->query('/rss/channel/image');
            $prefix = '/rss/channel/image[1]';
        }

        if ($list->length > 0) {
            // Result key => child element name, in the order the keys are emitted.
            // NOTE(review): the child steps are unprefixed even under the RDF
            // prefix - confirm this is intended for namespaced RSS 1.0/0.90 feeds.
            $fields = [
                'uri'         => 'url',
                'link'        => 'link',
                'title'       => 'title',
                'height'      => 'height',
                'width'       => 'width',
                'description' => 'description',
            ];

            $image = [];
            foreach ($fields as $key => $element) {
                $value = $this->xpath->evaluate('string(' . $prefix . '/' . $element . ')');
                if ($value) {
                    $image[$key] = $value;
                }
            }
        } else {
            $image = null;
        }

        $this->data['image'] = $image;

        return $this->data['image'];
    }

    /**
     * Get the feed language
     *
     * Falls back from the channel language element (plain layout only) to the
     * DublinCore and Atom extensions, and finally to the first xml:lang
     * attribute matched in the document.
     *
     * @return string|null
     */
    public function getLanguage()
    {
        if (array_key_exists('language', $this->data)) {
            return $this->data['language'];
        }

        $language = null;

        if (!$this->isRdfFeedType()) {
            $language = $this->xpath->evaluate('string(/rss/channel/language)');
        }

        if (!$language && $this->getExtension('DublinCore') !== null) {
            $language = $this->getExtension('DublinCore')->getLanguage();
        }

        if (empty($language)) {
            $language = $this->getExtension('Atom')->getLanguage();
        }

        if (!$language) {
            $language = $this->xpath->evaluate('string(//@xml:lang[1])');
        }

        if (!$language) {
            $language = null;
        }

        $this->data['language'] = $language;

        return $this->data['language'];
    }

    /**
     * Get a link to the feed
     *
     * @return string|null
     */
    public function getLink()
    {
        if (array_key_exists('link', $this->data)) {
            return $this->data['link'];
        }

        if ($this->isRdfFeedType()) {
            $link = $this->xpath->evaluate('string(/rdf:RDF/rss:channel/rss:link)');
        } else {
            $link = $this->xpath->evaluate('string(/rss/channel/link)');
        }

        if (empty($link)) {
            $link = $this->getExtension('Atom')->getLink();
        }

        if (!$link) {
            $link = null;
        }

        $this->data['link'] = $link;

        return $this->data['link'];
    }

    /**
     * Get a link to the feed XML
     *
     * Uses the Atom extension's feed link, falling back to the original
     * source URI of this feed.
     *
     * @return string|null
     */
    public function getFeedLink()
    {
        if (array_key_exists('feedlink', $this->data)) {
            return $this->data['feedlink'];
        }

        $link = $this->getExtension('Atom')->getFeedLink();

        // empty() already covers null, so no separate null comparison is needed.
        if (empty($link)) {
            $link = $this->getOriginalSourceUri();
        }

        $this->data['feedlink'] = $link;

        return $this->data['feedlink'];
    }

    /**
     * Get the feed generator entry
     *
     * @return string|null
     */
    public function getGenerator()
    {
        if (array_key_exists('generator', $this->data)) {
            return $this->data['generator'];
        }

        $isRdf = $this->isRdfFeedType();
        $generator = null;

        if (!$isRdf) {
            $generator = $this->xpath->evaluate('string(/rss/channel/generator)');
        }

        if (!$generator) {
            // Look for an atom:generator element directly under the channel.
            if ($isRdf) {
                $generator = $this->xpath->evaluate('string(/rdf:RDF/rss:channel/atom:generator)');
            } else {
                $generator = $this->xpath->evaluate('string(/rss/channel/atom:generator)');
            }
        }

        if (empty($generator)) {
            $generator = $this->getExtension('Atom')->getGenerator();
        }

        if (!$generator) {
            $generator = null;
        }

        $this->data['generator'] = $generator;

        return $this->data['generator'];
    }

    /**
     * Get the feed title
     *
     * @return string|null
     */
    public function getTitle()
    {
        if (array_key_exists('title', $this->data)) {
            return $this->data['title'];
        }

        if ($this->isRdfFeedType()) {
            $title = $this->xpath->evaluate('string(/rdf:RDF/rss:channel/rss:title)');
        } else {
            $title = $this->xpath->evaluate('string(/rss/channel/title)');
        }

        if (!$title && $this->getExtension('DublinCore') !== null) {
            $title = $this->getExtension('DublinCore')->getTitle();
        }

        if (!$title) {
            $title = $this->getExtension('Atom')->getTitle();
        }

        if (!$title) {
            $title = null;
        }

        $this->data['title'] = $title;

        return $this->data['title'];
    }

    /**
     * Get an array of any supported PubSubHubbub endpoints
     *
     * The endpoints come from the Atom extension and are de-duplicated.
     *
     * @return array|null
     */
    public function getHubs()
    {
        if (array_key_exists('hubs', $this->data)) {
            return $this->data['hubs'];
        }

        $hubs = $this->getExtension('Atom')->getHubs();

        $hubs = empty($hubs) ? null : array_unique($hubs);

        $this->data['hubs'] = $hubs;

        return $this->data['hubs'];
    }

    /**
     * Get all categories
     *
     * Category elements anywhere below the channel are used when present;
     * otherwise the DublinCore categories are used, and if those are empty
     * the Atom categories.
     *
     * @return Collection\Category
     */
    public function getCategories()
    {
        if (array_key_exists('categories', $this->data)) {
            return $this->data['categories'];
        }

        if ($this->isRdfFeedType()) {
            $list = $this->xpath->query('/rdf:RDF/rss:channel//rss:category');
        } else {
            $list = $this->xpath->query('/rss/channel//category');
        }

        if ($list->length) {
            $categoryCollection = new Collection\Category();
            foreach ($list as $category) {
                // The element text serves as both term and label; the
                // "domain" attribute supplies the scheme.
                $categoryCollection[] = [
                    'term'   => $category->nodeValue,
                    'scheme' => $category->getAttribute('domain'),
                    'label'  => $category->nodeValue,
                ];
            }
        } else {
            $categoryCollection = $this->getExtension('DublinCore')->getCategories();
        }

        // Null is treated like an empty collection so count() is never
        // handed a non-countable value.
        if ($categoryCollection === null || count($categoryCollection) === 0) {
            $categoryCollection = $this->getExtension('Atom')->getCategories();
        }

        $this->data['categories'] = $categoryCollection;

        return $this->data['categories'];
    }

    /**
     * Read all entries to the internal entries array
     *
     */
    protected function indexEntries()
    {
        $entries = $this->xpath->evaluate($this->isRdfFeedType() ? '//rss:item' : '//item');

        foreach ($entries as $index => $entry) {
            $this->entries[$index] = $entry;
        }
    }

    /**
     * Register the default namespaces for the current feed format
     *
     * Only the RDF-based types need the "rdf" and "rss" prefixes; for any
     * other type nothing is registered here.
     */
    protected function registerNamespaces()
    {
        switch ($this->data['type']) {
            case Reader\Reader::TYPE_RSS_10:
                $this->xpath->registerNamespace('rdf', Reader\Reader::NAMESPACE_RDF);
                $this->xpath->registerNamespace('rss', Reader\Reader::NAMESPACE_RSS_10);
                break;

            case Reader\Reader::TYPE_RSS_090:
                $this->xpath->registerNamespace('rdf', Reader\Reader::NAMESPACE_RDF);
                $this->xpath->registerNamespace('rss', Reader\Reader::NAMESPACE_RSS_090);
                break;
        }
    }

    /**
     * Whether the feed uses the RDF layout (`/rdf:RDF/rss:channel`).
     *
     * @return bool True for Reader::TYPE_RSS_10 and Reader::TYPE_RSS_090
     */
    private function isRdfFeedType()
    {
        $type = $this->getType();

        return $type === Reader\Reader::TYPE_RSS_10
            || $type === Reader\Reader::TYPE_RSS_090;
    }

    /**
     * Convert a date string taken from the feed into a DateTime.
     *
     * strtotime() is tried first; if it rejects the value, the RSS, RFC 822
     * and RFC 2822 formats are tried in turn. DateTime::createFromFormat()
     * signals failure by returning false rather than throwing, so each result
     * is checked explicitly.
     *
     * @param string $value Non-empty date string
     * @return DateTime|null Null when the value matches none of the formats
     */
    private function parseRssDateString($value)
    {
        $timestamp = strtotime($value);
        if ($timestamp !== false) {
            return new DateTime('@' . $timestamp);
        }

        foreach ([DateTime::RSS, DateTime::RFC822, DateTime::RFC2822] as $format) {
            $parsed = DateTime::createFromFormat($format, $value);
            if ($parsed !== false) {
                return $parsed;
            }
        }

        return null;
    }
}