comparison vendor/zendframework/zend-feed/src/Reader/Reader.php @ 0:4c8ae668cc8c

Initial import (non-working)
author Chris Cannam
date Wed, 29 Nov 2017 16:09:58 +0000
parents
children 7a779792577d
comparison
equal deleted inserted replaced
-1:000000000000 0:4c8ae668cc8c
1 <?php
2 /**
3 * Zend Framework (http://framework.zend.com/)
4 *
5 * @link http://github.com/zendframework/zf2 for the canonical source repository
6 * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
7 * @license http://framework.zend.com/license/new-bsd New BSD License
8 */
9
10 namespace Zend\Feed\Reader;
11
12 use DOMDocument;
13 use DOMXPath;
14 use Zend\Cache\Storage\StorageInterface as CacheStorage;
15 use Zend\Http as ZendHttp;
16 use Zend\Stdlib\ErrorHandler;
17 use Zend\Feed\Reader\Exception\InvalidHttpClientException;
18
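19 /**
 * Static entry point for importing feeds and detecting their type.
 *
 * Holds the shared cache, HTTP client and extension registry that the
 * import methods of this class rely on.
20 */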
21 class Reader implements ReaderImportInterface
22 {
23 /**
24 * XML namespace URIs that detectType() registers with XPath in order to
 * recognise RDF, RSS 0.90/1.0 and Atom 0.3/1.0 documents.
25 */
26 const NAMESPACE_ATOM_03 = 'http://purl.org/atom/ns#';
27 const NAMESPACE_ATOM_10 = 'http://www.w3.org/2005/Atom';
28 const NAMESPACE_RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
29 const NAMESPACE_RSS_090 = 'http://my.netscape.com/rdf/simple/0.9/';
30 const NAMESPACE_RSS_10 = 'http://purl.org/rss/1.0/';
31
32 /**
33 * Feed type identifiers. detectType() returns a subset of these values,
 * and importString() picks the reader class from the prefix of the value.
34 */
35 const TYPE_ANY = 'any';
36 const TYPE_ATOM_03 = 'atom-03';
37 const TYPE_ATOM_10 = 'atom-10';
38 const TYPE_ATOM_10_ENTRY = 'atom-10-entry';
39 const TYPE_ATOM_ANY = 'atom';
40 const TYPE_RSS_090 = 'rss-090';
41 const TYPE_RSS_091 = 'rss-091';
42 const TYPE_RSS_091_NETSCAPE = 'rss-091n';
43 const TYPE_RSS_091_USERLAND = 'rss-091u';
44 const TYPE_RSS_092 = 'rss-092';
45 const TYPE_RSS_093 = 'rss-093';
46 const TYPE_RSS_094 = 'rss-094';
47 const TYPE_RSS_10 = 'rss-10';
48 const TYPE_RSS_20 = 'rss-20';
49 const TYPE_RSS_ANY = 'rss';
50
51 /**
52 * Cache instance used by import(); null until setCache() is called
53 *
54 * @var CacheStorage|null
55 */
56 protected static $cache = null;
57
58 /**
59 * HTTP client object to use for retrieving feeds; created lazily by
 * getHttpClient() when none has been set
60 *
61 * @var Http\ClientInterface|null
62 */
63 protected static $httpClient = null;
64
65 /**
66 * Override HTTP PUT and DELETE request methods?
 *
 * Within this class the flag is only stored and returned by its accessors.
67 *
68 * @var bool
69 */
70 protected static $httpMethodOverride = false;
71
/**
 * Whether import() should perform HTTP conditional GET requests.
 *
 * Only takes effect when a cache is also configured; toggled through
 * useHttpConditionalGet().
 *
 * @var bool
 */
72 protected static $httpConditionalGet = false;
73
/**
 * Plugin manager used to resolve extensions; getExtensionManager() installs
 * a StandaloneExtensionManager when this is still null.
 *
 * @var ExtensionManagerInterface|null
 */
74 protected static $extensionManager = null;
75
/**
 * Registered extension names, grouped under the keys 'feed', 'entry' and
 * 'core'. registerExtension() appends to the 'feed' and 'entry' lists;
 * reset() restores exactly this initial value.
 *
 * @var array
 */
76 protected static $extensions = [
77 'feed' => [
78 'DublinCore\Feed',
79 'Atom\Feed'
80 ],
81 'entry' => [
82 'Content\Entry',
83 'DublinCore\Entry',
84 'Atom\Entry'
85 ],
86 'core' => [
87 'DublinCore\Feed',
88 'Atom\Feed',
89 'Content\Entry',
90 'DublinCore\Entry',
91 'Atom\Entry'
92 ]
93 ];
94
/**
 * Retrieve the cache storage shared by the import methods.
 *
 * @return CacheStorage|null The configured cache, or null if none was set
 */
public static function getCache()
{
    return static::$cache;
}
104
/**
 * Install the cache storage that import() will read from and write to.
 *
 * @param  CacheStorage $cache Storage to use for fetched feed bodies
 * @return void
 */
public static function setCache(CacheStorage $cache)
{
    static::$cache = $cache;
}
115
/**
 * Set the HTTP client used for retrieving feeds.
 *
 * A ZendHttp\Client is wrapped in a ZendHttpClientDecorator; anything else
 * must already implement Http\ClientInterface.
 *
 * @param  ZendHttp\Client|Http\ClientInterface $httpClient
 * @return void
 * @throws InvalidHttpClientException if the client is of neither accepted type
 */
public static function setHttpClient($httpClient)
{
    // Adapt plain Zend HTTP clients to the reader's own client contract.
    $client = ($httpClient instanceof ZendHttp\Client)
        ? new Http\ZendHttpClientDecorator($httpClient)
        : $httpClient;

    if ($client instanceof Http\ClientInterface) {
        static::$httpClient = $client;
        return;
    }

    throw new InvalidHttpClientException();
}
135
/**
 * Get the HTTP client used for retrieving feeds.
 *
 * When no client has been set yet, a decorated ZendHttp\Client is created,
 * stored and returned.
 *
 * @return Http\ClientInterface
 */
public static function getHttpClient()
{
    if (static::$httpClient) {
        return static::$httpClient;
    }

    static::$httpClient = new Http\ZendHttpClientDecorator(new ZendHttp\Client());

    return static::$httpClient;
}
149
/**
 * Toggle using POST instead of PUT and DELETE HTTP methods
 *
 * Some feed implementations do not accept PUT and DELETE, or proxies and
 * similar measures prevent their use. Enabling the override requests that
 * POST be used in their place, accompanied by an X-Method-Override header
 * carrying PUT or DELETE as appropriate. This method only records the
 * flag; it is exposed again through getHttpMethodOverride().
 *
 * @param  bool $override Whether to override PUT and DELETE.
 * @return void
 */
public static function setHttpMethodOverride($override = true)
{
    static::$httpMethodOverride = $override;
}
167
/**
 * Report whether the HTTP method override flag is currently set.
 *
 * @return bool
 */
public static function getHttpMethodOverride()
{
    return static::$httpMethodOverride;
}
177
/**
 * Enable or disable HTTP conditional GET for import().
 *
 * The flag only has an effect when a cache has also been configured.
 *
 * @param  bool $bool True to enable conditional GET, false to disable it
 * @return void
 */
public static function useHttpConditionalGet($bool = true)
{
    static::$httpConditionalGet = $bool;
}
188
/**
 * Import a feed by providing a URI
 *
 * Depending on configuration one of three strategies is used:
 *  - conditional GET enabled and a cache set: the feed is requested with
 *    If-None-Match / If-Modified-Since headers (when a cached body exists
 *    and the client can send headers); a 304 response reuses the cached
 *    body, a 200 response refreshes the cache;
 *  - cache set only: a cached body is returned without any request,
 *    otherwise the feed is fetched and cached;
 *  - no cache: the feed is always fetched.
 *
 * In every case the URI is recorded on the returned reader as its original
 * source URI when the reader supports it.
 *
 * @param  string $uri The URI to the feed
 * @param  string $etag OPTIONAL Last received ETag for this resource
 * @param  string $lastModified OPTIONAL Last-Modified value for this resource
 * @return Feed\FeedInterface
 * @throws Exception\RuntimeException if the response status is not 200
 *         (or 304 when performing a conditional GET)
 */
public static function import($uri, $etag = null, $lastModified = null)
{
    $cache   = self::getCache();
    $client  = self::getHttpClient();
    $cacheId = 'Zend_Feed_Reader_' . md5($uri);

    if (static::$httpConditionalGet && $cache) {
        $headers = [];
        $data    = $cache->getItem($cacheId);
        if ($data && $client instanceof Http\HeaderAwareClientInterface) {
            // Only check for ETag and last modified values in the cache
            // if we have a client capable of emitting headers in the first place.
            if ($etag === null) {
                $etag = $cache->getItem($cacheId . '_etag');
            }
            if ($lastModified === null) {
                $lastModified = $cache->getItem($cacheId . '_lastmodified');
            }
            if ($etag) {
                $headers['If-None-Match'] = [$etag];
            }
            if ($lastModified) {
                $headers['If-Modified-Since'] = [$lastModified];
            }
        }

        $response = $client->get($uri, $headers);

        // Normalise to int, as the other branches do: a client reporting the
        // status as the string "200" must not be treated as a failure.
        $statusCode = (int) $response->getStatusCode();
        if ($statusCode !== 200 && $statusCode !== 304) {
            throw new Exception\RuntimeException('Feed failed to load, got response code ' . $response->getStatusCode());
        }

        if ($statusCode === 304) {
            // Not modified: fall back to the body cached earlier.
            $responseXml = $data;
        } else {
            $responseXml = $response->getBody();
            $cache->setItem($cacheId, $responseXml);

            if ($response instanceof Http\HeaderAwareResponseInterface) {
                if ($response->getHeaderLine('ETag', false)) {
                    $cache->setItem($cacheId . '_etag', $response->getHeaderLine('ETag'));
                }
                if ($response->getHeaderLine('Last-Modified', false)) {
                    $cache->setItem($cacheId . '_lastmodified', $response->getHeaderLine('Last-Modified'));
                }
            }
        }

        return self::importStringWithSourceUri($responseXml, $uri);
    }

    if ($cache) {
        $data = $cache->getItem($cacheId);
        if ($data) {
            return self::importStringWithSourceUri($data, $uri);
        }
        $response = $client->get($uri);
        if ((int) $response->getStatusCode() !== 200) {
            throw new Exception\RuntimeException('Feed failed to load, got response code ' . $response->getStatusCode());
        }
        $responseXml = $response->getBody();
        $cache->setItem($cacheId, $responseXml);

        return self::importStringWithSourceUri($responseXml, $uri);
    }

    $response = $client->get($uri);
    if ((int) $response->getStatusCode() !== 200) {
        throw new Exception\RuntimeException('Feed failed to load, got response code ' . $response->getStatusCode());
    }

    return self::importStringWithSourceUri($response->getBody(), $uri);
}

/**
 * Parse feed XML and record the URI it was obtained from on the reader.
 *
 * @param  string $xml Feed document to parse
 * @param  string $uri URI the document was (originally) retrieved from
 * @return Feed\FeedInterface
 */
private static function importStringWithSourceUri($xml, $uri)
{
    $reader = static::importString($xml);

    // importString() can also build an Entry\Atom for single-entry documents.
    // NOTE(review): guarded on the assumption that entry readers may not
    // expose setOriginalSourceUri() - confirm against Entry\Atom.
    if (method_exists($reader, 'setOriginalSourceUri')) {
        $reader->setOriginalSourceUri($uri);
    }

    return $reader;
}
265
/**
 * Import a feed from a remote URI
 *
 * Works like import(), but fetches through the HTTP client handed to the
 * method and never consults or populates the cache. Its main purpose is to
 * allow the Reader to be used with alternate HTTP client implementations.
 *
 * @param  string $uri
 * @param  Http\ClientInterface $client
 * @return Feed\FeedInterface
 * @throws Exception\RuntimeException if the client does not return an
 *         Http\ResponseInterface, or the response status is not 200
 */
public static function importRemoteFeed($uri, Http\ClientInterface $client)
{
    $response = $client->get($uri);

    if (! $response instanceof Http\ResponseInterface) {
        // Describe what was actually received to ease debugging custom clients.
        $received = is_object($response) ? get_class($response) : gettype($response);

        throw new Exception\RuntimeException(sprintf(
            'Did not receive a %s\Http\ResponseInterface from the provided HTTP client; received "%s"',
            __NAMESPACE__,
            $received
        ));
    }

    $loaded = ((int) $response->getStatusCode() === 200);
    if (! $loaded) {
        throw new Exception\RuntimeException('Feed failed to load, got response code ' . $response->getStatusCode());
    }

    $feed = static::importString($response->getBody());
    $feed->setOriginalSourceUri($uri);

    return $feed;
}
298
/**
 * Import a feed from a string
 *
 * The string is parsed with the libxml entity loader disabled and documents
 * carrying a DOCTYPE are rejected. The detected feed type then selects the
 * reader class: Feed\Rss, Feed\Atom, or Entry\Atom for a single Atom entry.
 *
 * @param  string $string Feed document as XML
 * @return Feed\FeedInterface
 * @throws Exception\InvalidArgumentException if the input is not a non-empty
 *         string, or the document contains a DOCTYPE
 * @throws Exception\RuntimeException if the XML cannot be parsed or is not a
 *         recognised feed type
 */
public static function importString($string)
{
    // Check the type before trimming: trim() on a non-string would raise a
    // warning (or a TypeError on newer PHP versions) before the intended
    // exception could be thrown.
    if (! is_string($string)) {
        throw new Exception\InvalidArgumentException('Only non empty strings are allowed as input');
    }

    $trimmed = trim($string);
    if (empty($trimmed)) {
        throw new Exception\InvalidArgumentException('Only non empty strings are allowed as input');
    }

    $libxmlErrflag = libxml_use_internal_errors(true);
    $oldValue      = libxml_disable_entity_loader(true);
    $dom           = new DOMDocument;
    $status        = $dom->loadXML($trimmed);

    $hasDoctype = false;
    foreach ($dom->childNodes as $child) {
        if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) {
            $hasDoctype = true;
            break;
        }
    }

    // Restore the global libxml settings before any exception is thrown so a
    // rejected document cannot leave them altered for the rest of the request.
    libxml_disable_entity_loader($oldValue);
    libxml_use_internal_errors($libxmlErrflag);

    if ($hasDoctype) {
        throw new Exception\InvalidArgumentException(
            'Invalid XML: Detected use of illegal DOCTYPE'
        );
    }

    if (! $status) {
        // Build error message
        $error = libxml_get_last_error();
        if ($error && $error->message) {
            $error->message = trim($error->message);
            $errormsg = "DOMDocument cannot parse XML: {$error->message}";
        } else {
            $errormsg = "DOMDocument cannot parse XML: Please check the XML document's validity";
        }
        throw new Exception\RuntimeException($errormsg);
    }

    $type = static::detectType($dom);

    static::registerCoreExtensions();

    if (substr($type, 0, 3) == 'rss') {
        $reader = new Feed\Rss($dom, $type);
    } elseif (substr($type, 8, 5) == 'entry') {
        // 'atom-10-entry': a standalone Atom entry rather than a full feed.
        $reader = new Entry\Atom($dom->documentElement, 0, self::TYPE_ATOM_10);
    } elseif (substr($type, 0, 4) == 'atom') {
        $reader = new Feed\Atom($dom, $type);
    } else {
        throw new Exception\RuntimeException('The URI used does not point to a '
            . 'valid Atom, RSS or RDF feed that Zend\Feed\Reader can parse.');
    }

    return $reader;
}
356
/**
 * Import a feed from the file located at $filename.
 *
 * Errors raised while reading the file are captured by the ErrorHandler and
 * attached as the previous exception when loading fails.
 *
 * @param  string $filename Path of the file to read
 * @throws Exception\RuntimeException if the file cannot be read
 * @return Feed\FeedInterface
 */
public static function importFile($filename)
{
    ErrorHandler::start();
    $contents = file_get_contents($filename);
    $previous = ErrorHandler::stop();

    if (false !== $contents) {
        return static::importString($contents);
    }

    throw new Exception\RuntimeException("File '{$filename}' could not be loaded", 0, $previous);
}
374
/**
 * Find feed links
 *
 * Fetches the document at $uri with the configured HTTP client, parses it
 * as HTML and collects its <link> elements into a FeedSet.
 *
 * @param  string $uri URI of the HTML page to inspect
 * @return FeedSet
 * @throws Exception\RuntimeException if the response status is not 200 or
 *         the body cannot be parsed as HTML
 */
public static function findFeedLinks($uri)
{
    $client   = static::getHttpClient();
    $response = $client->get($uri);

    // Cast before comparing, as the import methods do, so that a client
    // reporting the status as the string "200" is not treated as a failure.
    if ((int) $response->getStatusCode() !== 200) {
        throw new Exception\RuntimeException("Failed to access $uri, got response code " . $response->getStatusCode());
    }

    $responseHtml  = $response->getBody();
    $libxmlErrflag = libxml_use_internal_errors(true);
    $oldValue      = libxml_disable_entity_loader(true);
    $dom           = new DOMDocument;
    $status        = $dom->loadHTML(trim($responseHtml));
    libxml_disable_entity_loader($oldValue);
    libxml_use_internal_errors($libxmlErrflag);

    if (! $status) {
        // Build error message
        $error = libxml_get_last_error();
        if ($error && $error->message) {
            $error->message = trim($error->message);
            $errormsg = "DOMDocument cannot parse HTML: {$error->message}";
        } else {
            $errormsg = "DOMDocument cannot parse HTML: Please check the XML document's validity";
        }
        throw new Exception\RuntimeException($errormsg);
    }

    $feedSet = new FeedSet;
    $links   = $dom->getElementsByTagName('link');
    $feedSet->addLinks($links, $uri);

    return $feedSet;
}
412
/**
 * Detect the feed type of the provided feed
 *
 * Accepts an already parsed feed, a DOMDocument, or an XML string. Strings
 * are parsed with the libxml entity loader disabled and are rejected when
 * they carry a DOCTYPE. The type is then determined with XPath queries, in
 * this order: RSS 0.9x/2.0, RDF based RSS 1.0 and 0.90, Atom 1.0 feed,
 * Atom 1.0 entry, Atom 0.3 feed.
 *
 * @param  Feed\AbstractFeed|DOMDocument|string $feed
 * @param  bool $specOnly When true, a standalone Atom entry is reported as
 *                        TYPE_ATOM_10 instead of TYPE_ATOM_10_ENTRY
 * @return string One of the TYPE_* constants; TYPE_ANY when nothing matched
 * @throws Exception\InvalidArgumentException for unsupported input or a
 *         document containing a DOCTYPE
 * @throws Exception\RuntimeException if an XML string cannot be parsed
 */
public static function detectType($feed, $specOnly = false)
{
    if ($feed instanceof Feed\AbstractFeed) {
        $dom = $feed->getDomDocument();
    } elseif ($feed instanceof DOMDocument) {
        $dom = $feed;
    } elseif (is_string($feed) && !empty($feed)) {
        ErrorHandler::start(E_NOTICE|E_WARNING);
        $oldValue = libxml_disable_entity_loader(true);
        $dom      = new DOMDocument;
        $status   = $dom->loadXML($feed);

        $hasDoctype = false;
        foreach ($dom->childNodes as $child) {
            if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) {
                $hasDoctype = true;
                break;
            }
        }

        // Undo the global changes before throwing anything, so neither the
        // entity loader setting nor the started ErrorHandler is left behind.
        libxml_disable_entity_loader($oldValue);
        $err = ErrorHandler::stop();

        if ($hasDoctype) {
            throw new Exception\InvalidArgumentException(
                'Invalid XML: Detected use of illegal DOCTYPE'
            );
        }

        if (!$status) {
            // The parser warning is captured by the ErrorHandler. The former
            // track_errors based lookup never produced a message because the
            // variable it checked was never assigned.
            $previous = ($err instanceof \Exception) ? $err : null;
            if ($previous !== null && $previous->getMessage() !== '') {
                $phpErrormsg = $previous->getMessage();
            } elseif (function_exists('xdebug_is_enabled')) {
                $phpErrormsg = '(error message not available, when XDebug is running)';
            } else {
                $phpErrormsg = '(error message not available)';
            }
            throw new Exception\RuntimeException("DOMDocument cannot parse XML: $phpErrormsg", 0, $previous);
        }
    } else {
        throw new Exception\InvalidArgumentException('Invalid object/scalar provided: must'
            . ' be of type Zend\Feed\Reader\Feed, DomDocument or string');
    }

    $xpath = new DOMXPath($dom);

    if ($xpath->query('/rss')->length) {
        $type    = self::TYPE_RSS_ANY;
        $version = $xpath->evaluate('string(/rss/@version)');

        if (strlen($version) > 0) {
            // switch compares loosely, so numerically equal spellings of a
            // version (e.g. "2") select the same type as the canonical form.
            switch ($version) {
                case '2.0':
                    $type = self::TYPE_RSS_20;
                    break;

                case '0.94':
                    $type = self::TYPE_RSS_094;
                    break;

                case '0.93':
                    $type = self::TYPE_RSS_093;
                    break;

                case '0.92':
                    $type = self::TYPE_RSS_092;
                    break;

                case '0.91':
                    $type = self::TYPE_RSS_091;
                    break;
            }
        }

        return $type;
    }

    $xpath->registerNamespace('rdf', self::NAMESPACE_RDF);

    if ($xpath->query('/rdf:RDF')->length) {
        // Try the RSS 1.0 namespace first, then rebind the same prefix to
        // the RSS 0.90 namespace and repeat the identical queries.
        $xpath->registerNamespace('rss', self::NAMESPACE_RSS_10);

        if ($xpath->query('/rdf:RDF/rss:channel')->length
            || $xpath->query('/rdf:RDF/rss:image')->length
            || $xpath->query('/rdf:RDF/rss:item')->length
            || $xpath->query('/rdf:RDF/rss:textinput')->length
        ) {
            return self::TYPE_RSS_10;
        }

        $xpath->registerNamespace('rss', self::NAMESPACE_RSS_090);

        if ($xpath->query('/rdf:RDF/rss:channel')->length
            || $xpath->query('/rdf:RDF/rss:image')->length
            || $xpath->query('/rdf:RDF/rss:item')->length
            || $xpath->query('/rdf:RDF/rss:textinput')->length
        ) {
            return self::TYPE_RSS_090;
        }
    }

    $xpath->registerNamespace('atom', self::NAMESPACE_ATOM_10);

    if ($xpath->query('//atom:feed')->length) {
        return self::TYPE_ATOM_10;
    }

    if ($xpath->query('//atom:entry')->length) {
        if ($specOnly == true) {
            return self::TYPE_ATOM_10;
        } else {
            return self::TYPE_ATOM_10_ENTRY;
        }
    }

    $xpath->registerNamespace('atom', self::NAMESPACE_ATOM_03);

    if ($xpath->query('//atom:feed')->length) {
        return self::TYPE_ATOM_03;
    }

    return self::TYPE_ANY;
}
537
/**
 * Install the plugin manager used to resolve Extensions.
 *
 * @param  ExtensionManagerInterface $extensionManager
 * @return void
 */
public static function setExtensionManager(ExtensionManagerInterface $extensionManager)
{
    static::$extensionManager = $extensionManager;
}
547
/**
 * Get the plugin manager used to resolve Extensions.
 *
 * A StandaloneExtensionManager is installed on first use when no manager
 * has been set.
 *
 * @return ExtensionManagerInterface
 */
public static function getExtensionManager()
{
    if (null === static::$extensionManager) {
        static::setExtensionManager(new StandaloneExtensionManager());
    }

    return static::$extensionManager;
}
560
/**
 * Register an Extension by name
 *
 * The extension manager is asked for "<name>\Feed" and "<name>\Entry";
 * each one it knows is appended to the matching extension list. An
 * extension that is already registered and still resolvable is left as is.
 *
 * @param  string $name Extension name without the \Feed or \Entry suffix
 * @return void
 * @throws Exception\RuntimeException if unable to resolve Extension class
 */
public static function registerExtension($name)
{
    $manager    = static::getExtensionManager();
    $feedClass  = $name . '\Feed';
    $entryClass = $name . '\Entry';

    // Whether the manager can provide at least one half of the extension.
    $resolvable = $manager->has($feedClass) || $manager->has($entryClass);

    if (! $resolvable) {
        throw new Exception\RuntimeException('Could not load extension: ' . $name
            . ' using Plugin Loader. Check prefix paths are configured and extension exists.');
    }

    if (static::isRegistered($name)) {
        return;
    }

    if ($manager->has($feedClass)) {
        static::$extensions['feed'][] = $feedClass;
    }
    if ($manager->has($entryClass)) {
        static::$extensions['entry'][] = $entryClass;
    }
}
590
/**
 * Tell whether the named Extension is registered.
 *
 * An extension counts as registered when either its "\Feed" or its
 * "\Entry" name is present in the corresponding extension list.
 *
 * @param  string $extensionName Extension name without suffix
 * @return bool
 */
public static function isRegistered($extensionName)
{
    $asFeed  = $extensionName . '\Feed';
    $asEntry = $extensionName . '\Entry';

    return in_array($asFeed, static::$extensions['feed'])
        || in_array($asEntry, static::$extensions['entry']);
}
608
/**
 * Get the registered extensions.
 *
 * @return array Extension names grouped under 'feed', 'entry' and 'core'
 */
public static function getExtensions()
{
    return static::$extensions;
}
618
/**
 * Reset class state to defaults
 *
 * Clears the cache, HTTP client and extension manager, switches both HTTP
 * flags off and restores the initial extension lists.
 *
 * @return void
 */
public static function reset()
{
    $defaultExtensions = [
        'feed' => [
            'DublinCore\Feed',
            'Atom\Feed'
        ],
        'entry' => [
            'Content\Entry',
            'DublinCore\Entry',
            'Atom\Entry'
        ],
        'core' => [
            'DublinCore\Feed',
            'Atom\Feed',
            'Content\Entry',
            'DublinCore\Entry',
            'Atom\Entry'
        ]
    ];

    static::$extensions         = $defaultExtensions;
    static::$extensionManager   = null;
    static::$httpConditionalGet = false;
    static::$httpMethodOverride = false;
    static::$httpClient         = null;
    static::$cache              = null;
}
650
/**
 * Register the core (default) extensions, in a fixed order.
 *
 * @return void
 */
protected static function registerCoreExtensions()
{
    $coreExtensions = [
        'DublinCore',
        'Content',
        'Atom',
        'Slash',
        'WellFormedWeb',
        'Thread',
        'Podcast',
    ];

    foreach ($coreExtensions as $extension) {
        static::registerExtension($extension);
    }
}
666
/**
 * Apply an array_unique operation to a multidimensional array.
 *
 * Each element is serialized so that nested arrays can be compared as
 * strings; the surviving elements are unserialized again. Keys are kept,
 * and of several equal elements the first one wins.
 *
 * @param  array $array
 * @return array
 */
public static function arrayUnique(array $array)
{
    $encoded = [];
    foreach ($array as $key => $item) {
        $encoded[$key] = serialize($item);
    }

    $result = [];
    foreach (array_unique($encoded) as $key => $item) {
        $result[$key] = unserialize($item);
    }

    return $result;
}
685 }