Chris@0
|
1 <?php
|
Chris@0
|
2 /**
|
Chris@0
|
3 * Zend Framework (http://framework.zend.com/)
|
Chris@0
|
4 *
|
Chris@0
|
5 * @link http://github.com/zendframework/zf2 for the canonical source repository
|
Chris@0
|
6 * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
|
Chris@0
|
7 * @license http://framework.zend.com/license/new-bsd New BSD License
|
Chris@0
|
8 */
|
Chris@0
|
9
|
Chris@0
|
10 namespace Zend\Feed\Reader;
|
Chris@0
|
11
|
Chris@0
|
12 use DOMDocument;
|
Chris@0
|
13 use DOMXPath;
|
Chris@0
|
14 use Zend\Cache\Storage\StorageInterface as CacheStorage;
|
Chris@12
|
15 use Zend\Feed\Reader\Exception\InvalidHttpClientException;
|
Chris@0
|
16 use Zend\Http as ZendHttp;
|
Chris@0
|
17 use Zend\Stdlib\ErrorHandler;
|
Chris@0
|
18
|
Chris@0
|
19 /**
|
Chris@0
|
20 */
|
Chris@0
|
21 class Reader implements ReaderImportInterface
|
Chris@0
|
22 {
|
Chris@0
|
23 /**
|
Chris@0
|
24 * Namespace constants
|
Chris@0
|
25 */
|
Chris@0
|
26 const NAMESPACE_ATOM_03 = 'http://purl.org/atom/ns#';
|
Chris@0
|
27 const NAMESPACE_ATOM_10 = 'http://www.w3.org/2005/Atom';
|
Chris@0
|
28 const NAMESPACE_RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
|
Chris@0
|
29 const NAMESPACE_RSS_090 = 'http://my.netscape.com/rdf/simple/0.9/';
|
Chris@0
|
30 const NAMESPACE_RSS_10 = 'http://purl.org/rss/1.0/';
|
Chris@0
|
31
|
Chris@0
|
32 /**
|
Chris@0
|
33 * Feed type constants
|
Chris@0
|
34 */
|
Chris@0
|
35 const TYPE_ANY = 'any';
|
Chris@0
|
36 const TYPE_ATOM_03 = 'atom-03';
|
Chris@0
|
37 const TYPE_ATOM_10 = 'atom-10';
|
Chris@0
|
38 const TYPE_ATOM_10_ENTRY = 'atom-10-entry';
|
Chris@0
|
39 const TYPE_ATOM_ANY = 'atom';
|
Chris@0
|
40 const TYPE_RSS_090 = 'rss-090';
|
Chris@0
|
41 const TYPE_RSS_091 = 'rss-091';
|
Chris@0
|
42 const TYPE_RSS_091_NETSCAPE = 'rss-091n';
|
Chris@0
|
43 const TYPE_RSS_091_USERLAND = 'rss-091u';
|
Chris@0
|
44 const TYPE_RSS_092 = 'rss-092';
|
Chris@0
|
45 const TYPE_RSS_093 = 'rss-093';
|
Chris@0
|
46 const TYPE_RSS_094 = 'rss-094';
|
Chris@0
|
47 const TYPE_RSS_10 = 'rss-10';
|
Chris@0
|
48 const TYPE_RSS_20 = 'rss-20';
|
Chris@0
|
49 const TYPE_RSS_ANY = 'rss';
|
Chris@0
|
50
|
Chris@0
|
51 /**
|
Chris@0
|
52 * Cache instance
|
Chris@0
|
53 *
|
Chris@0
|
54 * @var CacheStorage
|
Chris@0
|
55 */
|
Chris@0
|
56 protected static $cache = null;
|
Chris@0
|
57
|
Chris@0
|
58 /**
|
Chris@0
|
59 * HTTP client object to use for retrieving feeds
|
Chris@0
|
60 *
|
Chris@0
|
61 * @var Http\ClientInterface
|
Chris@0
|
62 */
|
Chris@0
|
63 protected static $httpClient = null;
|
Chris@0
|
64
|
Chris@0
|
65 /**
|
Chris@0
|
66 * Override HTTP PUT and DELETE request methods?
|
Chris@0
|
67 *
|
Chris@0
|
68 * @var bool
|
Chris@0
|
69 */
|
Chris@0
|
70 protected static $httpMethodOverride = false;
|
Chris@0
|
71
|
Chris@0
|
72 protected static $httpConditionalGet = false;
|
Chris@0
|
73
|
Chris@0
|
74 protected static $extensionManager = null;
|
Chris@0
|
75
|
Chris@0
|
76 protected static $extensions = [
|
Chris@0
|
77 'feed' => [
|
Chris@0
|
78 'DublinCore\Feed',
|
Chris@0
|
79 'Atom\Feed'
|
Chris@0
|
80 ],
|
Chris@0
|
81 'entry' => [
|
Chris@0
|
82 'Content\Entry',
|
Chris@0
|
83 'DublinCore\Entry',
|
Chris@0
|
84 'Atom\Entry'
|
Chris@0
|
85 ],
|
Chris@0
|
86 'core' => [
|
Chris@0
|
87 'DublinCore\Feed',
|
Chris@0
|
88 'Atom\Feed',
|
Chris@0
|
89 'Content\Entry',
|
Chris@0
|
90 'DublinCore\Entry',
|
Chris@0
|
91 'Atom\Entry'
|
Chris@0
|
92 ]
|
Chris@0
|
93 ];
|
Chris@0
|
94
|
Chris@0
|
/**
 * Return the feed cache held by this class.
 *
 * The value is whatever was last stored in static::$cache; the property
 * starts out as null and is set back to null by reset().
 *
 * @return CacheStorage|null
 */
public static function getCache()
{
    $cache = static::$cache;

    return $cache;
}
|
Chris@0
|
104
|
Chris@0
|
/**
 * Store the cache used by import() for feed bodies.
 *
 * @param  CacheStorage $cache Storage adapter to keep in static::$cache
 * @return void
 */
public static function setCache(CacheStorage $cache)
{
    static::$cache = $cache;

    return;
}
|
Chris@0
|
115
|
Chris@0
|
/**
 * Set the HTTP client used for retrieving feeds.
 *
 * A ZendHttp\Client is wrapped in Http\ZendHttpClientDecorator first; any
 * value that is not an Http\ClientInterface after that step is rejected.
 *
 * @param  ZendHttp\Client|Http\ClientInterface $httpClient
 * @return void
 * @throws InvalidHttpClientException if the client is of neither accepted type
 */
public static function setHttpClient($httpClient)
{
    $resolved = $httpClient instanceof ZendHttp\Client
        ? new Http\ZendHttpClientDecorator($httpClient)
        : $httpClient;

    if ($resolved instanceof Http\ClientInterface) {
        static::$httpClient = $resolved;
        return;
    }

    throw new InvalidHttpClientException();
}
|
Chris@0
|
135
|
Chris@0
|
/**
 * Return the HTTP client, creating a default one on first use.
 *
 * When no client has been set, a new ZendHttp\Client wrapped in
 * Http\ZendHttpClientDecorator is created, stored, and returned.
 *
 * @return Http\ClientInterface
 */
public static function getHttpClient()
{
    if (static::$httpClient) {
        return static::$httpClient;
    }

    static::$httpClient = new Http\ZendHttpClientDecorator(new ZendHttp\Client());

    return static::$httpClient;
}
|
Chris@0
|
149
|
Chris@0
|
/**
 * Toggle the "use POST instead of PUT and DELETE" flag.
 *
 * Some feed implementations do not accept PUT and DELETE HTTP methods, or
 * proxies and similar measures prevent their use. The flag records that POST
 * should be used in their place, together with an X-Method-Override header
 * carrying PUT or DELETE as appropriate. This method only stores the flag.
 *
 * @param  bool $override Whether to override PUT and DELETE.
 * @return void
 */
public static function setHttpMethodOverride($override = true)
{
    static::$httpMethodOverride = $override;

    return;
}
|
Chris@0
|
167
|
Chris@0
|
/**
 * Report the current HTTP method override flag.
 *
 * @return bool Value last given to setHttpMethodOverride(); false by default
 */
public static function getHttpMethodOverride()
{
    $override = static::$httpMethodOverride;

    return $override;
}
|
Chris@0
|
177
|
Chris@0
|
/**
 * Enable or disable HTTP conditional GET.
 *
 * import() consults this flag: its conditional-GET path is taken only when
 * the flag is truthy and a cache has been set.
 *
 * @param  bool $bool Whether import() should attempt conditional GET
 * @return void
 */
public static function useHttpConditionalGet($bool = true)
{
    static::$httpConditionalGet = $bool;

    return;
}
|
Chris@0
|
188
|
Chris@0
|
/**
 * Import a feed by providing a URI
 *
 * One of three paths is taken, depending on static configuration:
 *
 * - Conditional GET enabled and a cache set: the request carries
 *   If-None-Match / If-Modified-Since headers when a cached body exists and
 *   the client is header-aware. A 304 response reuses the cached body; a 200
 *   response replaces the cached body (and the cached ETag / Last-Modified
 *   values when the response exposes headers).
 * - Cache set only: a cached body is parsed directly; otherwise the feed is
 *   fetched, cached and parsed.
 * - No cache: the feed is fetched and parsed, and $uri is recorded on the
 *   result via setOriginalSourceUri().
 *
 * NOTE(review): only the cache-less path calls setOriginalSourceUri();
 * confirm whether the cached paths are meant to skip it.
 *
 * @param  string $uri The URI to the feed
 * @param  string $etag OPTIONAL Last received ETag for this resource
 * @param  string $lastModified OPTIONAL Last-Modified value for this resource
 * @return Feed\FeedInterface
 * @throws Exception\RuntimeException if the response status is not acceptable
 */
public static function import($uri, $etag = null, $lastModified = null)
{
    $cache   = self::getCache();
    $client  = self::getHttpClient();
    $cacheId = 'Zend_Feed_Reader_' . md5($uri);

    if (static::$httpConditionalGet && $cache) {
        $headers = [];
        $data    = $cache->getItem($cacheId);
        if ($data && $client instanceof Http\HeaderAwareClientInterface) {
            // Only check for ETag and last modified values in the cache
            // if we have a client capable of emitting headers in the first place.
            if ($etag === null) {
                $etag = $cache->getItem($cacheId . '_etag');
            }
            if ($lastModified === null) {
                $lastModified = $cache->getItem($cacheId . '_lastmodified');
            }
            if ($etag) {
                $headers['If-None-Match'] = [$etag];
            }
            if ($lastModified) {
                $headers['If-Modified-Since'] = [$lastModified];
            }
        }
        $response = $client->get($uri, $headers);

        // Normalise the status to int, as the other branches do, so a client
        // that reports "200" as a string is not rejected on this path only.
        $status = (int) $response->getStatusCode();
        if ($status !== 200 && $status !== 304) {
            throw new Exception\RuntimeException(
                'Feed failed to load, got response code ' . $response->getStatusCode()
            );
        }
        if ($status === 304) {
            // Not modified: fall back to the body read from the cache above.
            $responseXml = $data;
        } else {
            $responseXml = $response->getBody();
            $cache->setItem($cacheId, $responseXml);

            if ($response instanceof Http\HeaderAwareResponseInterface) {
                if ($response->getHeaderLine('ETag', false)) {
                    $cache->setItem($cacheId . '_etag', $response->getHeaderLine('ETag'));
                }
                if ($response->getHeaderLine('Last-Modified', false)) {
                    $cache->setItem($cacheId . '_lastmodified', $response->getHeaderLine('Last-Modified'));
                }
            }
        }
        return static::importString($responseXml);
    }

    if ($cache) {
        $data = $cache->getItem($cacheId);
        if ($data) {
            return static::importString($data);
        }
        $response = $client->get($uri);
        if ((int) $response->getStatusCode() !== 200) {
            throw new Exception\RuntimeException(
                'Feed failed to load, got response code ' . $response->getStatusCode()
            );
        }
        $responseXml = $response->getBody();
        $cache->setItem($cacheId, $responseXml);
        return static::importString($responseXml);
    }

    $response = $client->get($uri);
    if ((int) $response->getStatusCode() !== 200) {
        throw new Exception\RuntimeException(
            'Feed failed to load, got response code ' . $response->getStatusCode()
        );
    }
    $reader = static::importString($response->getBody());
    $reader->setOriginalSourceUri($uri);
    return $reader;
}
|
Chris@0
|
271
|
Chris@0
|
/**
 * Import a feed from a remote URI using the supplied client
 *
 * Works like import(), but the request is made with the client passed in
 * and neither reads nor writes the cache. Its main purpose is to allow the
 * Reader to be used with alternate HTTP client implementations.
 *
 * @param  string $uri
 * @param  Http\ClientInterface $client
 * @return Feed\FeedInterface
 * @throws Exception\RuntimeException if the client does not return an
 *     Http\ResponseInterface, or the response status is not 200
 */
public static function importRemoteFeed($uri, Http\ClientInterface $client)
{
    $response = $client->get($uri);

    if (! ($response instanceof Http\ResponseInterface)) {
        // Describe what was actually received: class name for objects, type otherwise.
        $received = is_object($response) ? get_class($response) : gettype($response);
        $message  = sprintf(
            'Did not receive a %s\Http\ResponseInterface from the provided HTTP client; received "%s"',
            __NAMESPACE__,
            $received
        );
        throw new Exception\RuntimeException($message);
    }

    if (200 !== (int) $response->getStatusCode()) {
        throw new Exception\RuntimeException(
            'Feed failed to load, got response code ' . $response->getStatusCode()
        );
    }

    $feed = static::importString($response->getBody());
    $feed->setOriginalSourceUri($uri);

    return $feed;
}
|
Chris@0
|
306
|
Chris@0
|
/**
 * Import a feed from a string
 *
 * The string is parsed with DOMDocument while the libxml entity loader is
 * disabled and libxml errors are collected internally. Both settings are
 * restored to their previous values before this method returns or throws.
 * Documents containing a DOCTYPE node are rejected.
 *
 * @param  string $string
 * @return Feed\FeedInterface
 * @throws Exception\InvalidArgumentException if $string is not a non-empty
 *     string, or the document contains a DOCTYPE
 * @throws Exception\RuntimeException if the XML cannot be parsed or is not a
 *     recognised RSS/RDF/Atom document
 */
public static function importString($string)
{
    // Check the type before trimming: trim() on a non-string value raises a
    // warning (or a TypeError on PHP 8) instead of the documented exception.
    if (! is_string($string)) {
        throw new Exception\InvalidArgumentException('Only non empty strings are allowed as input');
    }
    $trimmed = trim($string);
    if (empty($trimmed)) {
        throw new Exception\InvalidArgumentException('Only non empty strings are allowed as input');
    }

    $libxmlErrflag = libxml_use_internal_errors(true);
    $oldValue      = libxml_disable_entity_loader(true);
    $dom           = new DOMDocument;
    $status        = $dom->loadXML($trimmed);

    $hasDoctype = false;
    foreach ($dom->childNodes as $child) {
        if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) {
            $hasDoctype = true;
            break;
        }
    }

    // Read the parse error (if any) and restore the global libxml state
    // before throwing anything, so a rejected document cannot leave the
    // entity loader or error handling altered for the rest of the process.
    $error = $status ? null : libxml_get_last_error();
    libxml_disable_entity_loader($oldValue);
    libxml_use_internal_errors($libxmlErrflag);

    if ($hasDoctype) {
        throw new Exception\InvalidArgumentException(
            'Invalid XML: Detected use of illegal DOCTYPE'
        );
    }

    if (! $status) {
        // Build error message
        if ($error && $error->message) {
            $error->message = trim($error->message);
            $errormsg = "DOMDocument cannot parse XML: {$error->message}";
        } else {
            $errormsg = "DOMDocument cannot parse XML: Please check the XML document's validity";
        }
        throw new Exception\RuntimeException($errormsg);
    }

    $type = static::detectType($dom);

    static::registerCoreExtensions();

    if (0 === strpos($type, 'rss')) {
        $reader = new Feed\Rss($dom, $type);
    } elseif (8 === strpos($type, 'entry')) {
        // 'entry' at offset 8 matches TYPE_ATOM_10_ENTRY ('atom-10-entry'):
        // a standalone Atom entry document rather than a feed.
        $reader = new Entry\Atom($dom->documentElement, 0, self::TYPE_ATOM_10);
    } elseif (0 === strpos($type, 'atom')) {
        $reader = new Feed\Atom($dom, $type);
    } else {
        throw new Exception\RuntimeException('The URI used does not point to a '
        . 'valid Atom, RSS or RDF feed that Zend\Feed\Reader can parse.');
    }
    return $reader;
}
|
Chris@0
|
364
|
Chris@0
|
/**
 * Import a feed from the file located at $filename.
 *
 * The file is read with file_get_contents() while ErrorHandler is active;
 * the error it captured (if any) is attached as the previous exception when
 * the read fails.
 *
 * @param  string $filename
 * @throws Exception\RuntimeException if the file cannot be read
 * @return Feed\FeedInterface
 */
public static function importFile($filename)
{
    ErrorHandler::start();
    $contents = file_get_contents($filename);
    $failure  = ErrorHandler::stop();

    if (false !== $contents) {
        return static::importString($contents);
    }

    throw new Exception\RuntimeException("File '{$filename}' could not be loaded", 0, $failure);
}
|
Chris@0
|
382
|
Chris@0
|
/**
 * Find feed links
 *
 * Fetches $uri with the configured HTTP client, parses the body as HTML and
 * hands every <link> element of the document to FeedSet::addLinks().
 *
 * @param  string $uri Address of the HTML page to inspect
 * @return FeedSet
 * @throws Exception\RuntimeException if the response status is not 200 or
 *     the body cannot be parsed
 */
public static function findFeedLinks($uri)
{
    $client   = static::getHttpClient();
    $response = $client->get($uri);

    // Cast to int, as the import methods do, so a client reporting the
    // status as a numeric string is not rejected by the strict comparison.
    if ((int) $response->getStatusCode() !== 200) {
        throw new Exception\RuntimeException(
            "Failed to access $uri, got response code " . $response->getStatusCode()
        );
    }
    $responseHtml  = $response->getBody();
    $libxmlErrflag = libxml_use_internal_errors(true);
    $oldValue      = libxml_disable_entity_loader(true);
    $dom           = new DOMDocument;
    $status        = $dom->loadHTML(trim($responseHtml));

    // Read the parse error before handing libxml error handling back.
    $error = $status ? null : libxml_get_last_error();
    libxml_disable_entity_loader($oldValue);
    libxml_use_internal_errors($libxmlErrflag);

    if (! $status) {
        // Build error message
        if ($error && $error->message) {
            $error->message = trim($error->message);
            $errormsg = "DOMDocument cannot parse HTML: {$error->message}";
        } else {
            $errormsg = "DOMDocument cannot parse HTML: Please check the XML document's validity";
        }
        throw new Exception\RuntimeException($errormsg);
    }
    $feedSet = new FeedSet;
    $links   = $dom->getElementsByTagName('link');
    $feedSet->addLinks($links, $uri);
    return $feedSet;
}
|
Chris@0
|
422
|
Chris@0
|
/**
 * Detect the feed type of the provided feed
 *
 * Accepts an already-parsed feed, a DOMDocument, or an XML string. Strings
 * are parsed with the libxml entity loader disabled and rejected when they
 * contain a DOCTYPE node. The type is then derived from the document root:
 * /rss (refined by its version attribute), /rdf:RDF with RSS 1.0 or 0.90
 * children, Atom 1.0 feed or entry, and finally Atom 0.3 feed.
 *
 * @param  Feed\AbstractFeed|DOMDocument|string $feed
 * @param  bool $specOnly When true, a standalone Atom 1.0 entry is reported
 *     as TYPE_ATOM_10 instead of TYPE_ATOM_10_ENTRY
 * @return string One of the TYPE_* constants; TYPE_ANY when nothing matched
 * @throws Exception\InvalidArgumentException for unsupported input or a DOCTYPE
 * @throws Exception\RuntimeException if an XML string cannot be parsed
 */
public static function detectType($feed, $specOnly = false)
{
    if ($feed instanceof Feed\AbstractFeed) {
        $dom = $feed->getDomDocument();
    } elseif ($feed instanceof DOMDocument) {
        $dom = $feed;
    } elseif (is_string($feed) && ! empty($feed)) {
        ErrorHandler::start(E_NOTICE | E_WARNING);
        $oldValue = libxml_disable_entity_loader(true);
        $dom      = new DOMDocument;
        $status   = $dom->loadXML($feed);

        $hasDoctype = false;
        foreach ($dom->childNodes as $child) {
            if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) {
                $hasDoctype = true;
                break;
            }
        }

        // Restore global state before any exception is thrown; otherwise a
        // rejected document would leave the error handler started and the
        // entity loader disabled.
        libxml_disable_entity_loader($oldValue);
        $parseError = ErrorHandler::stop();

        if ($hasDoctype) {
            throw new Exception\InvalidArgumentException(
                'Invalid XML: Detected use of illegal DOCTYPE'
            );
        }

        if (! $status) {
            // Report the warning captured by ErrorHandler while parsing;
            // fall back to the generic text when nothing was captured.
            if ($parseError) {
                $phpErrormsg = $parseError->getMessage();
            } elseif (function_exists('xdebug_is_enabled')) {
                $phpErrormsg = '(error message not available, when XDebug is running)';
            } else {
                $phpErrormsg = '(error message not available)';
            }
            throw new Exception\RuntimeException(
                "DOMDocument cannot parse XML: $phpErrormsg",
                0,
                $parseError
            );
        }
    } else {
        throw new Exception\InvalidArgumentException('Invalid object/scalar provided: must'
        . ' be of type Zend\Feed\Reader\Feed, DomDocument or string');
    }
    $xpath = new DOMXPath($dom);

    if ($xpath->query('/rss')->length) {
        $type    = self::TYPE_RSS_ANY;
        $version = $xpath->evaluate('string(/rss/@version)');

        if (strlen($version) > 0) {
            // switch compares loosely, so numeric-string variants of a
            // version (e.g. "2" for "2.0") still select the matching case.
            switch ($version) {
                case '2.0':
                    $type = self::TYPE_RSS_20;
                    break;

                case '0.94':
                    $type = self::TYPE_RSS_094;
                    break;

                case '0.93':
                    $type = self::TYPE_RSS_093;
                    break;

                case '0.92':
                    $type = self::TYPE_RSS_092;
                    break;

                case '0.91':
                    $type = self::TYPE_RSS_091;
                    break;
            }
        }

        return $type;
    }

    $xpath->registerNamespace('rdf', self::NAMESPACE_RDF);

    if ($xpath->query('/rdf:RDF')->length) {
        // Try the RSS 1.0 namespace first, then rebind the same prefix to
        // the RSS 0.90 namespace and repeat the checks.
        $xpath->registerNamespace('rss', self::NAMESPACE_RSS_10);

        if ($xpath->query('/rdf:RDF/rss:channel')->length
            || $xpath->query('/rdf:RDF/rss:image')->length
            || $xpath->query('/rdf:RDF/rss:item')->length
            || $xpath->query('/rdf:RDF/rss:textinput')->length
        ) {
            return self::TYPE_RSS_10;
        }

        $xpath->registerNamespace('rss', self::NAMESPACE_RSS_090);

        if ($xpath->query('/rdf:RDF/rss:channel')->length
            || $xpath->query('/rdf:RDF/rss:image')->length
            || $xpath->query('/rdf:RDF/rss:item')->length
            || $xpath->query('/rdf:RDF/rss:textinput')->length
        ) {
            return self::TYPE_RSS_090;
        }
    }

    $xpath->registerNamespace('atom', self::NAMESPACE_ATOM_10);

    if ($xpath->query('//atom:feed')->length) {
        return self::TYPE_ATOM_10;
    }

    if ($xpath->query('//atom:entry')->length) {
        if ($specOnly == true) {
            return self::TYPE_ATOM_10;
        } else {
            return self::TYPE_ATOM_10_ENTRY;
        }
    }

    $xpath->registerNamespace('atom', self::NAMESPACE_ATOM_03);

    if ($xpath->query('//atom:feed')->length) {
        return self::TYPE_ATOM_03;
    }

    return self::TYPE_ANY;
}
|
Chris@0
|
547
|
Chris@0
|
/**
 * Set the plugin manager used to look up Extensions.
 *
 * @param  ExtensionManagerInterface $extensionManager Manager to keep in
 *     static::$extensionManager
 * @return void
 */
public static function setExtensionManager(ExtensionManagerInterface $extensionManager)
{
    static::$extensionManager = $extensionManager;

    return;
}
|
Chris@0
|
557
|
Chris@0
|
/**
 * Get the plugin manager used to look up Extensions.
 *
 * When none has been set, a StandaloneExtensionManager is created and
 * installed through setExtensionManager() before being returned.
 *
 * @return ExtensionManagerInterface
 */
public static function getExtensionManager()
{
    if (null === static::$extensionManager) {
        static::setExtensionManager(new StandaloneExtensionManager());
    }

    return static::$extensionManager;
}
|
Chris@0
|
570
|
Chris@0
|
/**
 * Register an Extension by name
 *
 * The extension manager must know "<name>\Feed" or "<name>\Entry"; otherwise
 * an exception is thrown. Names that are already registered are left alone.
 * Each of the two plugin names the manager has is appended to the matching
 * list in static::$extensions.
 *
 * @param  string $name
 * @return void
 * @throws Exception\RuntimeException if unable to resolve Extension class
 */
public static function registerExtension($name)
{
    if (! static::hasExtension($name)) {
        $message = sprintf(
            'Could not load extension "%s" using Plugin Loader.'
            . ' Check prefix paths are configured and extension exists.',
            $name
        );
        throw new Exception\RuntimeException($message);
    }

    // Nothing to do when the extension is already in one of the lists.
    if (static::isRegistered($name)) {
        return;
    }

    $plugins = static::getExtensionManager();

    foreach (['feed' => '\Feed', 'entry' => '\Entry'] as $kind => $suffix) {
        $pluginName = $name . $suffix;
        if ($plugins->has($pluginName)) {
            static::$extensions[$kind][] = $pluginName;
        }
    }
}
|
Chris@0
|
605
|
Chris@0
|
/**
 * Is a given named Extension registered?
 *
 * An extension counts as registered when "<name>\Feed" is in the feed list
 * or "<name>\Entry" is in the entry list of static::$extensions.
 *
 * @param  string $extensionName
 * @return bool
 */
public static function isRegistered($extensionName)
{
    return in_array($extensionName . '\Feed', static::$extensions['feed'])
        || in_array($extensionName . '\Entry', static::$extensions['entry']);
}
|
Chris@0
|
623
|
Chris@0
|
/**
 * Get the lists of extension names.
 *
 * @return array The static::$extensions array, keyed by 'feed', 'entry'
 *     and 'core'
 */
public static function getExtensions()
{
    $registered = static::$extensions;

    return $registered;
}
|
Chris@0
|
633
|
Chris@0
|
/**
 * Reset class state to defaults
 *
 * Clears the cache, HTTP client and extension manager, switches both HTTP
 * flags off, and restores the default extension lists.
 *
 * @return void
 */
public static function reset()
{
    static::$cache              = null;
    static::$httpClient         = null;
    static::$httpMethodOverride = false;
    static::$httpConditionalGet = false;
    static::$extensionManager   = null;

    $feedDefaults  = ['DublinCore\Feed', 'Atom\Feed'];
    $entryDefaults = ['Content\Entry', 'DublinCore\Entry', 'Atom\Entry'];

    // The default 'core' list is the feed defaults followed by the entry defaults.
    static::$extensions = [
        'feed'  => $feedDefaults,
        'entry' => $entryDefaults,
        'core'  => array_merge($feedDefaults, $entryDefaults),
    ];
}
|
Chris@0
|
665
|
Chris@0
|
/**
 * Register core (default) extensions
 *
 * Registers the fixed list of bundled extensions in order, then registers
 * GooglePlayPodcast only when the extension manager knows it; otherwise a
 * user-level notice asks for the manager implementation to be updated.
 *
 * @return void
 */
protected static function registerCoreExtensions()
{
    $bundled = [
        'DublinCore',
        'Content',
        'Atom',
        'Slash',
        'WellFormedWeb',
        'Thread',
        'Podcast',
    ];

    foreach ($bundled as $extension) {
        static::registerExtension($extension);
    }

    // Added in 2.10.0; check for it conditionally
    if (static::hasExtension('GooglePlayPodcast')) {
        static::registerExtension('GooglePlayPodcast');
    } else {
        trigger_error(
            sprintf(
                'Please update your %1$s\ExtensionManagerInterface implementation to add entries for'
                . ' %1$s\Extension\GooglePlayPodcast\Entry and %1$s\Extension\GooglePlayPodcast\Feed.',
                __NAMESPACE__
            ),
            \E_USER_NOTICE
        );
    }
}
|
Chris@0
|
693
|
Chris@0
|
/**
 * Utility method to apply an array_unique operation to a multidimensional
 * array.
 *
 * Each element is serialized so array_unique() can compare it as a string,
 * then the surviving elements are unserialized again. Keys of the kept
 * elements are preserved.
 *
 * @param  array $array
 * @return array
 */
public static function arrayUnique(array $array)
{
    $serialized = array_map('serialize', $array);
    $distinct   = array_unique($serialized);

    return array_map('unserialize', $distinct);
}
|
Chris@16
|
712
|
Chris@16
|
/**
 * Does the extension manager have the named extension?
 *
 * Returns true when the manager has "<name>\Feed" or "<name>\Entry".
 *
 * registerExtension() uses this to decide whether the name can be resolved
 * at all, and registerCoreExtensions() uses it to probe for an extension
 * added in a minor release: custom extension manager implementations may
 * not have an entry for it yet, and registerExtension() would otherwise
 * fail for them.
 *
 * @param  string $name
 * @return bool
 */
protected static function hasExtension($name)
{
    $plugins = static::getExtensionManager();

    if ($plugins->has($name . '\Feed')) {
        return true;
    }

    return (bool) $plugins->has($name . '\Entry');
}
|
Chris@0
|
736 }
|