<?php
/**
 * ARC2 format detection function
 *
 * @author Benjamin Nowack <bnowack@semsol.com>
 * @license http://arc.semsol.org/license
 * @package ARC2
 * @version 2010-11-16
 */

/**
 * Guesses the serialization format of a document.
 *
 * Checks run in a fixed order and the first one that matches wins:
 * media type, XML sniffing of the content, JSON/JSONP sniffing,
 * Turtle/N3 hints (content, then extension), N-Triples hints
 * (content, then extension).
 *
 * @param string $v     Document content to sniff.
 * @param string $mtype Media type string; matched by substring (e.g. "/rdf+xml").
 * @param string $ext   File extension; compared exactly against "ttl", "n3" and "nt".
 *
 * @return string|false One of 'atom', 'rdfxml', 'turtle', 'n3', 'sparqlxml',
 *                      'srx', 'html', 'rss', 'opml', 'xml', 'sgajson',
 *                      'cbjson', 'json', 'ntriples', or false when nothing matched.
 */
function ARC2_getFormat($v, $mtype = '', $ext = '') {
  $r = false;
  /* mtype check (atom, rdf/xml, turtle, n3, sparql results) */
  $r = (!$r && preg_match('/\/atom\+xml/', $mtype)) ? 'atom' : $r;
  $r = (!$r && preg_match('/\/rdf\+xml/', $mtype)) ? 'rdfxml' : $r;
  $r = (!$r && preg_match('/\/(x\-)?turtle/', $mtype)) ? 'turtle' : $r;
  $r = (!$r && preg_match('/\/rdf\+n3/', $mtype)) ? 'n3' : $r;
  $r = (!$r && preg_match('/\/sparql-results\+xml/', $mtype)) ? 'sparqlxml' : $r;
  /* xml sniffing */
  if (
    !$r &&
    /* starts with angle brackets */
    preg_match('/^\s*\<[^\s]/s', $v) &&
    /* has an xmlns:* declaration or a matching pair of tags */
    (preg_match('/\sxmlns\:?/', $v) || preg_match('/\<([^\s]+).+\<\/\\1\>/s', $v))
  ) {
    /* strip leading xml declarations / processing instructions */
    while (preg_match('/^\s*\<\?xml[^\r\n]+\?\>\s*/s', $v)) {
      $v = preg_replace('/^\s*\<\?xml[^\r\n]+\?\>\s*/s', '', $v);
    }
    /* strip leading comments */
    while (preg_match('/^\s*\<\!--.+?--\>\s*/s', $v)) {
      $v = preg_replace('/^\s*\<\!--.+?--\>\s*/s', '', $v);
    }
    /* doctype checks (html, rdf) */
    $r = (!$r && preg_match('/^\s*\<\!DOCTYPE\s+html[\s\>]/is', $v)) ? 'html' : $r;
    /* the namespace prefix may be longer than one character (usually "rdf") */
    $r = (!$r && preg_match('/^\s*\<\!DOCTYPE\s+[a-z0-9\_\-]+\:RDF\s/is', $v)) ? 'rdfxml' : $r;
    /* markup checks (a doctype with an internal subset is removed first, so the root element is at the start) */
    $v = preg_replace('/^\s*\<\!DOCTYPE\s.*\]\>/is', '', $v);
    $r = (!$r && preg_match('/^\s*\<rss\s+[^\>]*version/s', $v)) ? 'rss' : $r;
    $r = (!$r && preg_match('/^\s*\<feed\s+[^\>]+http\:\/\/www\.w3\.org\/2005\/Atom/s', $v)) ? 'atom' : $r;
    $r = (!$r && preg_match('/^\s*\<opml\s/s', $v)) ? 'opml' : $r;
    $r = (!$r && preg_match('/^\s*\<html[\s\>]/is', $v)) ? 'html' : $r;
    $r = (!$r && preg_match('/^\s*\<sparql\s+[^\>]+http\:\/\/www\.w3\.org\/2005\/sparql\-results\#/s', $v)) ? 'sparqlxml' : $r;
    /* sparql results namespace on a root element other than an unprefixed <sparql> */
    $r = (!$r && preg_match('/^\s*\<[^\>]+http\:\/\/www\.w3\.org\/2005\/sparql\-results#/s', $v)) ? 'srx' : $r;
    $r = (!$r && preg_match('/^\s*\<[^\s]*RDF[\s\>]/s', $v)) ? 'rdfxml' : $r;
    $r = (!$r && preg_match('/^\s*\<[^\>]+http\:\/\/www\.w3\.org\/1999\/02\/22\-rdf/s', $v)) ? 'rdfxml' : $r;

    /* looked like xml, but no known vocabulary was recognized */
    $r = !$r ? 'xml' : $r;
  }
  /* json|jsonp */
  if (!$r && preg_match('/^[a-z0-9\.\(]*\s*[\{\[].*/s', trim($v))) {
    /* google social graph api */
    $r = (!$r && preg_match('/\"canonical_mapping\"/', $v)) ? 'sgajson' : $r;
    /* crunchbase api */
    $r = (!$r && preg_match('/\"permalink\"/', $v)) ? 'cbjson' : $r;

    $r = !$r ? 'json' : $r;
  }
  /* turtle/n3 */
  $r = (!$r && preg_match('/\@(prefix|base)/i', $v)) ? 'turtle' : $r;
  $r = (!$r && preg_match('/^(ttl)$/', $ext)) ? 'turtle' : $r;
  $r = (!$r && preg_match('/^(n3)$/', $ext)) ? 'n3' : $r;
  /* ntriples */
  $r = (!$r && preg_match('/^\s*(_:|<).+?\s+<[^>]+?>\s+\S.+?\s*\.\s*$/sm', $v)) ? 'ntriples' : $r;
  $r = (!$r && preg_match('/^(nt)$/', $ext)) ? 'ntriples' : $r;
  return $r;
}