Mercurial > hg > dml-open-vis
comparison src/DML/MainVisBundle/Command/Views/Geography/ParsePlacesCommand.php @ 0:493bcb69166c
added public content
author | Daniel Wolff |
---|---|
date | Tue, 09 Feb 2016 20:54:02 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:493bcb69166c |
---|---|
1 <?php | |
2 namespace DML\MainVisBundle\Command\Views\Geography; | |
3 | |
4 use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand; | |
5 use Symfony\Component\Console\Input\InputArgument; | |
6 use Symfony\Component\Console\Input\InputInterface; | |
7 use Symfony\Component\Console\Input\InputOption; | |
8 use Symfony\Component\Console\Output\OutputInterface; | |
9 | |
/**
 * Console command "dml:views:geography:parse-places".
 *
 * Downloads the list of place names from $sourceURL, geocodes every name that
 * is not yet present in the result file with OSM Nominatim and stores the
 * address details of the best match (or null when nothing was found) in a JSON
 * file keyed by place name. The result file is rewritten after every processed
 * place, so an interrupted run can be resumed by running the command again.
 */
class ParsePlacesCommand extends ContainerAwareCommand
{
    protected $sourceURL = "http://mirg.city.ac.uk/cp/api/v4/listPlaces?format=json";
    protected $nominatimURLTemplate = "http://nominatim.openstreetmap.org/search?format=json&limit=1&addressdetails=1&email=%s&q=%s";
    protected $nominatimEmail = "alexander.kachkaev@city.ac.uk";
    // Sent as the User-Agent header of every Nominatim request
    protected $nominatimAppName = "DML VIS (place name caching script)";
    // Appended to "<kernel.root_dir>/../" to build the path of the result file
    protected $resultRelativePath = "$/views/geography/parsedPlaces.json";
    // Pause made before every single Nominatim request (including retries)
    protected $secondsBetweenRequests = 1;
    // Number of failed attempts for one query URL after which the command gives up
    protected $maxConsecutiveErrors = 5;

    /**
     * Declares the command name, its description and the two flags
     * (--reset and --process-nulls).
     */
    protected function configure()
    {
        $this
            ->setName('dml:views:geography:parse-places')
            ->setDescription('For each place attribute found at the cliopatria server, collects geo data from OSM Nominatim and saves the result to a web dir')
            ->addOption(
                'reset',
                null,
                InputOption::VALUE_NONE,
                'Resets the result file before the start'
            )
            ->addOption(
                'process-nulls',
                null,
                InputOption::VALUE_NONE,
                'Re-obtains data for items that were null in the result (could not be geocoded eariler)'
            )
        ;
    }

    /**
     * Runs the command: works out which place names still need geocoding,
     * geocodes them one by one and saves the result file after each of them.
     *
     * @param InputInterface  $input
     * @param OutputInterface $output
     *
     * @throws \RuntimeException when the source list or the existing result file
     *                           cannot be read, when a query keeps failing or
     *                           when the result file cannot be written
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $output->writeln(sprintf("Reading from <comment>%s</comment>", $this->sourceURL));

        $placeNames = $this->fetchPlaceNames();

        $output->writeln(sprintf("<comment>%s</comment> places found.", count($placeNames)));

        $resultPath = $this->getContainer()->getParameter("kernel.root_dir") . '/../' . $this->resultRelativePath;

        $result = array();
        if (!file_exists($resultPath) || $input->getOption("reset")) {
            $output->writeln("Starting from scratch...");
            $placeNamesToProcess = $placeNames;
        } else {
            $result = $this->loadExistingResult($resultPath);
            $placeNamesToProcess = array_diff($placeNames, array_keys($result));

            if ($input->getOption("process-nulls")) {
                foreach ($result as $existingPlaceName => $existingPlaceResult) {
                    // Loose comparison on purpose: an empty value is treated like null
                    if ($existingPlaceResult == null) {
                        $placeNamesToProcess[] = $existingPlaceName;
                    }
                }
            }
        }

        // A name listed twice by the source needs to be geocoded only once
        $placeNamesToProcess = array_unique($placeNamesToProcess);

        $output->writeln(sprintf("<comment>%s</comment> place names to process.", count($placeNamesToProcess)));

        foreach ($placeNamesToProcess as $placeName) {
            $output->write(sprintf('<comment>%s</comment>', $placeName));

            $placeResult = $this->geocodePlaceName($placeName, $output);
            $output->writeln($placeResult ? '<info>+</info>' : '<error>-</error>');

            $result[$placeName] = $placeResult;

            // Saved after every place so that progress survives an interrupted run
            $this->saveResult($resultPath, $result);
        }

        $output->writeln("Done.");
    }

    /**
     * Downloads the source document and extracts the "name" of every entry
     * found under result.places.
     *
     * @return array list of place names, in the order given by the source
     *
     * @throws \RuntimeException when the source cannot be read or has an unexpected structure
     */
    private function fetchPlaceNames()
    {
        $rawSource = file_get_contents($this->sourceURL);
        if ($rawSource === false) {
            throw new \RuntimeException(sprintf('Could not read the list of places from "%s"', $this->sourceURL));
        }

        $sourceData = json_decode($rawSource, true);
        if (!isset($sourceData["result"]["places"]) || !is_array($sourceData["result"]["places"])) {
            throw new \RuntimeException(sprintf('Unexpected response from "%s": result.places is missing', $this->sourceURL));
        }

        $placeNames = array();
        foreach ($sourceData["result"]["places"] as $placeEntity) {
            // Entries without a name cannot be geocoded and are skipped
            if (isset($placeEntity["name"])) {
                $placeNames[] = $placeEntity["name"];
            }
        }

        return $placeNames;
    }

    /**
     * Reads the result file written by a previous run.
     *
     * @param string $resultPath
     *
     * @return array previously saved data keyed by place name
     *
     * @throws \RuntimeException when the file cannot be read or does not decode to an array
     */
    private function loadExistingResult($resultPath)
    {
        $rawResult = file_get_contents($resultPath);
        $result = ($rawResult === false) ? null : json_decode($rawResult, true);

        // Refuse to continue rather than overwrite a file that cannot be understood
        if (!is_array($result)) {
            throw new \RuntimeException(sprintf('Could not parse the existing result file "%s"; fix it or run the command with --reset', $resultPath));
        }

        return $result;
    }

    /**
     * Geocodes a single place name.
     *
     * Brackets in the name are turned into comma-separated parts. The full name
     * is queried first; while nothing is found, the leading part is dropped and
     * the remainder is queried again. When a match is only found after dropping
     * parts, the number of dropped parts is stored under the "trim" key.
     *
     * @param string          $placeName
     * @param OutputInterface $output    receives the 1-based number of every attempt that found nothing
     *
     * @return array|null address details of the first match or null when no query matched
     *
     * @throws \RuntimeException when a query keeps failing
     */
    private function geocodePlaceName($placeName, OutputInterface $output)
    {
        $chunks = explode(',', str_replace(array("(", ")"), array(",", ""), $placeName));
        $chunkCount = count($chunks);

        for ($i = 0; $i < $chunkCount; $i++) {
            $query = implode(', ', array_slice($chunks, $i));
            $queryURL = sprintf($this->nominatimURLTemplate, urlencode($this->nominatimEmail), urlencode($query));

            $matches = $this->requestNominatim($queryURL, $output);

            $placeResult = null;
            if (isset($matches[0]['address']) && is_array($matches[0]['address'])) {
                $placeResult = $matches[0]['address'];
                if ($i) {
                    $placeResult['trim'] = $i;
                }
            }

            if ($placeResult) {
                return $placeResult;
            }

            $output->write(sprintf('<info>%s</info>', $i + 1));
        }

        return null;
    }

    /**
     * Performs one Nominatim query, retrying on failure.
     *
     * A response counts as failed when its HTTP status is not 200 or when its
     * body does not decode to a JSON array. The configured pause is made before
     * every attempt.
     *
     * @param string          $queryURL
     * @param OutputInterface $output   receives a marker for every failed attempt
     *
     * @return array decoded list of matches (empty when nothing was found)
     *
     * @throws \RuntimeException after $maxConsecutiveErrors failed attempts in a row
     */
    private function requestNominatim($queryURL, OutputInterface $output)
    {
        $curlOutput = false;

        for ($attempt = 1; $attempt <= $this->maxConsecutiveErrors; $attempt++) {
            sleep($this->secondsBetweenRequests);

            $ch = curl_init($queryURL);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_TIMEOUT, 30);
            curl_setopt($ch, CURLOPT_USERAGENT, $this->nominatimAppName);
            $curlOutput = curl_exec($ch);
            $httpcode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
            curl_close($ch);

            if ($httpcode !== 200) {
                $output->write(sprintf('<error>!%s!</error>', $httpcode));
                continue;
            }

            $nominatimResult = json_decode($curlOutput, true);
            if (is_array($nominatimResult)) {
                return $nominatimResult;
            }

            // Status 200 with a body that is not a JSON array is treated as one more error
            $output->write('<error>!json!</error>');
        }

        throw new \RuntimeException("Too many consequent errors in \"$queryURL\" :\n $curlOutput");
    }

    /**
     * Writes the whole result to the result file.
     *
     * @param string $resultPath
     * @param array  $result     data keyed by place name
     *
     * @throws \RuntimeException when the data cannot be encoded or the file cannot be written
     */
    private function saveResult($resultPath, array $result)
    {
        $encodedResult = json_encode($result);
        if ($encodedResult === false) {
            throw new \RuntimeException(sprintf('Could not encode the result as JSON (json error code %s)', json_last_error()));
        }

        // Line breaks are inserted after entries to keep the file readable and diffable.
        // NOTE(review): this is a plain textual replacement over the encoded string, so
        // the same character sequences inside a name or a value would be affected too.
        $encodedResult = str_replace(array('null,', '},', '}}'), array("null,\n", "},\n", "}\n}"), $encodedResult);

        if (file_put_contents($resultPath, $encodedResult) === false) {
            throw new \RuntimeException(sprintf('Could not write the result to "%s"', $resultPath));
        }
    }
}