Mercurial > hg > dml-open-vis
comparison src/DML/MainVisBundle/Command/Views/Geography/ParsePlacesCommand.php @ 0:493bcb69166c
added public content
| author | Daniel Wolff |
|---|---|
| date | Tue, 09 Feb 2016 20:54:02 +0100 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:493bcb69166c |
|---|---|
| 1 <?php | |
| 2 namespace DML\MainVisBundle\Command\Views\Geography; | |
| 3 | |
| 4 use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand; | |
| 5 use Symfony\Component\Console\Input\InputArgument; | |
| 6 use Symfony\Component\Console\Input\InputInterface; | |
| 7 use Symfony\Component\Console\Input\InputOption; | |
| 8 use Symfony\Component\Console\Output\OutputInterface; | |
| 9 | |
/**
 * Console command that geocodes place names via OSM Nominatim and caches the answers.
 *
 * The list of place names is downloaded from $sourceURL. For every name that is not yet in the
 * result file (or, with --process-nulls, was stored without a match) Nominatim is queried and
 * the "address" part of the first hit is stored under the place name. The result file is
 * rewritten after every place so that an interrupted run can be resumed.
 */
class ParsePlacesCommand extends ContainerAwareCommand
{
    protected $sourceURL = "http://mirg.city.ac.uk/cp/api/v4/listPlaces?format=json";
    protected $nominatimURLTemplate = "http://nominatim.openstreetmap.org/search?format=json&limit=1&addressdetails=1&email=%s&q=%s";
    protected $nominatimEmail = "alexander.kachkaev@city.ac.uk";
    protected $nominatimAppName = "DML VIS (place name caching script)";
    // NOTE(review): the leading "$" is kept exactly as found (it is a literal character here,
    // not interpolation) -- confirm that this is the intended directory name.
    protected $resultRelativePath = "$/views/geography/parsedPlaces.json";
    protected $secondsBetweenRequests = 1;

    /** Maximum number of HTTP attempts for a single Nominatim query before giving up. */
    protected $maxAttemptsPerQuery = 5;

    /**
     * Declares the command name, its description and the --reset / --process-nulls options.
     */
    protected function configure()
    {
        $this
            ->setName('dml:views:geography:parse-places')
            ->setDescription('For each place attribute found at the cliopatria server, collects geo data from OSM Nominatim and saves the result to a web dir')
            ->addOption(
                'reset',
                null,
                InputOption::VALUE_NONE,
                'Resets the result file before the start'
            )
            ->addOption(
                'process-nulls',
                null,
                InputOption::VALUE_NONE,
                'Re-obtains data for items that were null in the result (could not be geocoded eariler)'
            )
        ;
    }

    /**
     * Downloads the place list, works out which names still need geocoding and processes them.
     *
     * @param InputInterface  $input  Provides the "reset" and "process-nulls" options.
     * @param OutputInterface $output Receives progress messages.
     *
     * @throws \Exception When the source list or the existing result file cannot be read or
     *                    decoded, when Nominatim keeps failing, or when the result cannot be saved.
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $output->writeln(sprintf("Reading from <comment>%s</comment>", $this->sourceURL));

        $placeNames = $this->fetchPlaceNames();

        $output->writeln(sprintf("<comment>%s</comment> places found.", count($placeNames)));

        $resultPath = $this->getContainer()->getParameter("kernel.root_dir") . '/../' . $this->resultRelativePath;

        $result = array();
        if (!file_exists($resultPath) || $input->getOption("reset")) {
            $output->writeln("Starting from scratch...");
            $placeNamesToProcess = $placeNames;
        } else {
            $result = $this->loadExistingResult($resultPath);
            $placeNamesToProcess = array_diff($placeNames, array_keys($result));

            if ($input->getOption("process-nulls")) {
                foreach ($result as $placeName => $placeResult) {
                    // Same entries the former loose "== null" check selected: null or an empty array.
                    if ($placeResult === null || $placeResult === array()) {
                        $placeNamesToProcess[] = $placeName;
                    }
                }
            }
        }

        // A name listed twice by the source would otherwise cost two rounds of rate-limited requests.
        $placeNamesToProcess = array_values(array_unique($placeNamesToProcess));

        $output->writeln(sprintf("<comment>%s</comment> place names to process.", count($placeNamesToProcess)));

        foreach ($placeNamesToProcess as $placeName) {
            $output->write(sprintf('<comment>%s</comment>', $placeName));

            $placeResult = $this->geocodePlace($placeName, $output);
            if ($placeResult) {
                $output->writeln('<info>+</info>');
            } else {
                $output->writeln('<error>-</error>');
            }

            $result[$placeName] = $placeResult;

            // Saved after every place on purpose: an interrupted run keeps what it already fetched.
            $this->saveResult($resultPath, $result);
        }

        $output->writeln("Done.");
    }

    /**
     * Downloads the source document and returns the "name" of every entry in result.places.
     *
     * @return array List of place names; entries without a "name" key are skipped.
     *
     * @throws \Exception When the document cannot be downloaded or lacks the result.places list.
     */
    private function fetchPlaceNames()
    {
        $rawSource = file_get_contents($this->sourceURL);
        if ($rawSource === false) {
            throw new \Exception(sprintf('Could not read the list of places from "%s"', $this->sourceURL));
        }

        $sourceData = json_decode($rawSource, true);
        if (!isset($sourceData["result"]["places"]) || !is_array($sourceData["result"]["places"])) {
            throw new \Exception(sprintf('Unexpected response from "%s": result.places is missing', $this->sourceURL));
        }

        $placeNames = array();
        foreach ($sourceData["result"]["places"] as $placeEntity) {
            if (isset($placeEntity["name"])) {
                $placeNames[] = $placeEntity["name"];
            }
        }

        return $placeNames;
    }

    /**
     * Reads the previously saved result file.
     *
     * @param string $resultPath Path of the result file.
     *
     * @return array Map of place name to stored address data (or null).
     *
     * @throws \Exception When the file cannot be read or does not contain a JSON object/array;
     *                    failing here avoids silently overwriting an existing cache.
     */
    private function loadExistingResult($resultPath)
    {
        $rawResult = file_get_contents($resultPath);
        $result = $rawResult === false ? null : json_decode($rawResult, true);
        if (!is_array($result)) {
            throw new \Exception(sprintf('Could not read or decode "%s"; fix the file or rerun with --reset', $resultPath));
        }

        return $result;
    }

    /**
     * Geocodes one place name, dropping leading comma-separated parts until Nominatim finds a match.
     *
     * Parentheses are treated as separators too: "(" becomes a comma and ")" is removed. When
     * the match was found only after dropping $i leading parts, the returned data carries
     * 'trim' => $i.
     *
     * @param string          $placeName Place name as listed by the source.
     * @param OutputInterface $output    Receives the per-attempt progress marks.
     *
     * @return array|null Address data of the first hit, or null when no variant matched.
     *
     * @throws \Exception When a query keeps failing (see queryNominatim()).
     */
    private function geocodePlace($placeName, OutputInterface $output)
    {
        $chunks = explode(',', str_replace(array("(", ")"), array(",", ""), $placeName));
        $chunkCount = count($chunks);

        for ($i = 0; $i < $chunkCount; $i++) {
            $query = implode(', ', array_slice($chunks, $i));
            $queryURL = sprintf($this->nominatimURLTemplate, urlencode($this->nominatimEmail), urlencode($query));

            $nominatimResult = $this->queryNominatim($queryURL, $output);

            // Only a hit that really carries address details counts as a match.
            if (isset($nominatimResult[0]['address']) && is_array($nominatimResult[0]['address'])) {
                $placeResult = $nominatimResult[0]['address'];
                if ($i) {
                    $placeResult['trim'] = $i;
                }

                return $placeResult;
            }

            $output->write(sprintf('<info>%s</info>', $i + 1));
        }

        return null;
    }

    /**
     * Performs one Nominatim search, retrying on non-200 responses.
     *
     * Sleeps $secondsBetweenRequests before every attempt to throttle the request rate.
     *
     * @param string          $queryURL Fully built search URL.
     * @param OutputInterface $output   Receives an error mark for every failed attempt.
     *
     * @return array Decoded list of hits; empty when the body is not a JSON array/object.
     *
     * @throws \Exception After $maxAttemptsPerQuery consecutive non-200 responses.
     */
    private function queryNominatim($queryURL, OutputInterface $output)
    {
        $curlOutput = false;

        for ($attempt = 0; $attempt < $this->maxAttemptsPerQuery; $attempt++) {
            sleep($this->secondsBetweenRequests);

            $ch = curl_init($queryURL);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_TIMEOUT, 30);
            curl_setopt($ch, CURLOPT_USERAGENT, $this->nominatimAppName);
            $curlOutput = curl_exec($ch);
            $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            curl_close($ch);

            if ($httpcode !== 200) {
                $output->write(sprintf('<error>!%s!</error>', $httpcode));
                continue;
            }

            // A 200 response with an undecodable body is handled as "no hits" instead of
            // being passed to count(), which rejects null on current PHP versions.
            $nominatimResult = json_decode($curlOutput, true);

            return is_array($nominatimResult) ? $nominatimResult : array();
        }

        throw new \Exception("Too many consequent errors in \"$queryURL\" :\n $curlOutput");
    }

    /**
     * Writes the result map to disk as a JSON object with one entry per line.
     *
     * Every entry is encoded separately and the lines are joined afterwards, so text inside
     * names or values can never be mistaken for an entry boundary.
     *
     * @param string $resultPath Path of the result file.
     * @param array  $result     Map of place name to address data (or null).
     *
     * @throws \Exception When an entry cannot be encoded or the file cannot be written; the
     *                    existing file is left untouched in the encoding-failure case.
     */
    private function saveResult($resultPath, array $result)
    {
        $encodedEntries = array();
        foreach ($result as $placeName => $placeResult) {
            $encodedName = json_encode((string) $placeName);
            $encodedValue = json_encode($placeResult);
            if ($encodedName === false || $encodedValue === false) {
                throw new \Exception(sprintf('Could not encode the result for "%s" as JSON', $placeName));
            }
            $encodedEntries[] = $encodedName . ':' . $encodedValue;
        }

        $encodedResult = '{' . implode(",\n", $encodedEntries) . "\n}";

        if (file_put_contents($resultPath, $encodedResult) === false) {
            throw new \Exception(sprintf('Could not write the result to "%s"', $resultPath));
        }
    }
}
