Mercurial > hg > dml-open-vis
diff src/DML/MainVisBundle/Command/Views/Geography/ParsePlacesCommand.php @ 0:493bcb69166c
added public content
author | Daniel Wolff |
---|---|
date | Tue, 09 Feb 2016 20:54:02 +0100 |
parents | |
children |
line wrap: on
line diff
<?php
namespace DML\MainVisBundle\Command\Views\Geography;

use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;

/**
 * Console command "dml:views:geography:parse-places".
 *
 * Downloads the list of place names from the source URL, geocodes every name
 * that is not yet cached through OSM Nominatim and stores the address details
 * (or null when nothing was found) in a JSON file keyed by place name.
 *
 * The result file is rewritten after every processed place, so an interrupted
 * run can be resumed without losing the work already done.
 */
class ParsePlacesCommand extends ContainerAwareCommand
{
    /** URL that returns the JSON list of places (read as result.places[].name). */
    protected $sourceURL = "http://mirg.city.ac.uk/cp/api/v4/listPlaces?format=json";

    /** sprintf template for a Nominatim search; placeholders are the e-mail and the query. */
    protected $nominatimURLTemplate = "http://nominatim.openstreetmap.org/search?format=json&limit=1&addressdetails=1&email=%s&q=%s";

    /** Contact e-mail passed to Nominatim with every request. */
    protected $nominatimEmail = "alexander.kachkaev@city.ac.uk";

    /** User-Agent header sent to Nominatim. */
    protected $nominatimAppName = "DML VIS (place name caching script)";

    /** Result file location, relative to the parent directory of kernel.root_dir. */
    protected $resultRelativePath = "$/views/geography/parsedPlaces.json";

    /** Pause (in seconds) made before every Nominatim request. */
    protected $secondsBetweenRequests = 1;

    /** How many times a single query is attempted before the command gives up. */
    protected $maxAttemptsPerQuery = 5;

    /**
     * Declares the command name, description and its two flags.
     */
    protected function configure()
    {
        $this
            ->setName('dml:views:geography:parse-places')
            ->setDescription('For each place attribute found at the cliopatria server, collects geo data from OSM Nominatim and saves the result to a web dir')
            ->addOption(
                'reset',
                null,
                InputOption::VALUE_NONE,
                'Resets the result file before the start'
            )
            ->addOption(
                'process-nulls',
                null,
                InputOption::VALUE_NONE,
                'Re-obtains data for items that were null in the result (could not be geocoded eariler)'
            )
        ;
    }

    /**
     * Runs the command: works out which place names still need geocoding,
     * geocodes them one by one and saves the result file after each of them.
     *
     * @param InputInterface  $input
     * @param OutputInterface $output
     *
     * @throws \Exception when the source list or the existing result cannot be
     *                    read, when Nominatim keeps failing for a query, or
     *                    when the result cannot be written
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $output->writeln(sprintf("Reading from <comment>%s</comment>", $this->sourceURL));

        $placeNames = $this->fetchPlaceNames();
        $output->writeln(sprintf("<comment>%s</comment> places found.", count($placeNames)));

        $resultPath = $this->getContainer()->getParameter("kernel.root_dir") . '/../' . $this->resultRelativePath;

        $result = array();
        if (!file_exists($resultPath) || $input->getOption("reset")) {
            $output->writeln("Starting from scratch...");
            $placeNamesToProcess = $placeNames;
        } else {
            $result = $this->loadExistingResult($resultPath);
            $existingPlaceNames = array_keys($result);
            $placeNamesToProcess = array_diff($placeNames, $existingPlaceNames);

            if ($input->getOption("process-nulls")) {
                foreach ($existingPlaceNames as $placeName) {
                    // Strictly null: only entries that previously failed to geocode.
                    if ($result[$placeName] === null) {
                        $placeNamesToProcess[] = $placeName;
                    }
                }
            }
        }
        $output->writeln(sprintf("<comment>%s</comment> place names to process.", count($placeNamesToProcess)));

        foreach ($placeNamesToProcess as $placeName) {
            $output->write(sprintf('<comment>%s</comment>', $placeName));

            $placeResult = $this->geocodePlace($placeName, $output);
            $output->writeln($placeResult ? '<info>+</info>' : '<error>-</error>');

            $result[$placeName] = $placeResult;

            // Saved after every place on purpose: keeps progress if the run is interrupted.
            $this->saveResult($resultPath, $result);
        }

        $output->writeln("Done.");
    }

    /**
     * Downloads the source document and extracts the place names from it.
     *
     * @return array list of place names, in the order given by the source
     *
     * @throws \RuntimeException when the source cannot be read or has no result.places list
     */
    protected function fetchPlaceNames()
    {
        $rawSource = file_get_contents($this->sourceURL);
        if ($rawSource === false) {
            throw new \RuntimeException(sprintf('Could not read the list of places from "%s"', $this->sourceURL));
        }

        $sourceData = json_decode($rawSource, true);
        if (!is_array($sourceData)
            || !isset($sourceData["result"]["places"])
            || !is_array($sourceData["result"]["places"])
        ) {
            throw new \RuntimeException(sprintf('Unexpected response from "%s": no result.places list found', $this->sourceURL));
        }

        $placeNames = array();
        foreach ($sourceData["result"]["places"] as $placeEntity) {
            $placeNames[] = $placeEntity["name"];
        }

        return $placeNames;
    }

    /**
     * Reads the previously saved result file.
     *
     * @param string $resultPath path of the result file
     *
     * @return array map of place name => address array or null
     *
     * @throws \RuntimeException when the file cannot be read or is not a JSON object/array
     */
    protected function loadExistingResult($resultPath)
    {
        $rawResult = file_get_contents($resultPath);
        $result = ($rawResult === false) ? null : json_decode($rawResult, true);

        if (!is_array($result)) {
            // Refuse to continue instead of silently overwriting a cache that cannot be parsed.
            throw new \RuntimeException(sprintf('Could not parse the existing result in "%s"; fix the file or run with --reset', $resultPath));
        }

        return $result;
    }

    /**
     * Geocodes one place name.
     *
     * Brackets are turned into comma separators, then the comma-separated
     * chunks are sent to Nominatim as a whole; while nothing is found, the
     * leading chunk is dropped and the remainder is tried again.
     *
     * @param string          $placeName
     * @param OutputInterface $output    receives one progress marker per failed query
     *
     * @return array|null the "address" of the first hit, with a "trim" key holding
     *                    the number of dropped leading chunks when it is not zero;
     *                    null when no query produced a hit
     *
     * @throws \Exception when Nominatim keeps answering with errors
     */
    protected function geocodePlace($placeName, OutputInterface $output)
    {
        $chunks = explode(',', str_replace(array("(", ")"), array(",", ""), $placeName));
        $chunkCount = count($chunks);

        for ($i = 0; $i < $chunkCount; $i++) {
            $query = implode(', ', array_slice($chunks, $i));
            $nominatimResult = $this->queryNominatim($query, $output);

            // Guards against invalid JSON (null), error objects (no index 0)
            // and hits that carry no address details.
            if (is_array($nominatimResult)
                && isset($nominatimResult[0]['address'])
                && is_array($nominatimResult[0]['address'])
                && count($nominatimResult[0]['address']) > 0
            ) {
                $placeResult = $nominatimResult[0]['address'];
                if ($i) {
                    $placeResult['trim'] = $i;
                }

                return $placeResult;
            }

            $output->write(sprintf('<info>%s</info>', $i + 1));
        }

        return null;
    }

    /**
     * Sends one search query to Nominatim, retrying on non-200 answers.
     *
     * @param string          $query
     * @param OutputInterface $output receives the HTTP code of every failed attempt
     *
     * @return mixed the decoded JSON body of the first HTTP 200 answer
     *               (null when that body is not valid JSON)
     *
     * @throws \Exception when every attempt ended with a non-200 HTTP code
     */
    protected function queryNominatim($query, OutputInterface $output)
    {
        $queryURL = sprintf($this->nominatimURLTemplate, urlencode($this->nominatimEmail), urlencode($query));
        $curlOutput = false;

        for ($attempt = 1; $attempt <= $this->maxAttemptsPerQuery; $attempt++) {
            // The pause precedes every request, including retries.
            sleep($this->secondsBetweenRequests);

            $ch = curl_init($queryURL);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_TIMEOUT, 30);
            curl_setopt($ch, CURLOPT_USERAGENT, $this->nominatimAppName);
            $curlOutput = curl_exec($ch);
            $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            curl_close($ch);

            if ($httpcode === 200) {
                return json_decode($curlOutput, true);
            }

            $output->write(sprintf('<error>!%s!</error>', $httpcode));
        }

        throw new \Exception("Too many consequent errors in \"$queryURL\" :\n $curlOutput");
    }

    /**
     * Writes the result map to disk as a JSON object with one entry per line.
     *
     * Entries are encoded individually and joined with newlines, so the line
     * breaks can never end up inside a JSON string.
     *
     * @param string $resultPath path of the result file
     * @param array  $result     map of place name => address array or null
     *
     * @throws \RuntimeException when an entry cannot be encoded or the file cannot be written
     */
    protected function saveResult($resultPath, array $result)
    {
        $encodedEntries = array();
        foreach ($result as $placeName => $placeResult) {
            $encodedName = json_encode((string) $placeName);
            $encodedValue = json_encode($placeResult);
            if ($encodedName === false || $encodedValue === false) {
                // Fail before touching the file rather than overwrite it with broken data.
                throw new \RuntimeException(sprintf('Could not encode the result for place "%s" as JSON', $placeName));
            }
            $encodedEntries[] = $encodedName . ':' . $encodedValue;
        }
        $encodedResult = "{" . implode(",\n", $encodedEntries) . "\n}";

        $resultDir = dirname($resultPath);
        if (!is_dir($resultDir) && !mkdir($resultDir, 0777, true) && !is_dir($resultDir)) {
            throw new \RuntimeException(sprintf('Could not create the directory "%s"', $resultDir));
        }

        if (file_put_contents($resultPath, $encodedResult) === false) {
            throw new \RuntimeException(sprintf('Could not write the result to "%s"', $resultPath));
        }
    }
}