annotate src/DML/MainVisBundle/Command/Views/Geography/ParsePlacesCommand.php @ 0:493bcb69166c

added public content
author Daniel Wolff
date Tue, 09 Feb 2016 20:54:02 +0100
parents
children
rev   line source
Daniel@0 1 <?php
Daniel@0 2 namespace DML\MainVisBundle\Command\Views\Geography;
Daniel@0 3
Daniel@0 4 use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
Daniel@0 5 use Symfony\Component\Console\Input\InputArgument;
Daniel@0 6 use Symfony\Component\Console\Input\InputInterface;
Daniel@0 7 use Symfony\Component\Console\Input\InputOption;
Daniel@0 8 use Symfony\Component\Console\Output\OutputInterface;
Daniel@0 9
/**
 * Console command that geocodes the place names listed by the source API.
 *
 * The list of places is downloaded from $sourceURL, every place name that is
 * not yet present in the result file is looked up in OSM Nominatim, and the
 * result file is rewritten after each processed place so that an interrupted
 * run can be resumed later.
 */
class ParsePlacesCommand extends ContainerAwareCommand
{
    /** URL that returns the JSON list of places (read from result.places[].name). */
    protected $sourceURL = "http://mirg.city.ac.uk/cp/api/v4/listPlaces?format=json";

    /** sprintf() template of a Nominatim search URL; arguments: url-encoded email, url-encoded query. */
    protected $nominatimURLTemplate = "http://nominatim.openstreetmap.org/search?format=json&limit=1&addressdetails=1&email=%s&q=%s";

    /** Contact email sent to Nominatim with every request. */
    protected $nominatimEmail = "alexander.kachkaev@city.ac.uk";

    /** Value of the User-Agent header sent to Nominatim. */
    protected $nominatimAppName = "DML VIS (place name caching script)";

    /**
     * Location of the result file, relative to the parent directory of kernel.root_dir.
     *
     * NOTE(review): the leading "$" path segment looks unusual (the command
     * description mentions a web dir) - confirm that this is the intended path.
     */
    protected $resultRelativePath = '$/views/geography/parsedPlaces.json';

    /** Pause (in seconds) made before every single request to Nominatim. */
    protected $secondsBetweenRequests = 1;

    /** How many times one Nominatim query is attempted before the command gives up. */
    protected $maxAttemptsPerQuery = 4;

    /**
     * Declares the command name, its description and the two flags it accepts.
     */
    protected function configure()
    {
        $this
            ->setName('dml:views:geography:parse-places')
            ->setDescription('For each place attribute found at the cliopatria server, collects geo data from OSM Nominatim and saves the result to a web dir')
            ->addOption(
                'reset',
                null,
                InputOption::VALUE_NONE,
                'Resets the result file before the start'
            )
            ->addOption(
                'process-nulls',
                null,
                InputOption::VALUE_NONE,
                'Re-obtains data for items that were null in the result (could not be geocoded eariler)'
            )
        ;
    }

    /**
     * Runs the command: reads place names, geocodes the pending ones and saves the result.
     *
     * @param InputInterface  $input  Provides the "reset" and "process-nulls" flags.
     * @param OutputInterface $output Receives the progress messages.
     *
     * @return int 0 on success.
     *
     * @throws \RuntimeException When the source or the result file cannot be read,
     *                           when Nominatim keeps failing for one query, or when
     *                           the result cannot be written.
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $output->writeln(sprintf("Reading from <comment>%s</comment>", $this->sourceURL));

        $placeNames = $this->fetchPlaceNames();
        $output->writeln(sprintf("<comment>%s</comment> places found.", count($placeNames)));

        $resultPath = $this->getContainer()->getParameter("kernel.root_dir") . '/../' . $this->resultRelativePath;

        $result = array();
        if (!file_exists($resultPath) || $input->getOption("reset")) {
            $output->writeln("Starting from scratch...");
            $placeNamesToProcess = $placeNames;
        } else {
            $result = $this->loadExistingResult($resultPath);
            $placeNamesToProcess = $this->selectPlaceNamesToProcess(
                $placeNames,
                $result,
                (bool) $input->getOption("process-nulls")
            );
        }
        $output->writeln(sprintf("<comment>%s</comment> place names to process.", count($placeNamesToProcess)));

        foreach ($placeNamesToProcess as $placeName) {
            $output->write(sprintf('<comment>%s</comment>', $placeName));

            $placeResult = $this->geocodePlace($placeName, $output);
            $output->writeln($placeResult ? '<info>+</info>' : '<error>-</error>');

            $result[$placeName] = $placeResult;

            // Saved after every place on purpose: progress survives an interrupted run.
            $this->saveResult($resultPath, $result);
        }

        $output->writeln("Done.");

        return 0;
    }

    /**
     * Downloads the list of places and returns the distinct place names in source order.
     *
     * @return string[]
     *
     * @throws \RuntimeException When the source cannot be read or has an unexpected structure.
     */
    private function fetchPlaceNames()
    {
        $raw = file_get_contents($this->sourceURL);
        if ($raw === false) {
            throw new \RuntimeException(sprintf('Could not read the list of places from "%s"', $this->sourceURL));
        }

        $sourceData = json_decode($raw, true);
        if (!is_array($sourceData)
            || !isset($sourceData["result"]["places"])
            || !is_array($sourceData["result"]["places"])
        ) {
            throw new \RuntimeException(sprintf('Unexpected structure of the list of places at "%s"', $this->sourceURL));
        }

        $placeNames = array();
        foreach ($sourceData["result"]["places"] as $placeEntity) {
            // Entries without a usable name cannot be geocoded, so they are skipped.
            if (is_array($placeEntity) && isset($placeEntity["name"]) && is_scalar($placeEntity["name"])) {
                $placeNames[] = (string) $placeEntity["name"];
            }
        }

        // A name listed twice must not be sent to Nominatim twice.
        return array_values(array_unique($placeNames));
    }

    /**
     * Reads the previously saved result file.
     *
     * @param string $resultPath Path of the result file (must exist).
     *
     * @return array Map of place name => address array or null.
     *
     * @throws \RuntimeException When the file cannot be read or does not contain a JSON object/array.
     */
    private function loadExistingResult($resultPath)
    {
        $raw = file_get_contents($resultPath);
        if ($raw === false) {
            throw new \RuntimeException(sprintf('Could not read the result file "%s"', $resultPath));
        }

        // An empty file is treated as "nothing cached yet".
        if (trim($raw) === '') {
            return array();
        }

        $decoded = json_decode($raw, true);
        if (!is_array($decoded)) {
            throw new \RuntimeException(sprintf(
                'The result file "%s" does not contain valid JSON; fix it or rerun with --reset',
                $resultPath
            ));
        }

        return $decoded;
    }

    /**
     * Picks the place names that still have to be geocoded.
     *
     * @param string[] $placeNames   Names found in the source.
     * @param array    $result       Previously saved result (place name => address or null).
     * @param bool     $processNulls Whether names saved with an empty result are retried.
     *
     * @return string[]
     */
    private function selectPlaceNamesToProcess(array $placeNames, array $result, $processNulls)
    {
        $pending = array();
        foreach ($placeNames as $placeName) {
            if (!array_key_exists($placeName, $result)) {
                $pending[] = $placeName;
            }
        }

        if ($processNulls) {
            foreach ($result as $placeName => $address) {
                if (empty($address)) {
                    // PHP turns numeric string keys into integers; restore the string form.
                    $pending[] = (string) $placeName;
                }
            }
        }

        return $pending;
    }

    /**
     * Geocodes one place name, dropping leading comma-separated chunks until Nominatim finds a match.
     *
     * Parentheses are treated as separators: "(" becomes a comma and ")" is removed.
     * When the match was found only after dropping $i leading chunks, $i is stored
     * under the "trim" key of the returned address.
     *
     * @param string          $placeName Place name as found in the source.
     * @param OutputInterface $output    Receives the progress markers.
     *
     * @return array|null Address details of the first match, or null when nothing matched.
     *
     * @throws \RuntimeException When Nominatim keeps failing for one of the queries.
     */
    private function geocodePlace($placeName, OutputInterface $output)
    {
        $chunks = explode(',', str_replace(array("(", ")"), array(",", ""), $placeName));
        $numberOfChunks = count($chunks);

        for ($i = 0; $i < $numberOfChunks; $i++) {
            $query = implode(', ', array_slice($chunks, $i));

            $address = null;
            // A query made only of separators and whitespace (e.g. the tail of "Foo ()")
            // cannot match anything, so no request is made for it.
            if (trim($query, ", \t\n\r\0\x0B") !== '') {
                $address = $this->requestAddress($query, $output);
            }

            if ($address) {
                if ($i) {
                    $address['trim'] = $i;
                }

                return $address;
            }

            $output->write(sprintf('<info>%s</info>', $i + 1));
        }

        return null;
    }

    /**
     * Sends one search query to Nominatim, retrying on non-200 responses.
     *
     * @param string          $query  Free-form search query.
     * @param OutputInterface $output Receives "!code!" markers for failed attempts.
     *
     * @return array|null Non-empty "address" array of the first match, or null when there is no usable match.
     *
     * @throws \RuntimeException When every attempt ended with a non-200 response.
     */
    private function requestAddress($query, OutputInterface $output)
    {
        $queryURL = sprintf($this->nominatimURLTemplate, urlencode($this->nominatimEmail), urlencode($query));

        $curlOutput = false;
        $attemptsLeft = max(1, (int) $this->maxAttemptsPerQuery);
        while ($attemptsLeft-- > 0) {
            // The pause precedes every request, including the first one and retries.
            sleep($this->secondsBetweenRequests);

            $ch = curl_init($queryURL);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_TIMEOUT, 30);
            curl_setopt($ch, CURLOPT_USERAGENT, $this->nominatimAppName);
            $curlOutput = curl_exec($ch);
            $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            curl_close($ch);

            if ($httpcode !== 200) {
                $output->write(sprintf('<error>!%s!</error>', $httpcode));
                continue;
            }

            $nominatimResult = json_decode($curlOutput, true);
            if (is_array($nominatimResult)
                && isset($nominatimResult[0]['address'])
                && is_array($nominatimResult[0]['address'])
                && $nominatimResult[0]['address']
            ) {
                return $nominatimResult[0]['address'];
            }

            // A 200 response without a usable match is a definite "not found"; no retry.
            return null;
        }

        throw new \RuntimeException("Too many consequent errors in \"$queryURL\" :\n $curlOutput");
    }

    /**
     * Writes the result to disk as a JSON object with one place per line.
     *
     * The object is assembled entry by entry instead of post-processing the output
     * of json_encode() with string replacements, so that text inside the values
     * can never be altered, and so that numeric place names are still written as
     * object keys.
     *
     * @param string $resultPath Path of the result file.
     * @param array  $result     Map of place name => address array or null.
     *
     * @throws \RuntimeException When an entry cannot be encoded or the file cannot be written.
     */
    private function saveResult($resultPath, array $result)
    {
        $lines = array();
        foreach ($result as $placeName => $address) {
            $encodedName = json_encode((string) $placeName);
            $encodedAddress = json_encode($address);
            if ($encodedName === false || $encodedAddress === false) {
                throw new \RuntimeException(sprintf(
                    'Could not encode the result for "%s" (JSON error code %s)',
                    $placeName,
                    json_last_error()
                ));
            }
            $lines[] = $encodedName . ':' . $encodedAddress;
        }

        $encodedResult = '{' . implode(",\n", $lines) . "\n}";

        if (file_put_contents($resultPath, $encodedResult) === false) {
            throw new \RuntimeException(sprintf('Could not write the result file "%s"', $resultPath));
        }
    }
}