| Daniel@0 | 1 <?php | 
| Daniel@0 | 2 namespace DML\MainVisBundle\Command\Views\Geography; | 
| Daniel@0 | 3 | 
| Daniel@0 | 4 use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand; | 
| Daniel@0 | 5 use Symfony\Component\Console\Input\InputArgument; | 
| Daniel@0 | 6 use Symfony\Component\Console\Input\InputInterface; | 
| Daniel@0 | 7 use Symfony\Component\Console\Input\InputOption; | 
| Daniel@0 | 8 use Symfony\Component\Console\Output\OutputInterface; | 
| Daniel@0 | 9 | 
/**
 * Console command "dml:views:geography:parse-places".
 *
 * Downloads the list of place names from $sourceURL, geocodes every name that
 * is not yet present in the result file through OSM Nominatim and stores the
 * "address" part of the first match under the place name. Names that could not
 * be geocoded are stored as null. The result file is rewritten after every
 * processed place, so an interrupted run can be resumed later.
 */
class ParsePlacesCommand extends ContainerAwareCommand
{
    protected $sourceURL = "http://mirg.city.ac.uk/cp/api/v4/listPlaces?format=json";
    protected $nominatimURLTemplate = "http://nominatim.openstreetmap.org/search?format=json&limit=1&addressdetails=1&email=%s&q=%s";
    protected $nominatimEmail = "alexander.kachkaev@city.ac.uk";
    // Sent as the User-Agent header of every Nominatim request.
    protected $nominatimAppName = "DML VIS (place name caching script)";
    // Appended to "<kernel.root_dir>/../" to build the path of the result file.
    protected $resultRelativePath   = "$/views/geography/parsedPlaces.json";
    // Pause (in seconds) made before every single Nominatim request.
    protected $secondsBetweenRequests = 1;
    // Maximum number of HTTP attempts per Nominatim query before the command
    // gives up. The previous "5, pre-decremented" counter effectively allowed 4.
    protected $maxAttemptsPerQuery = 4;

    /**
     * Declares the command name, description and the two flags it accepts.
     */
    protected function configure()
    {
        $this
            ->setName('dml:views:geography:parse-places')
            ->setDescription('For each place attribute found at the cliopatria server, collects geo data from OSM Nominatim and saves the result to a web dir')
            ->addOption(
                'reset',
                null,
                InputOption::VALUE_NONE,
                'Resets the result file before the start'
            )
            ->addOption(
                'process-nulls',
                null,
                InputOption::VALUE_NONE,
                'Re-obtains data for items that were null in the result (could not be geocoded eariler)'
            )
        ;
    }

    /**
     * Runs the whole fetch / geocode / save cycle.
     *
     * @param InputInterface  $input  provides the "reset" and "process-nulls" flags
     * @param OutputInterface $output receives the progress log
     *
     * @throws \Exception when the source list or the existing result file cannot
     *                    be read, when Nominatim keeps failing for one query, or
     *                    when the result file cannot be written
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $output->writeln(sprintf("Reading from <comment>%s</comment>", $this->sourceURL));

        $placeNames = $this->fetchPlaceNames();

        $output->writeln(sprintf("<comment>%s</comment> places found.", count($placeNames)));

        $resultPath = $this->getContainer()->getParameter("kernel.root_dir") . '/../' . $this->resultRelativePath;

        $result = array();
        if (!file_exists($resultPath) || $input->getOption("reset")) {
            $output->writeln("Starting from scratch...");
            $placeNamesToProcess = $placeNames;
        } else {
            $result = $this->loadExistingResult($resultPath);
            $placeNamesToProcess = array_diff($placeNames, array_keys($result));

            if ($input->getOption("process-nulls")) {
                foreach ($result as $placeName => $placeData) {
                    // Failed lookups are stored as null (an empty value is treated the same way).
                    if (!$placeData) {
                        $placeNamesToProcess[] = $placeName;
                    }
                }
            }
        }

        // A name listed twice by the source must not be geocoded twice.
        $placeNamesToProcess = array_unique($placeNamesToProcess);

        $output->writeln(sprintf("<comment>%s</comment> place names to process.", count($placeNamesToProcess)));

        foreach ($placeNamesToProcess as $placeName) {
            $output->write(sprintf('<comment>%s</comment>', $placeName));

            $placeResult = $this->geocodePlace($placeName, $output);
            $output->writeln($placeResult ? '<info>+</info>' : '<error>-</error>');

            $result[$placeName] = $placeResult;

            // Saved after every place so that progress survives an interruption.
            $this->saveResult($resultPath, $result);
        }

        $output->writeln("Done.");
    }

    /**
     * Downloads the source document and extracts the "name" of every entry
     * found under result.places.
     *
     * @return array list of place names in the order given by the source
     *
     * @throws \RuntimeException when the document cannot be downloaded or does
     *                           not contain a result.places list
     */
    private function fetchPlaceNames()
    {
        $rawSource = file_get_contents($this->sourceURL);
        if ($rawSource === false) {
            throw new \RuntimeException(sprintf('Could not read the list of places from "%s"', $this->sourceURL));
        }

        $sourceData = json_decode($rawSource, true);
        if (!is_array($sourceData)
            || !isset($sourceData["result"]["places"])
            || !is_array($sourceData["result"]["places"])
        ) {
            throw new \RuntimeException(sprintf('Unexpected response from "%s": no result.places list found', $this->sourceURL));
        }

        $placeNames = array();
        foreach ($sourceData["result"]["places"] as $placeEntity) {
            if (is_array($placeEntity) && isset($placeEntity["name"])) {
                $placeNames[] = $placeEntity["name"];
            }
        }

        return $placeNames;
    }

    /**
     * Reads the result file written by an earlier run.
     *
     * @param string $resultPath path of the result file
     *
     * @return array map of place name => address data (or null)
     *
     * @throws \RuntimeException when the file is unreadable or is not a JSON
     *                           object/array; refusing to continue keeps a
     *                           damaged file from being silently overwritten
     */
    private function loadExistingResult($resultPath)
    {
        $contents = file_get_contents($resultPath);
        $decoded = ($contents === false) ? null : json_decode($contents, true);

        if (!is_array($decoded)) {
            throw new \RuntimeException(sprintf(
                'Could not read existing results from "%s"; fix the file or run with --reset',
                $resultPath
            ));
        }

        return $decoded;
    }

    /**
     * Geocodes a single place name.
     *
     * Brackets in the name are turned into comma-separated parts. The full name
     * is tried first; after every miss the leading part is dropped and the rest
     * is tried again. When a match is found only after dropping parts, the
     * number of dropped parts is stored under the "trim" key of the result.
     *
     * @param string          $placeName name to look up
     * @param OutputInterface $output    receives one progress marker per miss
     *
     * @return array|null address data of the first match, or null if none
     *
     * @throws \Exception when Nominatim keeps failing for one of the queries
     */
    private function geocodePlace($placeName, OutputInterface $output)
    {
        $chunks = explode(',', str_replace(array("(", ")"), array(",", ""), $placeName));
        $chunkCount = count($chunks);

        for ($i = 0; $i < $chunkCount; $i++) {
            $query = implode(', ', array_slice($chunks, $i));
            $queryURL = sprintf($this->nominatimURLTemplate, urlencode($this->nominatimEmail), urlencode($query));

            $matches = $this->requestNominatim($queryURL, $output);

            if (isset($matches[0]['address']) && is_array($matches[0]['address']) && $matches[0]['address']) {
                $address = $matches[0]['address'];
                if ($i) {
                    $address['trim'] = $i;
                }

                return $address;
            }

            $output->write(sprintf('<info>%s</info>', $i + 1));
        }

        return null;
    }

    /**
     * Performs one Nominatim query, retrying on failures.
     *
     * A response counts as a failure when its HTTP status is not 200 or when its
     * body does not decode to a JSON array. Every attempt is preceded by a pause
     * of $secondsBetweenRequests seconds.
     *
     * @param string          $queryURL complete request URL
     * @param OutputInterface $output   receives one error marker per failure
     *
     * @return array decoded list of matches (may be empty)
     *
     * @throws \Exception after $maxAttemptsPerQuery failed attempts
     */
    private function requestNominatim($queryURL, OutputInterface $output)
    {
        $curlOutput = null;

        for ($attempt = 1; $attempt <= $this->maxAttemptsPerQuery; $attempt++) {
            sleep($this->secondsBetweenRequests);

            $ch = curl_init($queryURL);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_TIMEOUT, 30);
            curl_setopt($ch, CURLOPT_USERAGENT, $this->nominatimAppName);
            $curlOutput = curl_exec($ch);
            $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            curl_close($ch);

            if ($httpcode !== 200) {
                $output->write(sprintf('<error>!%s!</error>', $httpcode));
                continue;
            }

            $decoded = json_decode($curlOutput, true);
            if (is_array($decoded)) {
                return $decoded;
            }

            // A 200 response with an undecodable body must not be cached as "not found".
            $output->write('<error>!json!</error>');
        }

        throw new \Exception("Too many consequent errors in \"$queryURL\" :\n $curlOutput");
    }

    /**
     * Writes the result map to disk as a JSON object with one entry per line.
     *
     * Entries are encoded one by one and joined with line breaks, so the line
     * breaks can never end up inside a JSON string and the output is always an
     * object, whatever the place names look like.
     *
     * @param string $resultPath path of the result file
     * @param array  $result     map of place name => address data (or null)
     *
     * @throws \RuntimeException when an entry cannot be encoded or the file
     *                           cannot be written
     */
    private function saveResult($resultPath, array $result)
    {
        $entries = array();
        foreach ($result as $placeName => $placeData) {
            $encodedName = json_encode((string) $placeName);
            $encodedData = json_encode($placeData);
            if ($encodedName === false || $encodedData === false) {
                throw new \RuntimeException(sprintf(
                    'Could not encode the result for "%s" as JSON (error code %d)',
                    $placeName,
                    json_last_error()
                ));
            }
            $entries[] = $encodedName . ':' . $encodedData;
        }

        $encodedResult = "{" . implode(",\n", $entries) . "\n}";

        if (file_put_contents($resultPath, $encodedResult) === false) {
            throw new \RuntimeException(sprintf('Could not write results to "%s"', $resultPath));
        }
    }
}