Daniel@0: setName('dml:views:geography:parse-places')
Daniel@0:             ->setDescription('For each place attribute found at the cliopatria server, collects geo data from OSM Nominatim and saves the result to a web dir')
Daniel@0:             ->addOption(
Daniel@0:                 'reset',
Daniel@0:                 null,
Daniel@0:                 InputOption::VALUE_NONE,
Daniel@0:                 'Resets the result file before the start'
Daniel@0:             )
Daniel@0:             ->addOption(
Daniel@0:                 'process-nulls',
Daniel@0:                 null,
Daniel@0:                 InputOption::VALUE_NONE,
Daniel@0:                 'Re-obtains data for items that were null in the result (could not be geocoded eariler)'
Daniel@0:             )
Daniel@0:         ;
Daniel@0:     }
Daniel@0: 
Daniel@0:     protected function execute(InputInterface $input, OutputInterface $output)
Daniel@0:     {
Daniel@0:         $output->writeln(sprintf("Reading from %s", $this->sourceURL));
Daniel@0: 
Daniel@0:         $sourceData = json_decode(file_get_contents($this->sourceURL), true);
Daniel@0: 
Daniel@0:         $placeNames = array();
Daniel@0:         foreach($sourceData["result"]["places"] as $placeEntity) {
Daniel@0:             array_push($placeNames, $placeEntity["name"]);
Daniel@0:         }
Daniel@0: 
Daniel@0:         $output->writeln(sprintf("%s places found.", sizeof($placeNames)));
Daniel@0: 
Daniel@0:         $resultPath = $this->getContainer()->getParameter("kernel.root_dir") . '/../' . $this->resultRelativePath;
Daniel@0: 
Daniel@0:         $result = array();
Daniel@0:         if (!file_exists($resultPath) || $input->getOption("reset")) {
Daniel@0:             $output->writeln(sprintf("Starting from scratch...", sizeof($placeNames)));
Daniel@0:             $placeNamesToProcess = $placeNames;
Daniel@0:         } else {
Daniel@0:             $result = json_decode(file_get_contents($resultPath), true);
Daniel@0:             $existingPlaceNames = array_keys($result);
Daniel@0:             $placeNamesToProcess = array_diff($placeNames, $existingPlaceNames);
Daniel@0: 
Daniel@0:             if ($input->getOption("process-nulls")) {
Daniel@0:                 foreach($existingPlaceNames as $placeName) {
Daniel@0:                     if ($result[$placeName] == null) {
Daniel@0:                         array_push($placeNamesToProcess, $placeName);
Daniel@0:                     }
Daniel@0:                 }
Daniel@0:             }
Daniel@0:         }
Daniel@0:         $output->writeln(sprintf("%s place names to process.", sizeof($placeNamesToProcess)));
Daniel@0: 
Daniel@0:         foreach($placeNamesToProcess as $placeName) {
Daniel@0:             $output->write(sprintf('%s', $placeName));
Daniel@0:             $chunks = explode(',', str_replace(array("(", ")"), array(",", ""), $placeName));
Daniel@0:             $placeResult = null;
Daniel@0:             for ($i = 0; $i < sizeof($chunks); $i++) {
Daniel@0:                 $query = implode(', ', array_slice($chunks, $i));
Daniel@0:                 $queryURL = sprintf($this->nominatimURLTemplate, urlencode($this->nominatimEmail), urlencode($query));
Daniel@0: 
Daniel@0:                 $allowedNOfErrors = 5;
Daniel@0:                 while (--$allowedNOfErrors) {
Daniel@0:                     sleep($this->secondsBetweenRequests);
Daniel@0:                     $ch = curl_init($queryURL);
Daniel@0:                     curl_setopt($ch,CURLOPT_RETURNTRANSFER,1);
Daniel@0:                     curl_setopt($ch,CURLOPT_TIMEOUT,30);
Daniel@0:                     curl_setopt($ch,CURLOPT_USERAGENT,$this->nominatimAppName);
Daniel@0:                     $curlOutput = curl_exec($ch);
Daniel@0:                     $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
Daniel@0:                     curl_close($ch);
Daniel@0: 
Daniel@0:                     if ($httpcode !== 200) {
Daniel@0:                         $output->write(sprintf('!%s!', $httpcode));
Daniel@0:                     } else {
Daniel@0:                         $nominatimResult = json_decode($curlOutput, true);
Daniel@0:                         if (sizeof($nominatimResult)) {
Daniel@0:                             $placeResult = $nominatimResult[0]['address'];
Daniel@0:                             if ($i) {
Daniel@0:                                 $placeResult['trim'] = $i;
Daniel@0:                             }
Daniel@0:                         }
Daniel@0:                         break;
Daniel@0:                     }
Daniel@0:                 }
Daniel@0:                 if (!$allowedNOfErrors) {
Daniel@0:                     throw new \Exception("Too many consequent errors in \"$queryURL\" :\n $curlOutput");
Daniel@0:                 }
Daniel@0:                 if ($placeResult) {
Daniel@0:                     break;
Daniel@0:                 } else {
Daniel@0:                     $output->write(sprintf('%s', $i + 1));
Daniel@0:                 }
Daniel@0:             }
Daniel@0:             if ($placeResult) {
Daniel@0:                 $output->writeln('+');
Daniel@0:             } else {
Daniel@0:                 $output->writeln('-');
Daniel@0:             }
Daniel@0: 
Daniel@0:             $result[$placeName] = $placeResult;
Daniel@0: 
Daniel@0:             $encodedResult = json_encode($result);
Daniel@0:             $encodedResult = str_replace(array('null,', '},', '}}'), array("null,\n", "},\n", "}\n}"), $encodedResult);
Daniel@0: 
Daniel@0:             file_put_contents($resultPath, $encodedResult);
Daniel@0:         }
Daniel@0: 
Daniel@0:         $output->writeln("Done.");
Daniel@0:     }
Daniel@0: }