diff src/DML/MainVisBundle/Command/Views/Geography/ParsePlacesCommand.php @ 0:493bcb69166c

added public content
author Daniel Wolff
date Tue, 09 Feb 2016 20:54:02 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/DML/MainVisBundle/Command/Views/Geography/ParsePlacesCommand.php	Tue Feb 09 20:54:02 2016 +0100
@@ -0,0 +1,130 @@
+<?php
+namespace DML\MainVisBundle\Command\Views\Geography;
+
+use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
+use Symfony\Component\Console\Input\InputArgument;
+use Symfony\Component\Console\Input\InputInterface;
+use Symfony\Component\Console\Input\InputOption;
+use Symfony\Component\Console\Output\OutputInterface;
+
+/**
+ * Console command that geocodes the place names listed by the source service.
+ *
+ * The list of places is downloaded from $sourceURL, each name is looked up
+ * via the Nominatim search URL template and the resulting "address" details
+ * are accumulated in a JSON file (place name => address array, or null when
+ * nothing was found). The file is rewritten after every processed place, so
+ * an interrupted run can be resumed later.
+ */
+class ParsePlacesCommand extends ContainerAwareCommand
+{
+    /** @var string URL returning the JSON list of places to geocode */
+    protected $sourceURL = "http://mirg.city.ac.uk/cp/api/v4/listPlaces?format=json";
+    /** @var string sprintf template of the search URL; placeholders: email, query */
+    protected $nominatimURLTemplate = "http://nominatim.openstreetmap.org/search?format=json&limit=1&addressdetails=1&email=%s&q=%s";
+    /** @var string contact email sent with every Nominatim request */
+    protected $nominatimEmail = "alexander.kachkaev@city.ac.uk";
+    /** @var string User-Agent header sent with every Nominatim request */
+    protected $nominatimAppName = "DML VIS (place name caching script)";
+    /** @var string result file path, relative to the parent of kernel.root_dir */
+    protected $resultRelativePath   = "$/views/geography/parsedPlaces.json";
+    /** @var int pause (in seconds) made before every Nominatim request */
+    protected $secondsBetweenRequests = 1;
+    /** @var int number of HTTP attempts made for one query before giving up */
+    protected $maxRequestAttempts = 5;
+
+    /**
+     * Declares the command name, its description and the two flags.
+     */
+    protected function configure()
+    {
+        $this
+            ->setName('dml:views:geography:parse-places')
+            ->setDescription('For each place attribute found at the cliopatria server, collects geo data from OSM Nominatim and saves the result to a web dir')
+            ->addOption(
+                'reset',
+                null,
+                InputOption::VALUE_NONE,
+                'Resets the result file before the start'
+            )
+            ->addOption(
+                'process-nulls',
+                null,
+                InputOption::VALUE_NONE,
+                'Re-obtains data for items that were null in the result (could not be geocoded earlier)'
+            )
+        ;
+    }
+
+    /**
+     * Geocodes every place name that is not yet present in the result file.
+     *
+     * For a name such as "a, b (c)" the queries "a, b, c", then "b, c", then
+     * "c" are tried until one of them yields a result; the number of leading
+     * chunks that had to be dropped is stored under the "trim" key.
+     *
+     * @param InputInterface  $input
+     * @param OutputInterface $output
+     *
+     * @throws \RuntimeException when the source list or the result file cannot be read or written
+     * @throws \Exception        when a query keeps failing after all attempts
+     */
+    protected function execute(InputInterface $input, OutputInterface $output)
+    {
+        $output->writeln(sprintf("Reading from <comment>%s</comment>", $this->sourceURL));
+
+        $placeNames = $this->fetchPlaceNames();
+
+        $output->writeln(sprintf("<comment>%s</comment> places found.", count($placeNames)));
+
+        $resultPath = $this->getContainer()->getParameter("kernel.root_dir") . '/../' . $this->resultRelativePath;
+
+        $result = array();
+        if (!file_exists($resultPath) || $input->getOption("reset")) {
+            $output->writeln("Starting from scratch...");
+            $placeNamesToProcess = $placeNames;
+        } else {
+            $result = $this->readExistingResult($resultPath);
+            $placeNamesToProcess = array_diff($placeNames, array_keys($result));
+
+            if ($input->getOption("process-nulls")) {
+                foreach ($result as $placeName => $placeResult) {
+                    // Failed lookups are stored as null; an empty address is
+                    // treated as a failure by the geocoding loop below too.
+                    if (empty($placeResult)) {
+                        $placeNamesToProcess[] = $placeName;
+                    }
+                }
+            }
+        }
+        $output->writeln(sprintf("<comment>%s</comment> place names to process.", count($placeNamesToProcess)));
+
+        // Make sure the result can be stored before spending time on requests.
+        $resultDir = dirname($resultPath);
+        if (!is_dir($resultDir) && !mkdir($resultDir, 0777, true) && !is_dir($resultDir)) {
+            throw new \RuntimeException(sprintf('Could not create directory "%s"', $resultDir));
+        }
+
+        foreach ($placeNamesToProcess as $placeName) {
+            $output->write(sprintf('<comment>%s</comment>', $placeName));
+            // "a, b (c)" => chunks "a", " b ", "c"
+            $chunks = explode(',', str_replace(array("(", ")"), array(",", ""), $placeName));
+            $chunkCount = count($chunks);
+            $placeResult = null;
+            for ($i = 0; $i < $chunkCount; $i++) {
+                // Drop the $i most specific chunks and retry with a broader query.
+                $query = implode(', ', array_slice($chunks, $i));
+                $nominatimResult = $this->requestNominatim($query, $output);
+
+                if (isset($nominatimResult[0]['address']) && is_array($nominatimResult[0]['address'])) {
+                    $placeResult = $nominatimResult[0]['address'];
+                    if ($i) {
+                        $placeResult['trim'] = $i;
+                    }
+                }
+                if ($placeResult) {
+                    break;
+                }
+                $output->write(sprintf('<info>%s</info>', $i + 1));
+            }
+            if ($placeResult) {
+                $output->writeln('<info>+</info>');
+            } else {
+                $output->writeln('<error>-</error>');
+            }
+
+            $result[$placeName] = $placeResult;
+
+            // Saved after every place so that progress survives an interruption.
+            if (file_put_contents($resultPath, $this->encodeResult($result)) === false) {
+                throw new \RuntimeException(sprintf('Could not write to "%s"', $resultPath));
+            }
+        }
+
+        $output->writeln("Done.");
+    }
+
+    /**
+     * Downloads the source document and extracts the place names from it.
+     *
+     * @return array list of place names (entries without a "name" are skipped)
+     *
+     * @throws \RuntimeException when the document cannot be fetched or has an unexpected structure
+     */
+    private function fetchPlaceNames()
+    {
+        $rawSource = file_get_contents($this->sourceURL);
+        if ($rawSource === false) {
+            throw new \RuntimeException(sprintf('Could not read from "%s"', $this->sourceURL));
+        }
+
+        $sourceData = json_decode($rawSource, true);
+        if (!isset($sourceData["result"]["places"]) || !is_array($sourceData["result"]["places"])) {
+            throw new \RuntimeException(sprintf('Unexpected response from "%s": no list of places found', $this->sourceURL));
+        }
+
+        $placeNames = array();
+        foreach ($sourceData["result"]["places"] as $placeEntity) {
+            if (isset($placeEntity["name"])) {
+                $placeNames[] = $placeEntity["name"];
+            }
+        }
+
+        return $placeNames;
+    }
+
+    /**
+     * Loads the previously saved result file.
+     *
+     * @param string $resultPath
+     *
+     * @return array place name => address array or null
+     *
+     * @throws \RuntimeException when the file cannot be read or does not contain a JSON object/array
+     */
+    private function readExistingResult($resultPath)
+    {
+        $rawResult = file_get_contents($resultPath);
+        $result = ($rawResult === false) ? null : json_decode($rawResult, true);
+        if (!is_array($result)) {
+            throw new \RuntimeException(sprintf('Could not parse the existing result file "%s"; fix it or run the command with --reset', $resultPath));
+        }
+
+        return $result;
+    }
+
+    /**
+     * Sends one search query to Nominatim, retrying on transport, HTTP and
+     * decoding errors.
+     *
+     * @param string          $query  free-form search string
+     * @param OutputInterface $output receives a short marker for every failed attempt
+     *
+     * @return array decoded JSON response (an empty array when nothing was found)
+     *
+     * @throws \Exception when all $maxRequestAttempts attempts have failed
+     */
+    private function requestNominatim($query, OutputInterface $output)
+    {
+        $queryURL = sprintf($this->nominatimURLTemplate, urlencode($this->nominatimEmail), urlencode($query));
+        $lastResponse = '';
+
+        for ($attempt = 1; $attempt <= $this->maxRequestAttempts; $attempt++) {
+            // The pause precedes every request, including the retries.
+            sleep($this->secondsBetweenRequests);
+
+            $ch = curl_init($queryURL);
+            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
+            curl_setopt($ch, CURLOPT_TIMEOUT, 30);
+            curl_setopt($ch, CURLOPT_USERAGENT, $this->nominatimAppName);
+            $curlOutput = curl_exec($ch);
+            $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
+            $curlError = curl_error($ch);
+            curl_close($ch);
+
+            // curl_exec() returns false on a transport failure; keep the
+            // cURL error text so that the final exception is informative.
+            $lastResponse = ($curlOutput === false) ? $curlError : $curlOutput;
+
+            if ($httpcode !== 200) {
+                $output->write(sprintf('<error>!%s!</error>', $httpcode));
+                continue;
+            }
+
+            $decoded = json_decode($curlOutput, true);
+            if (!is_array($decoded)) {
+                $output->write('<error>!json!</error>');
+                continue;
+            }
+
+            return $decoded;
+        }
+
+        throw new \Exception("Too many consecutive errors in \"$queryURL\" :\n $lastResponse");
+    }
+
+    /**
+     * Serialises the result as a JSON object with one place per line.
+     *
+     * Entries are encoded one by one rather than by post-processing the
+     * output of a single json_encode() call, so the line breaks can never
+     * end up inside a string value.
+     *
+     * @param array $result place name => address array or null
+     *
+     * @return string
+     *
+     * @throws \RuntimeException when a name or a value cannot be encoded
+     */
+    private function encodeResult(array $result)
+    {
+        $lines = array();
+        foreach ($result as $placeName => $placeResult) {
+            // Numeric-looking names become integer keys in PHP arrays.
+            $encodedName = json_encode((string) $placeName);
+            $encodedValue = json_encode($placeResult);
+            if ($encodedName === false || $encodedValue === false) {
+                throw new \RuntimeException(sprintf('Could not encode the result for "%s" as JSON', $placeName));
+            }
+            $lines[] = $encodedName . ':' . $encodedValue;
+        }
+
+        return "{" . implode(",\n", $lines) . "\n}";
+    }
+}
\ No newline at end of file