view src/DML/MainVisBundle/Command/Views/Geography/ParsePlacesCommand.php @ 1:f38015048f48 tip

Added GPL
author Daniel Wolff
date Sat, 13 Feb 2016 20:43:38 +0100
parents 493bcb69166c
children
line wrap: on
line source
<?php
namespace DML\MainVisBundle\Command\Views\Geography;

use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;

/**
 * Console command that geocodes the place names listed by the cliopatria server.
 *
 * The list of places is downloaded from $sourceURL, every name is looked up in
 * OSM Nominatim and the returned address details are stored in a JSON file keyed
 * by place name (null for names that could not be geocoded). The file is rewritten
 * after every processed place and is read back on the next run, so names that are
 * already present in it are not requested again unless --reset is given.
 */
class ParsePlacesCommand extends ContainerAwareCommand
{
    /** @var string URL of the JSON document that lists the places to geocode */
    protected $sourceURL = "http://mirg.city.ac.uk/cp/api/v4/listPlaces?format=json";

    /** @var string sprintf() template of a Nominatim search URL; receives the url-encoded email and query */
    protected $nominatimURLTemplate = "http://nominatim.openstreetmap.org/search?format=json&limit=1&addressdetails=1&email=%s&q=%s";

    /** @var string Contact email passed to Nominatim in the "email" query parameter */
    protected $nominatimEmail = "alexander.kachkaev@city.ac.uk";

    /** @var string Value of the User-Agent header sent with every Nominatim request */
    protected $nominatimAppName = "DML VIS (place name caching script)";

    /** @var string Location of the result file, relative to the parent directory of kernel.root_dir */
    protected $resultRelativePath   = "$/views/geography/parsedPlaces.json";

    /** @var int Pause, in seconds, made before each request to Nominatim */
    protected $secondsBetweenRequests = 1;

    /** @var int How many times a single query is requested before the command gives up */
    protected $maxRequestAttempts = 4;

    /**
     * Declares the command name, its description and the --reset / --process-nulls flags.
     */
    protected function configure()
    {
        $this
            ->setName('dml:views:geography:parse-places')
            ->setDescription('For each place attribute found at the cliopatria server, collects geo data from OSM Nominatim and saves the result to a web dir')
            ->addOption(
                'reset',
                null,
                InputOption::VALUE_NONE,
                'Resets the result file before the start'
            )
            ->addOption(
                'process-nulls',
                null,
                InputOption::VALUE_NONE,
                'Re-obtains data for items that were null in the result (could not be geocoded eariler)'
            )
        ;
    }

    /**
     * Downloads the place list, geocodes every place that is not in the result file yet
     * and saves the result file after each processed place.
     *
     * @param InputInterface  $input  Provides the "reset" and "process-nulls" options
     * @param OutputInterface $output Receives the progress report
     *
     * @throws \RuntimeException When the place list or the result file cannot be read, decoded or written
     * @throws \Exception        When a Nominatim query keeps failing
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $output->writeln(sprintf("Reading from <comment>%s</comment>", $this->sourceURL));

        $placeNames = $this->extractPlaceNames($this->readJson($this->sourceURL));

        $output->writeln(sprintf("<comment>%s</comment> places found.", count($placeNames)));

        $resultPath = $this->getContainer()->getParameter("kernel.root_dir") . '/../' . $this->resultRelativePath;

        $result = array();
        if (!file_exists($resultPath) || $input->getOption("reset")) {
            $output->writeln("Starting from scratch...");
            $placeNamesToProcess = $placeNames;
        } else {
            $result = $this->readJson($resultPath);
            $existingPlaceNames = array_keys($result);
            $placeNamesToProcess = array_diff($placeNames, $existingPlaceNames);

            if ($input->getOption("process-nulls")) {
                foreach ($existingPlaceNames as $placeName) {
                    // Covers null as well as empty entries, both mean "no address stored".
                    if (empty($result[$placeName])) {
                        $placeNamesToProcess[] = $placeName;
                    }
                }
            }
        }

        // A name that is listed several times needs to be geocoded only once.
        $placeNamesToProcess = array_unique($placeNamesToProcess);

        $output->writeln(sprintf("<comment>%s</comment> place names to process.", count($placeNamesToProcess)));

        foreach ($placeNamesToProcess as $placeName) {
            $output->write(sprintf('<comment>%s</comment>', $placeName));

            $placeResult = $this->geocode($placeName, $output);
            $output->writeln($placeResult ? '<info>+</info>' : '<error>-</error>');

            $result[$placeName] = $placeResult;

            // Saved after every place so that the progress survives an aborted run.
            $this->saveResult($resultPath, $result);
        }

        $output->writeln("Done.");
    }

    /**
     * Reads a file or URL and decodes its content as a JSON object / array.
     *
     * @param string $location Path or URL understood by file_get_contents()
     *
     * @return array Decoded data (objects are decoded as associative arrays)
     *
     * @throws \RuntimeException When the content cannot be read or is not a JSON object / array
     */
    private function readJson($location)
    {
        $raw = file_get_contents($location);
        if ($raw === false) {
            throw new \RuntimeException(sprintf('Could not read "%s"', $location));
        }

        $decoded = json_decode($raw, true);
        if (!is_array($decoded)) {
            throw new \RuntimeException(sprintf('Could not decode JSON found in "%s"', $location));
        }

        return $decoded;
    }

    /**
     * Collects the "name" of every entry found under result.places of the source data.
     *
     * @param array $sourceData Decoded response of the place list request
     *
     * @return array List of place names; entries without a name are skipped
     *
     * @throws \RuntimeException When the data has no result.places list
     */
    private function extractPlaceNames(array $sourceData)
    {
        if (!isset($sourceData["result"]["places"]) || !is_array($sourceData["result"]["places"])) {
            throw new \RuntimeException(sprintf('No list of places found in the response of "%s"', $this->sourceURL));
        }

        $placeNames = array();
        foreach ($sourceData["result"]["places"] as $placeEntity) {
            if (isset($placeEntity["name"])) {
                $placeNames[] = $placeEntity["name"];
            }
        }

        return $placeNames;
    }

    /**
     * Looks a place name up in Nominatim, dropping leading comma-separated parts
     * of the name one by one until a query produces a match.
     *
     * Brackets are treated as separators too: "(" is replaced by a comma and ")" is removed.
     *
     * @param string          $placeName Place name as found in the source data
     * @param OutputInterface $output    Receives the number of each query that gave no match
     *
     * @return array|null Address details of the first match, with a "trim" element holding the
     *                    number of dropped leading parts when it is not zero; null when nothing matched
     *
     * @throws \Exception When a query keeps failing
     */
    private function geocode($placeName, OutputInterface $output)
    {
        $chunks = explode(',', str_replace(array("(", ")"), array(",", ""), $placeName));
        $nOfChunks = count($chunks);

        for ($i = 0; $i < $nOfChunks; $i++) {
            $query = implode(', ', array_slice($chunks, $i));
            $queryURL = sprintf($this->nominatimURLTemplate, urlencode($this->nominatimEmail), urlencode($query));

            $address = $this->requestAddress($queryURL, $output);
            if ($address) {
                if ($i) {
                    $address['trim'] = $i;
                }

                return $address;
            }

            $output->write(sprintf('<info>%s</info>', $i + 1));
        }

        return null;
    }

    /**
     * Requests a Nominatim search URL, repeating the request while the HTTP status is not 200.
     *
     * @param string          $queryURL Complete search URL
     * @param OutputInterface $output   Receives the HTTP status of every failed attempt
     *
     * @return array|null "address" of the first search hit or null when the response contains none
     *
     * @throws \Exception When none of the attempts ended with HTTP status 200
     */
    private function requestAddress($queryURL, OutputInterface $output)
    {
        $curlOutput = false;

        for ($attempt = 1; $attempt <= $this->maxRequestAttempts; $attempt++) {
            // The pause comes before every request, including the first one.
            sleep($this->secondsBetweenRequests);

            $ch = curl_init($queryURL);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_TIMEOUT, 30);
            curl_setopt($ch, CURLOPT_USERAGENT, $this->nominatimAppName);
            $curlOutput = curl_exec($ch);
            $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            curl_close($ch);

            if ($httpcode !== 200) {
                $output->write(sprintf('<error>!%s!</error>', $httpcode));
                continue;
            }

            // A response that is not a list of hits, or whose first hit has no address,
            // is treated in the same way as an empty list: no match for this query.
            $hits = json_decode($curlOutput, true);
            if (is_array($hits) && isset($hits[0]['address']) && is_array($hits[0]['address'])) {
                return $hits[0]['address'];
            }

            return null;
        }

        throw new \Exception("Too many consequent errors in \"$queryURL\" :\n $curlOutput");
    }

    /**
     * Writes the result to disk as JSON with line breaks inserted between the entries.
     *
     * @param string $resultPath Path of the result file
     * @param array  $result     Address details (or null) keyed by place name
     *
     * @throws \RuntimeException When the result cannot be encoded or the file cannot be written
     */
    private function saveResult($resultPath, array $result)
    {
        $encodedResult = json_encode($result);
        if ($encodedResult === false) {
            // Without this check the file would be overwritten with an empty string.
            throw new \RuntimeException(sprintf('Could not encode the result as JSON (error code %d)', json_last_error()));
        }

        // NOTE(review): the replacement also fires inside string values that happen to
        // contain "null," / "}," / "}}", which would put a raw line break into a JSON string.
        $encodedResult = str_replace(array('null,', '},', '}}'), array("null,\n", "},\n", "}\n}"), $encodedResult);

        if (file_put_contents($resultPath, $encodedResult) === false) {
            throw new \RuntimeException(sprintf('Could not write the result to "%s"', $resultPath));
        }
    }
}