<?php
namespace DML\MainVisBundle\Command\Views\Geography;

use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;

/**
 * Console command `dml:views:geography:parse-places`.
 *
 * Downloads the list of place names from the source URL, geocodes every name
 * that is not yet present in the result file through OSM Nominatim and stores
 * the returned address details in a JSON file keyed by place name. The result
 * file is rewritten after every processed place, so an interrupted run can be
 * resumed by simply starting the command again.
 */
class ParsePlacesCommand extends ContainerAwareCommand
{
    /** URL of the JSON feed that lists the places to geocode. */
    protected $sourceURL = "http://mirg.city.ac.uk/cp/api/v4/listPlaces?format=json";

    /** sprintf() template of a Nominatim search request: 1st %s is the contact email, 2nd %s is the query. */
    protected $nominatimURLTemplate = "http://nominatim.openstreetmap.org/search?format=json&limit=1&addressdetails=1&email=%s&q=%s";

    /** Contact email sent with every Nominatim request. */
    protected $nominatimEmail = "alexander.kachkaev@city.ac.uk";

    /** User-Agent header sent with every Nominatim request. */
    protected $nominatimAppName = "DML VIS (place name caching script)";

    /**
     * Result file location, relative to the parent of "kernel.root_dir".
     * NOTE(review): the leading "$" is a literal path segment (it is not
     * interpolated) - confirm that such a directory is expected to exist.
     */
    protected $resultRelativePath = "$/views/geography/parsedPlaces.json";

    /** Pause (in seconds) made before every Nominatim request. */
    protected $secondsBetweenRequests = 1;

    /** How many times one Nominatim query is attempted before the command gives up. */
    protected $maxAttemptsPerQuery = 5;

    /**
     * Declares the command name, its description and the two flags
     * (--reset and --process-nulls).
     */
    protected function configure()
    {
        $this
            ->setName('dml:views:geography:parse-places')
            ->setDescription('For each place attribute found at the cliopatria server, collects geo data from OSM Nominatim and saves the result to a web dir')
            ->addOption(
                'reset',
                null,
                InputOption::VALUE_NONE,
                'Resets the result file before the start'
            )
            ->addOption(
                'process-nulls',
                null,
                InputOption::VALUE_NONE,
                'Re-obtains data for items that were null in the result (could not be geocoded eariler)'
            )
        ;
    }

    /**
     * Runs the geocoding pass.
     *
     * @param InputInterface  $input  Provides the "reset" and "process-nulls" flags.
     * @param OutputInterface $output Receives progress messages.
     *
     * @throws \Exception When the source feed or the existing result file cannot be
     *                    read, when a Nominatim query keeps failing, or when the
     *                    result file cannot be written.
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $output->writeln(sprintf("Reading from <comment>%s</comment>", $this->sourceURL));

        $placeNames = $this->fetchPlaceNames();

        $output->writeln(sprintf("<comment>%s</comment> places found.", count($placeNames)));

        $resultPath = $this->getContainer()->getParameter("kernel.root_dir") . '/../' . $this->resultRelativePath;

        $result = array();
        if (!file_exists($resultPath) || $input->getOption("reset")) {
            $output->writeln("Starting from scratch...");
            $placeNamesToProcess = $placeNames;
        } else {
            $result = $this->readExistingResult($resultPath);
            $existingPlaceNames = array_keys($result);
            $placeNamesToProcess = array_diff($placeNames, $existingPlaceNames);

            if ($input->getOption("process-nulls")) {
                foreach ($existingPlaceNames as $placeName) {
                    // Loose comparison on purpose: both null and an empty
                    // address array count as "not geocoded".
                    if ($result[$placeName] == null) {
                        $placeNamesToProcess[] = $placeName;
                    }
                }
            }
        }
        $output->writeln(sprintf("<comment>%s</comment> place names to process.", count($placeNamesToProcess)));

        foreach ($placeNamesToProcess as $placeName) {
            $output->write(sprintf('<comment>%s</comment>', $placeName));

            $placeResult = $this->geocodePlace($placeName, $output);
            $output->writeln($placeResult ? '<info>+</info>' : '<error>-</error>');

            $result[$placeName] = $placeResult;

            // Persist after every place so that an interrupted run loses nothing.
            $this->saveResult($resultPath, $result);
        }

        $output->writeln("Done.");
    }

    /**
     * Downloads the source feed and extracts the place names from it.
     *
     * @return array List of place names, in feed order.
     *
     * @throws \RuntimeException When the feed cannot be downloaded or does not
     *                           contain a "result" -> "places" list.
     */
    private function fetchPlaceNames()
    {
        $rawSource = file_get_contents($this->sourceURL);
        if ($rawSource === false) {
            throw new \RuntimeException(sprintf('Could not read the list of places from "%s"', $this->sourceURL));
        }

        $sourceData = json_decode($rawSource, true);
        if (!isset($sourceData["result"]["places"]) || !is_array($sourceData["result"]["places"])) {
            throw new \RuntimeException(sprintf('Unexpected response from "%s": no list of places found', $this->sourceURL));
        }

        $placeNames = array();
        foreach ($sourceData["result"]["places"] as $placeEntity) {
            $placeNames[] = $placeEntity["name"];
        }

        return $placeNames;
    }

    /**
     * Loads the result of a previous run.
     *
     * @param string $resultPath Path of the result file.
     *
     * @return array Map of place name => address details (array) or null.
     *
     * @throws \RuntimeException When the file cannot be read or is not a JSON object/array.
     */
    private function readExistingResult($resultPath)
    {
        $rawResult = file_get_contents($resultPath);
        $result = $rawResult === false ? null : json_decode($rawResult, true);
        if (!is_array($result)) {
            throw new \RuntimeException(sprintf('Could not parse the existing result file "%s"; run the command with --reset to start over', $resultPath));
        }

        return $result;
    }

    /**
     * Geocodes one place name.
     *
     * The name is split into comma-separated chunks (an opening bracket counts
     * as a comma, closing brackets are dropped). The full name is tried first;
     * while nothing is found, the leading chunk is removed and the remainder
     * is tried again.
     *
     * @param string          $placeName Place name as found in the source feed.
     * @param OutputInterface $output    Receives progress markers.
     *
     * @return array|null Address details of the first match, with an extra
     *                    "trim" key holding the number of dropped leading chunks
     *                    when that number is not zero; null when nothing matched.
     *
     * @throws \Exception When a query keeps failing (see queryNominatim()).
     */
    private function geocodePlace($placeName, OutputInterface $output)
    {
        $chunks = explode(',', str_replace(array("(", ")"), array(",", ""), $placeName));
        $nOfChunks = count($chunks);

        for ($i = 0; $i < $nOfChunks; $i++) {
            $query = implode(', ', array_slice($chunks, $i));
            $address = $this->queryNominatim($query, $output);

            if ($address) {
                if ($i) {
                    $address['trim'] = $i;
                }

                return $address;
            }

            // Progress marker: number of variants of the name tried so far.
            $output->write(sprintf('<info>%s</info>', $i + 1));
        }

        return null;
    }

    /**
     * Sends one search query to Nominatim, retrying on failures.
     *
     * A non-200 response, a transport error and a body that is not valid JSON
     * all count as a failed attempt.
     *
     * @param string          $query  Free-form search string.
     * @param OutputInterface $output Receives a marker for every failed attempt.
     *
     * @return array|null "address" part of the first search hit, or null when
     *                    the search returned no usable hit.
     *
     * @throws \Exception When all attempts have failed.
     */
    private function queryNominatim($query, OutputInterface $output)
    {
        $queryURL = sprintf($this->nominatimURLTemplate, urlencode($this->nominatimEmail), urlencode($query));

        $lastFailure = '';
        for ($attempt = 1; $attempt <= $this->maxAttemptsPerQuery; $attempt++) {
            // The pause precedes every request, including the first one and retries.
            sleep($this->secondsBetweenRequests);

            $ch = curl_init($queryURL);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_TIMEOUT, 30);
            curl_setopt($ch, CURLOPT_USERAGENT, $this->nominatimAppName);
            $curlOutput = curl_exec($ch);
            $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            // Must be read before the handle is closed.
            $curlError = curl_error($ch);
            curl_close($ch);

            if ($httpcode !== 200) {
                $output->write(sprintf('<error>!%s!</error>', $httpcode));
                $lastFailure = $curlOutput === false ? $curlError : $curlOutput;
                continue;
            }

            $nominatimResult = json_decode($curlOutput, true);
            if (!is_array($nominatimResult)) {
                $output->write(sprintf('<error>!%s!</error>', 'json'));
                $lastFailure = $curlOutput;
                continue;
            }

            if (isset($nominatimResult[0]['address']) && is_array($nominatimResult[0]['address'])) {
                return $nominatimResult[0]['address'];
            }

            return null;
        }

        throw new \Exception("Too many consequent errors in \"$queryURL\" :\n $lastFailure");
    }

    /**
     * Writes the whole result map to the result file.
     *
     * @param string $resultPath Path of the result file.
     * @param array  $result     Map of place name => address details or null.
     *
     * @throws \RuntimeException When the data cannot be encoded or the file cannot be written.
     */
    private function saveResult($resultPath, array $result)
    {
        if (file_put_contents($resultPath, $this->encodeResult($result)) === false) {
            throw new \RuntimeException(sprintf('Could not write the result to "%s"', $resultPath));
        }
    }

    /**
     * Encodes the result map as a JSON object with one entry per line.
     *
     * Entries are encoded one by one and joined with line breaks, so the line
     * breaks can never end up inside a JSON string value.
     *
     * @param array $result Map of place name => address details or null.
     *
     * @return string JSON document.
     *
     * @throws \RuntimeException When a key or a value cannot be JSON-encoded.
     */
    private function encodeResult(array $result)
    {
        $entries = array();
        foreach ($result as $placeName => $placeData) {
            // Keys that look like integers are turned into ints by PHP; cast
            // back so that they are always written as JSON strings.
            $encodedName = json_encode((string) $placeName);
            $encodedData = json_encode($placeData);
            if ($encodedName === false || $encodedData === false) {
                throw new \RuntimeException(sprintf('Could not JSON-encode the result for "%s"', $placeName));
            }
            $entries[] = $encodedName . ':' . $encodedData;
        }

        return '{' . implode(",\n", $entries) . "\n}";
    }
}