Mercurial > hg > rr-repo
comparison modules/biblio/includes/Parser.php @ 6:a75ead649730
added biblio, admin_menu and reference modules
author | danieleb <danielebarchiesi@me.com> |
---|---|
date | Fri, 20 Sep 2013 11:18:21 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
5:d72257b2ddc2 | 6:a75ead649730 |
---|---|
1 <?php | |
/**
 * Works with a HumanNameParser_Name object to parse out the parts of a name.
 *
 * Example usage:
 *   $parser = new HumanNameParser_Parser("John Q. Smith");
 *   echo $parser->getLast() . ", " . $parser->getFirst();
 *   // prints "Smith, John"
 */
class HumanNameParser_Parser {
  private $name;
  private $nameParts = array();
  private $leadingInit;
  private $first;
  private $nicknames;
  private $middle;
  private $last;
  private $suffix;
  private $category;
  private $type;
  private $literal;

  private $suffixes;
  private $prefixes;

  /**
   * Constructor.
   *
   * Loads the suffix/prefix vocabularies and, when a name is supplied,
   * parses it immediately via setName().
   *
   * @param mixed $name Either a name as a string or as a Name object.
   * @throws Exception Propagated from parse() when a non-empty name has no
   *   recognisable last or first name.
   */
  public function __construct($name = NULL)
  {
    $this->suffixes = array('esq','esquire','jr','sr','2','ii','iii','iv');
    $this->prefixes = array('bar','ben','bin','da','dal','de la', 'de', 'del','der','di',
      'ibn','la','le','san','st','ste','van', 'van der', 'van den', 'vel','von');
    $this->setName($name);
  }

  /**
   * Parses a name and returns its parts as an associative array.
   *
   * @param mixed $name Either a name string, or an array with a 'name' key
   *   and optional 'auth_category' / 'auth_type' keys. Any other keys of the
   *   array are carried through into the returned array.
   * @param mixed $category Optional category; when non-empty it takes
   *   precedence over $name['auth_category'].
   * @return array The merged result of getArray().
   * @throws Exception Propagated from parse().
   */
  public function parseName($name = NULL, $category = NULL) {
    // Reset per-call state to its defaults before inspecting the input.
    $this->literal = 0;
    $this->category = 1;
    $this->type = 1;
    if (is_array($name) && isset($name['name'])) {
      if (isset($name['auth_category']) && !empty($name['auth_category']) && empty($category)) {
        $this->category = $name['auth_category'];
      }
      elseif (!empty($category)) {
        $this->category = $category;
      }
      if (isset($name['auth_type']) && !empty($name['auth_type'])) {
        $this->type = $name['auth_type'];
      }
      $this->nameParts = $name;
      $this->setName($name['name'], $category);
    }
    else {
      $this->nameParts['name'] = $name;
      $this->setName($name, $category);
    }

    return $this->getArray();
  }

  /**
   * Sets name string and parses it.
   *
   * Takes a Name object, an array with a 'name' key, or a simple string
   * (strings are wrapped in a HumanNameParser_Name), then parses and loads
   * its constituent parts. When the category or type equals 5 the name is
   * not parsed: it is stored whole as the last name and flagged as literal.
   * An empty $name is ignored and leaves the parser untouched.
   *
   * @param mixed $name Either a name as a string or as a Name object.
   * @param mixed $category Optional category; when non-empty it replaces the
   *   category currently held by the parser.
   * @throws Exception Propagated from parse().
   */
  public function setName($name = NULL, $category = NULL){
    if (!$name) {
      return;
    }

    // The original statement here was a comparison ("==") whose result was
    // discarded, so the argument never took effect. Only a non-empty value
    // overrides, so a category resolved earlier by parseName() is kept.
    if (!empty($category)) {
      $this->category = $category;
    }

    if ($name instanceof HumanNameParser_Name) { // this is mostly for testing
      $this->name = $name;
      $literalName = $name->getStr();
    }
    elseif (is_array($name) && isset($name['name'])) {
      $this->name = new HumanNameParser_Name($name['name']);
      $this->nameParts = $name;
      $literalName = $name['name'];
    }
    else {
      $this->name = new HumanNameParser_Name($name);
      $literalName = $name;
    }

    $this->leadingInit = "";
    $this->first = "";
    $this->nicknames = "";
    $this->middle = "";
    $this->last = "";
    $this->suffix = "";

    if ($this->category == 5 || $this->type == 5) {
      // Literal names are kept verbatim; store the name string rather than
      // the raw argument, which may be an array or a Name object.
      $this->last = $literalName;
      $this->literal = TRUE;
    }
    else {
      $this->parse();
    }
  }

  public function getleadingInit() {
    return $this->leadingInit;
  }

  public function getFirst() {
    return $this->first;
  }

  public function getNicknames() {
    return $this->nicknames;
  }

  public function getMiddle() {
    return $this->middle;
  }

  public function getLast() {
    return $this->last;
  }

  public function getSuffix() {
    return $this->suffix;
  }

  public function getName(){
    return $this->name;
  }

  /**
   * Returns all the parts of the name as an array.
   *
   * The 'md5' entry is a hash of the JSON encoding of the six name parts
   * (prefix, firstname, nicknames, initials, lastname, suffix); 'literal'
   * is added afterwards and is not part of the hash.
   *
   * @param String $arrType pass 'int' (or 'num') to get an integer-indexed
   *   array; the default 'assoc' returns an associative array merged over
   *   the original input parts.
   * @return array An array of the name-parts
   * @throws Exception When $arrType is not a supported value.
   */
  public function getArray($arrType = 'assoc') {
    $arr = array();
    $arr['prefix'] = $this->leadingInit;
    $arr['firstname'] = $this->first;
    $arr['nicknames'] = $this->nicknames;
    // Cast so that an unparsed parser (middle still NULL) does not pass NULL
    // to substr(), which is deprecated as of PHP 8.1.
    // NOTE(review): substr() is byte-wise and may split a multibyte
    // character at the 10-byte limit - confirm whether that matters here.
    $arr['initials'] = substr((string) $this->middle, 0, 10);
    $arr['lastname'] = $this->last;
    $arr['suffix'] = $this->suffix;
    $arr['md5'] = md5(json_encode($arr));
    $arr['literal'] = $this->literal;

    if ($arrType == 'assoc') {
      return array_merge($this->nameParts, $arr);
    }
    // 'num' is accepted alongside 'int' because the exception message below
    // has always advertised it.
    if ($arrType == 'int' || $arrType == 'num') {
      return array_values($arr);
    }
    throw new Exception("Array must be associative ('assoc') or numeric ('num').");
  }

  /**
   * Parse the name into its constituent parts.
   *
   * Sequentially captures each name-part, working in from the ends and
   * trimming the namestring as it goes.
   *
   * @return boolean true on success
   * @throws Exception When no last name can be found, or when no first name
   *   can be found and the category is not 5.
   */
  private function parse()
  {
    $suffixes = implode("\.*|", $this->suffixes) . "\.*"; // each suffix gets a "\.*" behind it.
    $prefixes = implode(" |", $this->prefixes) . " "; // each prefix gets a " " behind it.

    // The regex use is a bit tricky. *Everything* matched by the regex will be replaced,
    // but you can select a particular parenthesized submatch to be returned.
    // Also, note that each regex requires that the preceding ones have been run, and matches chopped out.
    // NOTE(review): none of these patterns carry the "u" modifier although
    // one uses \p{L}; confirm how chopWithRegex() treats UTF-8 input before
    // changing them.
    $nicknamesRegex = "/ ('|\"|\(\"*'*)(.+?)('|\"|\"*'*\)) /"; // names that start or end w/ an apostrophe break this
    $suffixRegex = "/,* *($suffixes)$/";
    $lastRegex = "/(?!^)\b([^ ]+ y |$prefixes)*[^ ]+$/";
    $leadingInitRegex = "/^(.\.*)(?= \p{L}{2})/"; // note the lookahead, which isn't returned or replaced
    $firstRegex = "/^[^ ]+/";

    // get nickname, if there is one
    $this->nicknames = $this->name->chopWithRegex($nicknamesRegex, 2);

    // get suffix, if there is one
    $this->suffix = $this->name->chopWithRegex($suffixRegex, 1);

    // flip the before-comma and after-comma parts of the name
    $this->name->flip(",");

    // get the last name
    $this->last = $this->name->chopWithRegex($lastRegex, 0);
    if (!$this->last){
      throw new Exception("Couldn't find a last name in '{$this->name->getStr()}'.");
    }

    // get the first initial, if there is one
    $this->leadingInit = $this->name->chopWithRegex($leadingInitRegex, 1);

    // get the first name
    $this->first = $this->name->chopWithRegex($firstRegex, 0);
    if (!$this->first && $this->category != 5){
      throw new Exception("Couldn't find a first name in '{$this->name->getStr()}'");
    }

    // if anything's left, that's the middle name
    $this->middle = $this->name->getStr();
    return true;
  }

}
214 ?> |