danielebarchiesi@6
|
1 <?php
|
danielebarchiesi@6
|
/**
 * Parses a human name into its constituent parts.
 *
 * Works with a HumanNameParser_Name object, which holds the name string and
 * has the matched pieces chopped out of it as each part is recognised.
 *
 * Example usage:
 *   $parser = new HumanNameParser_Parser("John Q. Smith");
 *   echo $parser->getLast() . ", " . $parser->getFirst();
 *   // prints "Smith, John"
 */
class HumanNameParser_Parser {
  /** @var HumanNameParser_Name The name currently being parsed. */
  private $name;

  /** @var array Caller-supplied name record; merged under the parsed parts by getArray(). */
  private $nameParts = array();

  private $leadingInit;
  private $first;
  private $nicknames;
  private $middle;
  private $last;
  private $suffix;

  /**
   * Category and type of the name. A value of 5 in either one marks the name
   * as literal: it is stored whole as the last name and is not parsed.
   */
  private $category;
  private $type;

  /** @var mixed Literal flag: 0 after parseName() resets it, TRUE once the literal path is taken. */
  private $literal;

  /** @var array Lower-case suffixes recognised at the end of a name. */
  private $suffixes;

  /** @var array Lower-case last-name prefixes (particles) kept with the last name. */
  private $prefixes;

  /**
   * Constructor.
   *
   * @param mixed $name
   *   Optional name to parse immediately: a string, an array with a 'name'
   *   key, or a HumanNameParser_Name object. Nothing is parsed when empty.
   *
   * @throws Exception
   *   When a name is given and no first or last name can be found in it.
   */
  public function __construct($name = NULL)
  {
    $this->suffixes = array('esq','esquire','jr','sr','2','ii','iii','iv');
    $this->prefixes = array('bar','ben','bin','da','dal','de la', 'de', 'del','der','di',
      'ibn','la','le','san','st','ste','van', 'van der', 'van den', 'vel','von');
    $this->setName($name);
  }

  /**
   * Parses a name and returns its parts.
   *
   * @param mixed $name
   *   Either a name string, or an array holding the name under 'name' and
   *   optionally 'auth_category' and 'auth_type'. When an array is given, its
   *   keys are carried through into the returned array.
   * @param mixed $category
   *   Optional category. When non-empty it takes precedence over
   *   $name['auth_category'].
   *
   * @return array
   *   The associative array produced by getArray().
   *
   * @throws Exception
   *   When no first or last name can be found.
   */
  public function parseName($name = NULL, $category = NULL) {
    // Start every call from the same defaults so a reused parser does not
    // inherit the previous name's classification.
    $this->literal = 0;
    $this->category = 1;
    $this->type = 1;

    if (is_array($name) && isset($name['name'])) {
      if (!empty($category)) {
        $this->category = $category;
      }
      elseif (!empty($name['auth_category'])) {
        $this->category = $name['auth_category'];
      }
      if (!empty($name['auth_type'])) {
        $this->type = $name['auth_type'];
      }
      $this->nameParts = $name;
      $this->setName($name['name'], $category);
    }
    else {
      // Replace (rather than extend) the stored record so keys from an
      // earlier array-style call cannot leak into this result.
      $this->nameParts = array('name' => $name);
      $this->setName($name, $category);
    }

    return $this->getArray();
  }

  /**
   * Sets the name and parses it.
   *
   * Accepts a HumanNameParser_Name object, an array with a 'name' key, or a
   * plain string (the latter two are wrapped in a HumanNameParser_Name), then
   * loads the constituent parts. Does nothing when $name is empty.
   *
   * @param mixed $name
   *   The name as a string, an array with a 'name' key, or a
   *   HumanNameParser_Name object.
   * @param mixed $category
   *   Optional category; applied only when non-empty, so a category that is
   *   already set is never overwritten with NULL.
   *
   * @throws Exception
   *   When the name is not literal and no first or last name can be found.
   */
  public function setName($name = NULL, $category = NULL){
    if (!$name) {
      return;
    }

    if (!empty($category)) {
      $this->category = $category;
    }

    if (is_object($name) && get_class($name) == "HumanNameParser_Name") { // this is mostly for testing
      $this->name = $name;
    }
    elseif (is_array($name) && isset($name['name'])) {
      $this->name = new HumanNameParser_Name($name['name']);
      $this->nameParts = $name;
    }
    else {
      $this->name = new HumanNameParser_Name($name);
    }

    $this->leadingInit = "";
    $this->first = "";
    $this->nicknames = "";
    $this->middle = "";
    $this->last = "";
    $this->suffix = "";

    if ($this->category == 5 || $this->type == 5) {
      // Literal name: keep it whole as the last name. Always store the name
      // string, never the array or object wrapper it arrived in.
      if (is_array($name) && isset($name['name'])) {
        $this->last = $name['name'];
      }
      elseif ($name === $this->name) {
        // A HumanNameParser_Name object was passed in directly.
        $this->last = $this->name->getStr();
      }
      else {
        $this->last = $name;
      }
      $this->literal = TRUE;
    }
    else {
      $this->parse();
    }
  }

  public function getleadingInit() {
    return $this->leadingInit;
  }

  public function getFirst() {
    return $this->first;
  }

  public function getNicknames() {
    return $this->nicknames;
  }

  public function getMiddle() {
    return $this->middle;
  }

  public function getLast() {
    return $this->last;
  }

  public function getSuffix() {
    return $this->suffix;
  }

  public function getName(){
    return $this->name;
  }

  /**
   * Returns all the parts of the name as an array.
   *
   * @param string $arrType
   *   'assoc' (default) for an associative array merged over the
   *   caller-supplied name record, or 'int' (alias 'num') for an
   *   integer-indexed array of the parsed parts only.
   *
   * @return array
   *   An array of the name parts.
   *
   * @throws Exception
   *   When $arrType is not one of the accepted values.
   */
  public function getArray($arrType = 'assoc') {
    $arr = array();
    $arr['prefix'] = $this->leadingInit;
    $arr['firstname'] = $this->first;
    $arr['nicknames'] = $this->nicknames;
    // NOTE(review): substr() counts bytes, so a multi-byte middle name can be
    // cut mid-character; json_encode() below may then fail on the invalid
    // UTF-8. Left as is because changing the truncation would alter the md5
    // of names hashed previously - confirm before switching to a
    // multibyte-aware truncation.
    $arr['initials'] = substr($this->middle, 0, 10);
    $arr['lastname'] = $this->last;
    $arr['suffix'] = $this->suffix;
    // The hash covers exactly the six parts above, in this order; 'literal'
    // is added afterwards and is deliberately not part of it.
    $arr['md5'] = md5(json_encode($arr));
    $arr['literal'] = $this->literal;

    if ($arrType == 'assoc') {
      return array_merge($this->nameParts, $arr);
    }
    if ($arrType == 'int' || $arrType == 'num') {
      return array_values($arr);
    }
    throw new Exception("Array must be associative ('assoc') or numeric ('int').");
  }

  /**
   * Parses the name into its constituent parts.
   *
   * Sequentially captures each name part, working in from the ends and
   * trimming the name string as it goes.
   *
   * @return boolean
   *   TRUE on success.
   *
   * @throws Exception
   *   When no last name or no first name can be found.
   */
  private function parse()
  {
    $suffixes = implode("\.*|", $this->suffixes) . "\.*"; // each suffix gets a "\.*" behind it.
    $prefixes = implode(" |", $this->prefixes) . " "; // each prefix gets a " " behind it.

    // The regex use is a bit tricky. *Everything* matched by the regex will be replaced,
    // but you can select a particular parenthesized submatch to be returned.
    // Also, note that each regex requires that the preceding ones have been run, and matches chopped out.
    // NOTE(review): the patterns carry no modifiers even though the suffix and
    // prefix lists are lower-case; presumably chopWithRegex() appends them -
    // confirm against HumanNameParser_Name before adding flags here.
    $nicknamesRegex = "/ ('|\"|\(\"*'*)(.+?)('|\"|\"*'*\)) /"; // names that starts or end w/ an apostrophe break this
    $suffixRegex = "/,* *($suffixes)$/";
    $lastRegex = "/(?!^)\b([^ ]+ y |$prefixes)*[^ ]+$/";
    $leadingInitRegex = "/^(.\.*)(?= \p{L}{2})/"; // note the lookahead, which isn't returned or replaced
    $firstRegex = "/^[^ ]+/";

    // get nickname, if there is one
    $this->nicknames = $this->name->chopWithRegex($nicknamesRegex, 2);

    // get suffix, if there is one
    $this->suffix = $this->name->chopWithRegex($suffixRegex, 1);

    // flip the before-comma and after-comma parts of the name
    $this->name->flip(",");

    // get the last name
    $this->last = $this->name->chopWithRegex($lastRegex, 0);
    if (!$this->last){
      throw new Exception("Couldn't find a last name in '{$this->name->getStr()}'.");
    }

    // get the first initial, if there is one
    $this->leadingInit = $this->name->chopWithRegex($leadingInitRegex, 1);

    // get the first name
    $this->first = $this->name->chopWithRegex($firstRegex, 0);
    if (!$this->first && $this->category != 5){
      throw new Exception("Couldn't find a first name in '{$this->name->getStr()}'");
    }

    // if anything's left, that's the middle name
    $this->middle = $this->name->getStr();
    return true;
  }
}
|
danielebarchiesi@6
|
214 ?>
|