getLast() . ", " . $parser->getFirst();
 * //returns "Smith, John"
 *
 *
 */
class HumanNameParser_Parser {
  /** @var HumanNameParser_Name The name currently held by the parser. */
  private $name;
  /** @var array Caller-supplied name array; merged underneath the parsed parts by getArray(). */
  private $nameParts = array();
  private $leadingInit;
  private $first;
  private $nicknames;
  private $middle;
  private $last;
  private $suffix;
  /** @var mixed Author category; the value 5 makes setName() treat the name as a literal. */
  private $category;
  /** @var mixed Author type; the value 5 makes setName() treat the name as a literal. */
  private $type;
  /** @var mixed 0 after parseName() resets it, TRUE once a name was stored unparsed. */
  private $literal;

  /** @var array Lower-case suffixes recognised at the end of a name. */
  private $suffixes;
  /** @var array Lower-case surname prefixes that are kept attached to the last name. */
  private $prefixes;

  /**
   * Constructor.
   *
   * Sets up the recognised suffix and prefix lists, then passes $name to
   * setName(), which parses it if it is non-empty.
   *
   * @param mixed $name A name string, an array with a 'name' key, or a
   *                    HumanNameParser_Name object. May be omitted.
   */
  public function __construct($name = NULL)
  {
    $this->suffixes = array('esq','esquire','jr','sr','2','ii','iii','iv');
    $this->prefixes = array('bar','ben','bin','da','dal','de la', 'de', 'del','der','di',
      'ibn','la','le','san','st','ste','van', 'van der', 'van den', 'vel','von');
    $this->setName($name);
  }

  /**
   * Parses a name and returns its parts as an associative array.
   *
   * Resets the literal flag, category and type to their defaults (0, 1, 1)
   * before every call. When $name is an array with a 'name' key, its
   * 'auth_category' and 'auth_type' entries (if non-empty) select the category
   * and type; an explicit non-empty $category always wins over 'auth_category'.
   *
   * @param mixed $name     A name string, or an array with at least a 'name' key.
   * @param mixed $category Optional category overriding the one found in $name.
   * @return array The result of getArray(): the supplied array (if any)
   *               overlaid with the parsed name parts.
   * @throws Exception When parse() cannot find a last or first name.
   */
  public function parseName($name = NULL, $category = NULL) {
    $this->literal = 0;
    $this->category = 1;
    $this->type = 1;

    if (is_array($name) && isset($name['name'])) {
      if (!empty($name['auth_category']) && empty($category)) {
        $this->category = $name['auth_category'];
      }
      elseif (!empty($category)) {
        $this->category = $category;
      }
      if (!empty($name['auth_type'])) {
        $this->type = $name['auth_type'];
      }
      $this->nameParts = $name;
      $this->setName($name['name'], $category);
    }
    else {
      // Start from a clean slate so keys left over from an earlier array-based
      // call on this same parser do not leak into this result.
      $this->nameParts = array('name' => $name);
      $this->setName($name, $category);
    }

    return $this->getArray();
  }

  /**
   * Sets the name and parses it.
   *
   * Accepts a HumanNameParser_Name object, an array with a 'name' key, or a
   * plain string (the latter two are wrapped in a HumanNameParser_Name). All
   * name parts are cleared, then the name is either stored verbatim as the
   * last name (when the category or type equals 5) or split by parse().
   * Does nothing when $name is empty.
   *
   * @param mixed $name     The name as a string, an array with a 'name' key,
   *                        or a HumanNameParser_Name object.
   * @param mixed $category Optional category; applied only when non-empty so
   *                        that a category already chosen is not wiped out.
   * @throws Exception When parse() cannot find a last or first name.
   */
  public function setName($name = NULL, $category = NULL){
    if (!$name) {
      return;
    }

    // The original statement here was a comparison ("==") whose result was
    // discarded, so the argument never took effect.
    if (!empty($category)) {
      $this->category = $category;
    }

    // The plain string used as the last name when the name is kept literal.
    $literalName = $name;

    if ($name instanceof HumanNameParser_Name) { // this is mostly for testing
      $this->name = $name;
      $literalName = $name->getStr();
    }
    elseif (is_array($name) && isset($name['name'])) {
      $this->name = new HumanNameParser_Name($name['name']);
      $this->nameParts = $name;
      $literalName = $name['name'];
    }
    else {
      $this->name = new HumanNameParser_Name($name);
    }

    $this->leadingInit = "";
    $this->first = "";
    $this->nicknames = "";
    $this->middle = "";
    $this->last = "";
    $this->suffix = "";

    if ($this->category == 5 || $this->type == 5) {
      // Literal names are not split; the whole string becomes the last name.
      $this->last = $literalName;
      $this->literal = TRUE;
    }
    else {
      $this->parse();
    }
  }

  /**
   * @return string The leading initial, or "" if none was found.
   */
  public function getleadingInit() {
    return $this->leadingInit;
  }

  /**
   * @return string The first name.
   */
  public function getFirst() {
    return $this->first;
  }

  /**
   * @return string The nickname(s), or "" if none were found.
   */
  public function getNicknames() {
    return $this->nicknames;
  }

  /**
   * @return string Whatever remained of the name after all other parts were removed.
   */
  public function getMiddle() {
    return $this->middle;
  }

  /**
   * @return string The last name (the whole name when it was kept literal).
   */
  public function getLast() {
    return $this->last;
  }

  /**
   * @return string The suffix, or "" if none was found.
   */
  public function getSuffix() {
    return $this->suffix;
  }

  /**
   * @return HumanNameParser_Name The name object held by the parser.
   */
  public function getName(){
    return $this->name;
  }

  /**
   * Returns all the parts of the name as an array.
   *
   * The 'initials' entry is the middle part cut to its first 10 bytes. The
   * 'md5' entry hashes the JSON encoding of the six name parts only; it is
   * computed before 'md5' and 'literal' are added.
   *
   * @param String $arrType pass 'int' to get an integer-indexed array (default is associative)
   * @return array An array of the name-parts; in 'assoc' mode they are merged
   *               over the array supplied to parseName()/setName().
   * @throws Exception When $arrType is neither 'assoc' nor 'int'.
   */
  public function getArray($arrType = 'assoc') {
    $arr = array();
    $arr['prefix'] = $this->leadingInit;
    $arr['firstname'] = $this->first;
    $arr['nicknames'] = $this->nicknames;
    // Cast guards against a NULL middle on a parser that has not parsed
    // anything yet (passing NULL to substr() is deprecated in PHP 8.1+).
    $arr['initials'] = substr((string) $this->middle, 0, 10);
    $arr['lastname'] = $this->last;
    $arr['suffix'] = $this->suffix;
    $arr['md5'] = md5(json_encode($arr));
    $arr['literal'] = $this->literal;

    if ($arrType == 'assoc') {
      return array_merge($this->nameParts, $arr);
    }
    elseif ($arrType == 'int') {
      return array_values($arr);
    }
    else {
      throw new Exception("Array must be associative ('assoc') or numeric ('int').");
    }
  }

  /**
   * Parse the name into its constituent parts.
   *
   * Sequentially captures each name-part, working in from the ends and
   * trimming the namestring as it goes.
   *
   * @return boolean true on success
   * @throws Exception When no last name, or no first name, can be found.
   */
  private function parse()
  {
    $suffixes = implode("\.*|", $this->suffixes) . "\.*"; // each suffix gets a "\.*" behind it.
    $prefixes = implode(" |", $this->prefixes) . " "; // each prefix gets a " " behind it.

    // The regex use is a bit tricky. *Everything* matched by the regex will be replaced,
    // but you can select a particular parenthesized submatch to be returned.
    // Also, note that each regex requires that the preceding ones have been run, and matches chopped out.
    $nicknamesRegex = "/ ('|\"|\(\"*'*)(.+?)('|\"|\"*'*\)) /"; // names that start or end w/ an apostrophe break this
    $suffixRegex = "/,* *($suffixes)$/";
    $lastRegex = "/(?!^)\b([^ ]+ y |$prefixes)*[^ ]+$/";
    $leadingInitRegex = "/^(.\.*)(?= \p{L}{2})/"; // note the lookahead, which isn't returned or replaced
    $firstRegex = "/^[^ ]+/";

    // get nickname, if there is one
    $this->nicknames = $this->name->chopWithRegex($nicknamesRegex, 2);

    // get suffix, if there is one
    $this->suffix = $this->name->chopWithRegex($suffixRegex, 1);

    // flip the before-comma and after-comma parts of the name
    $this->name->flip(",");

    // get the last name
    $this->last = $this->name->chopWithRegex($lastRegex, 0);
    if (!$this->last){
      throw new Exception("Couldn't find a last name in '{$this->name->getStr()}'.");
    }

    // get the first initial, if there is one
    $this->leadingInit = $this->name->chopWithRegex($leadingInitRegex, 1);

    // get the first name
    $this->first = $this->name->chopWithRegex($firstRegex, 0);
    if (!$this->first && $this->category != 5){
      throw new Exception("Couldn't find a first name in '{$this->name->getStr()}'");
    }

    // if anything's left, that's the middle name
    $this->middle = $this->name->getStr();
    return true;
  }

}
?>