Mercurial > hg > rr-repo
diff modules/biblio/includes/Parser.php @ 6:a75ead649730
added biblio, admin_menu and reference modules
author | danieleb <danielebarchiesi@me.com> |
---|---|
date | Fri, 20 Sep 2013 11:18:21 +0100 |
parents | |
children |
line wrap: on
line diff
<?php
/**
 * Splits a personal name into its parts using a HumanNameParser_Name object.
 *
 * Example usage:
 *   $parser = new HumanNameParser_Parser("John Q. Smith");
 *   echo $parser->getLast() . ", " . $parser->getFirst();
 *   // prints "Smith, John"
 *
 * Names whose category or type equals 5 are treated as "literal": the whole
 * name is stored as the last name and no parsing is attempted.
 */
class HumanNameParser_Parser {
  private $name;
  private $nameParts = array();
  private $leadingInit;
  private $first;
  private $nicknames;
  private $middle;
  private $last;
  private $suffix;
  private $category;
  private $type;
  private $literal;

  private $suffixes;
  private $prefixes;

  /**
   * Constructor.
   *
   * @param mixed $name Either a name as a string or as a Name object.
   *   When empty, nothing is parsed until setName()/parseName() is called.
   *
   * @throws Exception if a non-empty name cannot be parsed.
   */
  public function __construct($name = NULL) {
    $this->suffixes = array('esq', 'esquire', 'jr', 'sr', '2', 'ii', 'iii', 'iv');
    // Order matters: the entries are joined into a regex alternation.
    $this->prefixes = array(
      'bar', 'ben', 'bin', 'da', 'dal', 'de la', 'de', 'del', 'der', 'di',
      'ibn', 'la', 'le', 'san', 'st', 'ste', 'van', 'van der', 'van den', 'vel', 'von',
    );
    $this->setName($name);
  }

  /**
   * Parses a name and returns its parts as an associative array.
   *
   * Resets the literal flag to 0 and category/type to 1 before parsing.
   *
   * @param mixed $name Either a name string, or an array with a 'name' key
   *   and optional 'auth_category' / 'auth_type' keys. The array's other
   *   entries are carried over into the returned array.
   * @param mixed $category Optional category; when non-empty it takes
   *   precedence over $name['auth_category'].
   *
   * @return array The result of getArray().
   *
   * @throws Exception if the name cannot be parsed.
   */
  public function parseName($name = NULL, $category = NULL) {
    $this->literal = 0;
    $this->category = 1;
    $this->type = 1;

    if (is_array($name) && isset($name['name'])) {
      if (!empty($name['auth_category']) && empty($category)) {
        $this->category = $name['auth_category'];
      }
      elseif (!empty($category)) {
        $this->category = $category;
      }
      if (!empty($name['auth_type'])) {
        $this->type = $name['auth_type'];
      }
      $this->nameParts = $name;
      $this->setName($name['name'], $category);
    }
    else {
      $this->nameParts['name'] = $name;
      $this->setName($name, $category);
    }

    return $this->getArray();
  }

  /**
   * Sets the name and parses it.
   *
   * Accepts a HumanNameParser_Name object, an array with a 'name' key, or a
   * plain string (wrapped in a HumanNameParser_Name), then loads the
   * constituent parts. Does nothing when $name is empty.
   *
   * @param mixed $name Either a name as a string or as a Name object.
   * @param mixed $category Optional category. Applied only when non-empty so
   *   that a category already chosen by parseName() is not wiped out.
   *
   * @throws Exception if the name cannot be parsed.
   */
  public function setName($name = NULL, $category = NULL) {
    if (!$name) {
      return;
    }

    // The original statement was "$this->category == $category;", a
    // comparison with no effect, so an explicitly passed category was lost.
    if (!empty($category)) {
      $this->category = $category;
    }

    // $rawName is the plain name string, used for the literal case below.
    if (is_object($name) && get_class($name) == "HumanNameParser_Name") { // this is mostly for testing
      $this->name = $name;
      $rawName = $name->getStr();
    }
    elseif (is_array($name) && isset($name['name'])) {
      $this->name = new HumanNameParser_Name($name['name']);
      $this->nameParts = $name;
      $rawName = $name['name'];
    }
    else {
      $this->name = new HumanNameParser_Name($name);
      $rawName = $name;
    }

    $this->leadingInit = "";
    $this->first = "";
    $this->nicknames = "";
    $this->middle = "";
    $this->last = "";
    $this->suffix = "";

    if ($this->category == 5 || $this->type == 5) {
      // Literal name: keep it whole as the last name, skip parsing.
      $this->last = $rawName;
      $this->literal = TRUE;
    }
    else {
      $this->parse();
    }
  }

  /**
   * @return string The leading initial, if one was found.
   */
  public function getleadingInit() {
    return $this->leadingInit;
  }

  /**
   * @return string The first name.
   */
  public function getFirst() {
    return $this->first;
  }

  /**
   * @return string The nickname(s), if any were found.
   */
  public function getNicknames() {
    return $this->nicknames;
  }

  /**
   * @return string Whatever remained after all other parts were removed.
   */
  public function getMiddle() {
    return $this->middle;
  }

  /**
   * @return string The last name (the whole name for literal names).
   */
  public function getLast() {
    return $this->last;
  }

  /**
   * @return string The suffix, if one was found.
   */
  public function getSuffix() {
    return $this->suffix;
  }

  /**
   * @return mixed The underlying Name object (NULL before a name is set).
   */
  public function getName() {
    return $this->name;
  }

  /**
   * Returns all the parts of the name as an array.
   *
   * The 'md5' entry is the MD5 of the JSON-encoded prefix, firstname,
   * nicknames, initials, lastname and suffix entries; 'literal' is added
   * afterwards and is not part of the hash. 'initials' is the middle part
   * truncated to 10 bytes.
   *
   * @param String $arrType 'assoc' (default) for an associative array merged
   *   over the stored input parts, or 'int' / 'num' for an integer-indexed
   *   array of the parsed parts only.
   *
   * @return array An array of the name-parts.
   *
   * @throws Exception if $arrType is not one of the accepted values.
   */
  public function getArray($arrType = 'assoc') {
    $arr = array();
    $arr['prefix'] = $this->leadingInit;
    $arr['firstname'] = $this->first;
    $arr['nicknames'] = $this->nicknames;
    // Cast so an unparsed (NULL) middle does not hit substr() as NULL.
    $arr['initials'] = substr((string) $this->middle, 0, 10);
    $arr['lastname'] = $this->last;
    $arr['suffix'] = $this->suffix;
    $arr['md5'] = md5(json_encode($arr));
    $arr['literal'] = $this->literal;

    if ($arrType == 'assoc') {
      return array_merge($this->nameParts, $arr);
    }
    // 'num' is accepted because the old error message told callers to use it.
    if ($arrType == 'int' || $arrType == 'num') {
      return array_values($arr);
    }

    throw new Exception("Array must be associative ('assoc') or numeric ('int' or 'num').");
  }

  /**
   * Parse the name into its constituent parts.
   *
   * Sequentially captures each name-part, working in from the ends and
   * trimming the namestring as it goes.
   *
   * @return boolean true on success
   *
   * @throws Exception if no last name, or (unless category is 5) no first
   *   name, can be found.
   */
  private function parse() {
    $suffixes = implode("\.*|", $this->suffixes) . "\.*"; // each suffix gets a "\.*" behind it.
    $prefixes = implode(" |", $this->prefixes) . " "; // each prefix gets a " " behind it.

    // The regex use is a bit tricky. *Everything* matched by the regex will be replaced,
    // but you can select a particular parenthesized submatch to be returned.
    // Also, note that each regex requires that the preceding ones have been run, and matches chopped out.
    $nicknamesRegex = "/ ('|\"|\(\"*'*)(.+?)('|\"|\"*'*\)) /"; // names that start or end w/ an apostrophe break this
    $suffixRegex = "/,* *($suffixes)$/";
    $lastRegex = "/(?!^)\b([^ ]+ y |$prefixes)*[^ ]+$/";
    $leadingInitRegex = "/^(.\.*)(?= \p{L}{2})/"; // note the lookahead, which isn't returned or replaced
    $firstRegex = "/^[^ ]+/";

    // get nickname, if there is one
    $this->nicknames = $this->name->chopWithRegex($nicknamesRegex, 2);

    // get suffix, if there is one
    $this->suffix = $this->name->chopWithRegex($suffixRegex, 1);

    // flip the before-comma and after-comma parts of the name
    $this->name->flip(",");

    // get the last name
    $this->last = $this->name->chopWithRegex($lastRegex, 0);
    if (!$this->last) {
      throw new Exception("Couldn't find a last name in '{$this->name->getStr()}'.");
    }

    // get the first initial, if there is one
    $this->leadingInit = $this->name->chopWithRegex($leadingInitRegex, 1);

    // get the first name
    $this->first = $this->name->chopWithRegex($firstRegex, 0);
    if (!$this->first && $this->category != 5) {
      throw new Exception("Couldn't find a first name in '{$this->name->getStr()}'");
    }

    // if anything's left, that's the middle name
    $this->middle = $this->name->getStr();
    return true;
  }

}