annotate modules/biblio/includes/Parser.php @ 6:a75ead649730

added biblio, admin_menu and reference modules
author danieleb <danielebarchiesi@me.com>
date Fri, 20 Sep 2013 11:18:21 +0100
parents
children
rev   line source
danielebarchiesi@6 1 <?php
/**
 * Splits a personal name into its constituent parts.
 *
 * Works with a HumanNameParser_Name object, which holds the working copy of
 * the name string while the individual parts are chopped out of it.
 *
 * Example usage:
 *   $parser = new HumanNameParser_Parser("John Q. Smith");
 *   echo $parser->getLast() . ", " . $parser->getFirst();
 *   // prints "Smith, John"
 */
class HumanNameParser_Parser {
  /** @var HumanNameParser_Name Working copy of the name being parsed. */
  private $name;
  /** @var array Caller-supplied name array; merged into getArray() output. */
  private $nameParts = array();
  private $leadingInit;
  private $first;
  private $nicknames;
  private $middle;
  private $last;
  private $suffix;
  /** @var mixed A value of 5 marks the name as literal (stored whole, not parsed). */
  private $category;
  /** @var mixed A value of 5 marks the name as literal (stored whole, not parsed). */
  private $type;
  /** @var mixed TRUE when the name was stored as a literal, 0 otherwise. */
  private $literal;

  /** @var array Lower-case suffixes recognised at the end of a name. */
  private $suffixes;
  /** @var array Lower-case particles that belong to the last name. */
  private $prefixes;

  /**
   * Constructor.
   *
   * @param mixed $name
   *   Optional name to parse immediately: a string, a HumanNameParser_Name
   *   object, or an array with a 'name' key. Nothing is parsed when omitted.
   *
   * @throws Exception
   *   When a name is given and no last or first name can be found in it.
   */
  public function __construct($name = NULL) {
    $this->suffixes = array('esq', 'esquire', 'jr', 'sr', '2', 'ii', 'iii', 'iv');
    $this->prefixes = array(
      'bar', 'ben', 'bin', 'da', 'dal', 'de la', 'de', 'del', 'der', 'di',
      'ibn', 'la', 'le', 'san', 'st', 'ste', 'van', 'van der', 'van den', 'vel', 'von',
    );
    $this->setName($name);
  }

  /**
   * Parses a name and returns its parts as an associative array.
   *
   * @param mixed $name
   *   Either a name string or an array with a 'name' key. The array may also
   *   carry 'auth_category' and 'auth_type'; all of its keys are passed
   *   through to the result.
   * @param mixed $category
   *   Optional category. When non-empty it takes precedence over
   *   $name['auth_category'].
   *
   * @return array
   *   The input array (or array('name' => $name)) merged with the parsed
   *   parts, see getArray().
   *
   * @throws Exception
   *   When the name is not literal and no last or first name can be found.
   */
  public function parseName($name = NULL, $category = NULL) {
    // Every call starts from the defaults so that state from a previous name
    // cannot leak into this one.
    $this->literal = 0;
    $this->category = 1;
    $this->type = 1;

    if (is_array($name) && isset($name['name'])) {
      // An explicit $category wins over the one stored in the array.
      if (!empty($category)) {
        $this->category = $category;
      }
      elseif (!empty($name['auth_category'])) {
        $this->category = $name['auth_category'];
      }
      if (!empty($name['auth_type'])) {
        $this->type = $name['auth_type'];
      }
      $this->nameParts = $name;
      $this->setName($name['name'], $category);
    }
    else {
      // Replace (rather than amend) the pass-through array, otherwise keys of
      // a previously parsed array would show up in this result.
      $this->nameParts = array('name' => $name);
      $this->setName($name, $category);
    }

    return $this->getArray();
  }

  /**
   * Sets the name and parses it.
   *
   * Does nothing when $name is empty. If the current category or type is 5
   * the name is treated as literal: the whole text is stored as the last name
   * and no parsing takes place.
   *
   * @param mixed $name
   *   A name string, a HumanNameParser_Name object, or an array with a
   *   'name' key.
   * @param mixed $category
   *   Optional category; when non-empty it replaces the current category.
   *
   * @throws Exception
   *   When the name is not literal and no last or first name can be found.
   */
  public function setName($name = NULL, $category = NULL) {
    if (!$name) {
      return;
    }

    // Only a non-empty category is applied, so a category already chosen by
    // parseName() (e.g. from 'auth_category') survives a NULL argument here.
    if (!empty($category)) {
      $this->category = $category;
    }

    if ($name instanceof HumanNameParser_Name) {
      // This is mostly for testing.
      $this->name = $name;
    }
    elseif (is_array($name) && isset($name['name'])) {
      $this->name = new HumanNameParser_Name($name['name']);
      $this->nameParts = $name;
    }
    else {
      $this->name = new HumanNameParser_Name($name);
    }

    $this->leadingInit = "";
    $this->first = "";
    $this->nicknames = "";
    $this->middle = "";
    $this->last = "";
    $this->suffix = "";
    $this->literal = 0;

    if ($this->category == 5 || $this->type == 5) {
      // Store the text of the name, not the array/object it arrived in.
      if ($name instanceof HumanNameParser_Name) {
        $this->last = $name->getStr();
      }
      elseif (is_array($name) && isset($name['name'])) {
        $this->last = $name['name'];
      }
      else {
        $this->last = $name;
      }
      $this->literal = TRUE;
    }
    else {
      $this->parse();
    }
  }

  /**
   * @return string The leading initial, e.g. the "J." of "J. Edgar Hoover".
   */
  public function getleadingInit() {
    return $this->leadingInit;
  }

  /**
   * @return string The first name.
   */
  public function getFirst() {
    return $this->first;
  }

  /**
   * @return string The nickname found between quotes or parentheses.
   */
  public function getNicknames() {
    return $this->nicknames;
  }

  /**
   * @return string Whatever was left after all other parts were removed.
   */
  public function getMiddle() {
    return $this->middle;
  }

  /**
   * @return string The last name, or the whole name for literal names.
   */
  public function getLast() {
    return $this->last;
  }

  /**
   * @return string The suffix, if one was found.
   */
  public function getSuffix() {
    return $this->suffix;
  }

  /**
   * @return HumanNameParser_Name The working name object.
   */
  public function getName() {
    return $this->name;
  }

  /**
   * Returns all the parts of the name as an array.
   *
   * @param string $arrType
   *   'assoc' (default) for an associative array merged over the
   *   caller-supplied name array, or 'int' / 'num' for an integer-indexed
   *   array of the parsed parts only.
   *
   * @return array
   *   The name parts: prefix, firstname, nicknames, initials, lastname,
   *   suffix, md5, literal.
   *
   * @throws Exception
   *   When $arrType is none of the supported values.
   */
  public function getArray($arrType = 'assoc') {
    $arr = array();
    $arr['prefix'] = $this->leadingInit;
    $arr['firstname'] = $this->first;
    $arr['nicknames'] = $this->nicknames;
    // The byte-wise truncation is kept on purpose: the md5 below is computed
    // from this value, so changing it would change existing fingerprints.
    // The cast only avoids passing NULL to substr() before a name is set.
    $arr['initials'] = substr((string) $this->middle, 0, 10);
    $arr['lastname'] = $this->last;
    $arr['suffix'] = $this->suffix;
    // Fingerprint of the six parts above; 'literal' is deliberately added
    // afterwards and is not part of the hash.
    $arr['md5'] = md5(json_encode($arr));
    $arr['literal'] = $this->literal;

    if ($arrType == 'assoc') {
      return array_merge($this->nameParts, $arr);
    }
    if ($arrType == 'int' || $arrType == 'num') {
      return array_values($arr);
    }
    throw new Exception("Array must be associative ('assoc') or numeric ('int' or 'num').");
  }

  /**
   * Parses the name into its constituent parts.
   *
   * Sequentially captures each name part, working in from the ends and
   * trimming the name string as it goes. The order of the steps matters:
   * each regex assumes the matches of the preceding ones are already gone.
   *
   * @return boolean
   *   TRUE on success.
   *
   * @throws Exception
   *   When no last name, or no first name, can be found.
   */
  private function parse() {
    // Each suffix gets a "\.*" behind it.
    $suffixes = implode("\.*|", $this->suffixes) . "\.*";
    // Each prefix gets a " " behind it.
    $prefixes = implode(" |", $this->prefixes) . " ";

    // The regex use is a bit tricky. *Everything* matched by the regex will
    // be replaced, but you can select a particular parenthesized submatch to
    // be returned.
    // Names that start or end with an apostrophe break the nickname regex.
    $nicknamesRegex = "/ ('|\"|\(\"*'*)(.+?)('|\"|\"*'*\)) /";
    $suffixRegex = "/,* *($suffixes)$/";
    $lastRegex = "/(?!^)\b([^ ]+ y |$prefixes)*[^ ]+$/";
    // Note the lookahead, which isn't returned or replaced.
    $leadingInitRegex = "/^(.\.*)(?= \p{L}{2})/";
    $firstRegex = "/^[^ ]+/";

    // Get the nickname, if there is one.
    $this->nicknames = $this->name->chopWithRegex($nicknamesRegex, 2);

    // Get the suffix, if there is one.
    $this->suffix = $this->name->chopWithRegex($suffixRegex, 1);

    // Flip the before-comma and after-comma parts of the name.
    $this->name->flip(",");

    // Get the last name.
    $this->last = $this->name->chopWithRegex($lastRegex, 0);
    if (!$this->last) {
      throw new Exception("Couldn't find a last name in '{$this->name->getStr()}'.");
    }

    // Get the first initial, if there is one.
    $this->leadingInit = $this->name->chopWithRegex($leadingInitRegex, 1);

    // Get the first name.
    $this->first = $this->name->chopWithRegex($firstRegex, 0);
    if (!$this->first && $this->category != 5) {
      throw new Exception("Couldn't find a first name in '{$this->name->getStr()}'");
    }

    // If anything's left, that's the middle name.
    $this->middle = $this->name->getStr();
    return TRUE;
  }

}
danielebarchiesi@6 214 ?>