annotate modules/biblio/includes/Name.php @ 6:a75ead649730

added biblio, admin_menu and reference modules
author danieleb <danielebarchiesi@me.com>
date Fri, 20 Sep 2013 11:18:21 +0100
parents
children
rev   line source
danielebarchiesi@6 1 <?php
danielebarchiesi@6 2 /**
danielebarchiesi@6 3 * Does cutting and matching stuff with a name string.
danielebarchiesi@6 4 * Note that the string has to be UTF8-encoded.
danielebarchiesi@6 5 *
danielebarchiesi@6 6 */
class HumanNameParser_Name {
  /**
   * The name string currently being worked on (UTF-8, kept normalized).
   *
   * @var string
   */
  private $str;

  /**
   * Builds a name object from a raw name string.
   *
   * @param string $str a UTF-8 encoded name string.
   * @throws Exception if $str is not valid UTF-8 (raised by setStr()).
   */
  public function __construct($str)
  {
    $this->setStr($str);
  }

  /**
   * Checks encoding, normalizes whitespace/punctuation, and sets the name string.
   *
   * @param string $str a UTF-8 encoded string.
   * @return bool True on success.
   * @throws Exception if $str is not valid UTF-8.
   */
  public function setStr($str)
  {
    if (!drupal_validate_utf8($str)) {
      throw new Exception("Name is not encoded in UTF-8");
    }
    $this->str = $str;
    $this->norm();
    return true;
  }

  /**
   * Returns the current (normalized) name string.
   *
   * @return string
   */
  public function getStr()
  {
    return $this->str;
  }

  /**
   * Uses a regex to chop off and return part of the name string.
   *
   * There are two parts: first, the matched substring is captured; then the
   * whole regex match is removed from the name string (replaced by a single
   * space) and the remainder is normalized.
   *
   * The "u" (unicode) and "i" (case-insensitive) modifiers, followed by
   * $regexFlags, are appended to $regex, so $regex must end with its closing
   * delimiter and carry no modifiers of its own.
   *
   * @param string $regex matches the part of the name string to chop off.
   * @param int $submatchIndex which of the parenthesized submatches to return.
   * @param string $regexFlags optional additional regex modifiers.
   * @return string the requested submatch, or '' if nothing was matched.
   * @throws Exception if the regex matches the name string more than once;
   *   the name string is left unchanged in that case.
   */
  public function chopWithRegex($regex, $submatchIndex = 0, $regexFlags = '')
  {
    $regex = $regex . "ui" . $regexFlags; // unicode + case-insensitive
    preg_match($regex, $this->str, $m);
    $subset = isset($m[$submatchIndex]) ? $m[$submatchIndex] : '';

    // Compare against '' explicitly: a plain truthiness test would treat a
    // legitimately matched "0" as "no match".
    if ($subset === '') {
      return '';
    }

    // Work on a copy so the object is untouched (and the error message shows
    // the real name) if the regex turns out to be ambiguous.
    $numReplacements = 0;
    $remainder = preg_replace($regex, ' ', $this->str, -1, $numReplacements);
    if ($numReplacements > 1) {
      throw new Exception("The regex being used to find the name: '$this->str' has multiple matches.");
    }

    $this->str = $remainder;
    $this->norm();
    return $subset;
  }

  /**
   * Flips the front and back parts of a name with one another.
   *
   * Front and back are determined by a specified character (or character
   * sequence) somewhere in the middle of the string. The separator is matched
   * literally; regex metacharacters in it have no special meaning.
   *
   * @param string $flipAroundChar the character(s) demarcating the two halves you want to flip.
   * @return bool True on success (including when the separator does not occur).
   * @throws Exception if the separator occurs more than once.
   */
  public function flip($flipAroundChar)
  {
    // Quote the separator so that e.g. "." or "|" split on that literal
    // character rather than being interpreted as regex syntax.
    $separator = preg_quote($flipAroundChar, '/');
    $substrings = preg_split("/$separator/u", $this->str);
    $parts = count($substrings);

    if ($parts > 2) {
      throw new Exception("Can't flip around multiple '$flipAroundChar' characters in: '$this->str'.");
    }
    if ($parts == 2) {
      $this->str = $substrings[1] . " " . $substrings[0];
      $this->norm();
    }
    return true; // if there's 1 or 0 $flipAroundChar found
  }

  /**
   * Removes extra whitespace and punctuation from $this->str.
   *
   * Strips whitespace from both ends, collapses every run of whitespace into
   * a single " ", and drops one trailing comma together with any whitespace
   * directly in front of it.
   *
   * @return bool True on success.
   */
  private function norm()
  {
    $this->str = preg_replace("#^\s+#u", "", $this->str);
    $this->str = preg_replace("#\s+$#u", "", $this->str);
    $this->str = preg_replace("#\s+#u", " ", $this->str);
    // Remove the trailing comma outright. Swapping it for a space (as was done
    // previously) reintroduced the trailing whitespace that was just trimmed.
    $this->str = preg_replace("#\s*,$#u", "", $this->str);
    return true;
  }
}
danielebarchiesi@6 102 ?>