Mercurial > hg > rr-repo
diff modules/biblio/includes/Name.php @ 6:a75ead649730
added biblio, admin_menu and reference modules
author | danieleb <danielebarchiesi@me.com> |
---|---|
date | Fri, 20 Sep 2013 11:18:21 +0100 |
parents | |
children |
line wrap: on
line diff
<?php
/**
 * Does cutting and matching stuff with a name string.
 * Note that the string has to be UTF8-encoded.
 *
 * The string held by the object is re-normalized (see norm()) after every
 * operation that changes it.
 */
class HumanNameParser_Name
{
    /**
     * The current (normalized) name string.
     *
     * @var string
     */
    private $str;

    /**
     * @param string $str a UTF-8 encoded name string.
     * @throws Exception if $str is not valid UTF-8.
     */
    public function __construct($str)
    {
        $this->setStr($str);
    }

    /**
     * Checks encoding, normalizes whitespace/punctuation, and sets the name string.
     *
     * @param string $str a UTF-8 encoded string.
     * @return bool True on success.
     * @throws Exception if $str is not valid UTF-8.
     */
    public function setStr($str)
    {
        if (!$this->isValidUtf8($str)) {
            throw new Exception("Name is not encoded in UTF-8");
        }
        $this->str = $str;
        $this->norm();
        return true;
    }

    /**
     * @return string the current, normalized name string.
     */
    public function getStr()
    {
        return $this->str;
    }

    /**
     * Uses a regex to chop off and return part of the name string.
     * There are two parts: first, it finds the requested submatch, and then
     * it removes the *whole* regex match from the name string and normalizes.
     *
     * If the regex matches more than once, an exception is thrown and the
     * name string is left untouched.
     *
     * @param string  $regex         delimited pattern (without flags) matching the part to chop off
     * @param integer $submatchIndex which of the parenthesized submatches to return
     * @param string  $regexFlags    optional extra regex flags (appended after "ui")
     * @return string the submatch that got chopped off, or '' if nothing matched
     * @throws Exception if the regex matches more than once, or fails while replacing.
     */
    public function chopWithRegex($regex, $submatchIndex = 0, $regexFlags = '')
    {
        $regex = $regex . "ui" . $regexFlags; // unicode + case-insensitive
        $m = array();
        preg_match($regex, $this->str, $m);
        $subset = isset($m[$submatchIndex]) ? $m[$submatchIndex] : '';

        // Compare against '' explicitly: a matched "0" is a real result,
        // but would be discarded by a plain truthiness test.
        if ($subset === '') {
            return '';
        }

        // Build the result in a local first so that a failure leaves the
        // object unchanged and the error message shows the original name.
        $numReplacements = 0;
        $remainder = preg_replace($regex, ' ', $this->str, -1, $numReplacements);
        if ($remainder === null) {
            throw new Exception("The regex being used to find the name: '$this->str' could not be applied.");
        }
        if ($numReplacements > 1) {
            throw new Exception("The regex being used to find the name: '$this->str' has multiple matches.");
        }

        $this->str = $remainder;
        $this->norm();
        return $subset;
    }

    /**
     * Flips the front and back parts of a name with one another.
     * Front and back are determined by a specified character somewhere in the
     * middle of the string.
     *
     * @param string $flipAroundChar the literal character(s) demarcating the two halves you want to flip.
     * @return bool True on success (including when the delimiter is not found).
     * @throws Exception if the delimiter occurs more than once.
     */
    public function flip($flipAroundChar)
    {
        // The delimiter is literal text, not a pattern: quote it so that
        // characters such as ".", "|" or "/" are not read as regex syntax.
        $delimiter = preg_quote($flipAroundChar, '/');
        $substrings = preg_split("/$delimiter/u", $this->str);
        $parts = count($substrings);

        if ($parts > 2) {
            throw new Exception("Can't flip around multiple '$flipAroundChar' characters in: '$this->str'.");
        }
        if ($parts == 2) {
            $this->str = $substrings[1] . " " . $substrings[0];
            $this->norm();
        }
        return true; // if there's 1 or 0 $flipAroundChar found
    }

    /**
     * Removes extra whitespace and punctuation from $this->str.
     * Converts every run of whitespace to a single " ", strips whitespace
     * from both ends, and drops one trailing comma (together with any
     * whitespace in front of it) so the result never ends in whitespace.
     *
     * @return bool True on success
     */
    private function norm()
    {
        // Collapse first: afterwards the only whitespace left is single
        // ASCII spaces, so a plain trim(' ') is enough for the ends.
        $name = preg_replace('#\s+#u', ' ', $this->str);
        $name = trim($name, ' ');
        $name = preg_replace('#\s*,$#u', '', $name);

        $this->str = $name;
        return true;
    }

    /**
     * Tells whether a string is valid UTF-8.
     *
     * Uses drupal_validate_utf8() when it is defined; otherwise falls back to
     * a PCRE-based check (an empty string is valid, and anything else must be
     * matchable by a pattern in UTF-8 mode), so the class also works when it
     * is loaded without Drupal.
     *
     * @param string $str the string to check.
     * @return bool True if $str is valid UTF-8.
     */
    private function isValidUtf8($str)
    {
        if (function_exists('drupal_validate_utf8')) {
            return drupal_validate_utf8($str);
        }
        if (strlen((string) $str) == 0) {
            return true;
        }
        return preg_match('/^./us', $str) == 1;
    }
}