comparison modules/biblio/includes/Name.php @ 6:a75ead649730

added biblio, admin_menu and reference modules
author danieleb <danielebarchiesi@me.com>
date Fri, 20 Sep 2013 11:18:21 +0100
parents
children
comparison
equal deleted inserted replaced
5:d72257b2ddc2 6:a75ead649730
1 <?php
/**
 * Holds a personal-name string and offers cut-and-match helpers for parsing it.
 *
 * The string has to be UTF-8 encoded. Every operation that changes the string
 * re-normalizes it afterwards (see norm()).
 */
class HumanNameParser_Name {
    /**
     * The current, normalized name string.
     *
     * @var string
     */
    private $str;

    /**
     * Creates a name object from a raw name string.
     *
     * @param string $str a UTF-8 encoded name string.
     * @throws Exception if $str is not valid UTF-8.
     */
    public function __construct($str)
    {
        $this->setStr($str);
    }

    /**
     * Checks encoding, normalizes whitespace/punctuation, and sets the name string.
     *
     * @param string $str a UTF-8 encoded string.
     * @return bool True on success.
     * @throws Exception if $str is not valid UTF-8.
     */
    public function setStr($str)
    {
        if (!drupal_validate_utf8($str)) {
            throw new Exception("Name is not encoded in UTF-8");
        }
        $this->str = $str;
        $this->norm();
        return true;
    }

    /**
     * Returns the current (normalized) name string.
     *
     * @return string
     */
    public function getStr()
    {
        return $this->str;
    }

    /**
     * Uses a regex to chop off and return part of the name string.
     *
     * There are two parts: first, the requested (sub)match is captured; then the
     * whole regex match is replaced by a single space in the name string and
     * the result is normalized. The name string is left untouched when nothing
     * matches or when an exception is thrown.
     *
     * @param string $regex matches the part of the name string to chop off
     *   (delimited pattern without trailing modifiers).
     * @param integer $submatchIndex which of the parenthesized submatches to return.
     * @param string $regexFlags optional extra regex modifiers, appended after "ui".
     * @return string the part of the name string that got chopped off, or '' if
     *   the regex (or the requested submatch) did not match.
     * @throws Exception if the regex matches more than once, or if the
     *   replacement fails.
     */
    public function chopWithRegex($regex, $submatchIndex = 0, $regexFlags = '')
    {
        $regex = $regex . "ui" . $regexFlags; // unicode + case-insensitive
        $matches = array();
        preg_match($regex, $this->str, $matches);
        $subset = isset($matches[$submatchIndex]) ? $matches[$submatchIndex] : '';

        // Compare against '' explicitly: a plain truthiness test would treat a
        // matched "0" as "nothing matched".
        if ($subset === '') {
            return '';
        }

        // Build the result in a local variable so that the name string is only
        // modified once we know the chop is valid.
        $numReplacements = 0;
        $remainder = preg_replace($regex, ' ', $this->str, -1, $numReplacements);
        if ($remainder === null) {
            throw new Exception("The regex '$regex' could not be applied to the name: '$this->str'.");
        }
        if ($numReplacements > 1) {
            throw new Exception("The regex being used to find the name: '$this->str' has multiple matches.");
        }

        $this->str = $remainder;
        $this->norm();
        return $subset;
    }

    /**
     * Flips the front and back parts of a name with one another.
     *
     * Front and back are determined by a specified character somewhere in the
     * middle of the string. The delimiter is matched literally.
     *
     * @param string $flipAroundChar the character(s) demarcating the two halves you want to flip.
     * @return bool True on success (including when the delimiter is not found).
     * @throws Exception if the delimiter occurs more than once.
     */
    public function flip($flipAroundChar)
    {
        // Quote the delimiter so characters such as "." or "/" are taken
        // literally instead of being interpreted as regex syntax.
        $pattern = '/' . preg_quote($flipAroundChar, '/') . '/u';
        $substrings = preg_split($pattern, $this->str);
        $parts = count($substrings);

        if ($parts > 2) {
            throw new Exception("Can't flip around multiple '$flipAroundChar' characters in: '$this->str'.");
        }
        if ($parts == 2) {
            $this->str = $substrings[1] . " " . $substrings[0];
            $this->norm();
        }
        return true; // if there's 1 or 0 $flipAroundChar found
    }

    /**
     * Removes extra whitespace and punctuation from $this->str.
     *
     * Strips whitespace chars from both ends, collapses every run of whitespace
     * into a single " ", and removes one trailing comma (together with any
     * whitespace directly before it).
     *
     * @return bool True on success.
     */
    private function norm()
    {
        $str = $this->str;
        $str = preg_replace('#^\s*#u', '', $str);
        $str = preg_replace('#\s*$#u', '', $str);
        $str = preg_replace('#\s+#u', ' ', $str);
        // Remove the trailing comma outright; replacing it with a space would
        // leave trailing whitespace behind on an already-trimmed string.
        $str = preg_replace('#\s*,$#u', '', $str);
        $this->str = $str;
        return true;
    }
}
102 ?>