danielebarchiesi@6
|
1 <?php
|
danielebarchiesi@6
|
/**
 * Does cutting and matching stuff with a name string.
 * Note that the string has to be UTF8-encoded.
 *
 * The object holds a single working string. Each operation (chopping a part
 * off with a regex, flipping around a delimiter) mutates that string and then
 * re-normalizes its whitespace and trailing punctuation.
 */
class HumanNameParser_Name {
  /**
   * The current (normalized) name string.
   */
  private $str;

  /**
   * Builds a name object from a UTF-8 string.
   *
   * @param String $str a utf8-encoded string.
   * @throws Exception if the string is not valid UTF-8 (raised by setStr()).
   */
  public function __construct($str)
  {
    $this->setStr($str);
  }

  /**
   * Checks encoding, normalizes whitespace/punctuation, and sets the name string.
   *
   * @param String $str a utf8-encoded string.
   * @return Bool True on success
   * @throws Exception if drupal_validate_utf8() rejects the string.
   */
  public function setStr($str)
  {
    if (!drupal_validate_utf8($str)) {
      throw new Exception("Name is not encoded in UTF-8");
    }
    $this->str = $str;
    $this->norm();
    return true;
  }

  /**
   * Returns the current name string.
   *
   * @return String the name string as it stands after all operations so far.
   */
  public function getStr()
  {
    return $this->str;
  }

  /**
   * Uses a regex to chop off and return part of the namestring
   * There are two parts: first, it returns the matched substring,
   * and then it removes that substring from $this->str and normalizes.
   *
   * The "u" (unicode) and "i" (case-insensitive) modifiers are always
   * appended to $regex, followed by $regexFlags.
   *
   * The name string is only modified when exactly one match is removed; if
   * an exception is thrown the string is left untouched.
   *
   * @param string  $regex          matches the part of the namestring to chop off
   * @param integer $submatchIndex  which of the parenthesized submatches to use
   * @param string  $regexFlags     optional regex flags
   * @return string the part of the namestring that got chopped off, or '' when
   *                nothing (or only an empty string) matched
   * @throws Exception if the regex matches more than once, or if PCRE fails.
   */
  public function chopWithRegex($regex, $submatchIndex = 0, $regexFlags = '')
  {
    $pattern = $regex . "ui" . $regexFlags; // unicode + case-insensitive

    if (preg_match($pattern, $this->str, $m) === false) {
      throw new Exception("The regex '$pattern' could not be applied to the name: '$this->str'.");
    }
    $subset = (isset($m[$submatchIndex])) ? $m[$submatchIndex] : '';

    // Compare against '' explicitly: a plain truthiness test would treat a
    // matched "0" as "no match".
    if ($subset === '') {
      return '';
    }

    // Work on a copy so that a failure below does not corrupt the stored name.
    $remainder = preg_replace($pattern, ' ', $this->str, -1, $numReplacements);
    if ($remainder === null) {
      throw new Exception("The regex '$pattern' could not be applied to the name: '$this->str'.");
    }
    if ($numReplacements > 1) {
      throw new Exception("The regex being used to find the name: '$this->str' has multiple matches.");
    }

    $this->str = $remainder;
    $this->norm();
    return $subset;
  }

  /**
   * Flips the front and back parts of a name with one another.
   * Front and back are determined by a specified character somewhere in the
   * middle of the string.
   *
   * The delimiter is matched literally (it is escaped before being used in a
   * regex), so characters such as "." or "/" are safe to pass.
   *
   * @param String $flipAroundChar the character(s) demarcating the two halves you want to flip.
   * @return Bool True on success.
   * @throws Exception if the delimiter splits the name into more than two
   *                   parts, or if PCRE fails.
   */
  public function flip($flipAroundChar)
  {
    $delimiter = preg_quote($flipAroundChar, '/');
    $substrings = preg_split("/$delimiter/u", $this->str);
    if ($substrings === false) {
      throw new Exception("Can't split the name: '$this->str' around '$flipAroundChar'.");
    }

    $numParts = count($substrings);
    if ($numParts > 2) {
      throw new Exception("Can't flip around multiple '$flipAroundChar' characters in: '$this->str'.");
    }
    if ($numParts == 2) {
      $this->str = $substrings[1] . " " . $substrings[0];
      $this->norm();
    }
    return true; // if there's 1 or 0 $flipAroundChar found
  }

  /**
   * Removes extra whitespace and punctuation from $this->str
   * Strips whitespace chars from ends, strips redundant whitespace, converts
   * whitespace chars to " ", and strips trailing commas.
   *
   * @return Bool True on success
   */
  private function norm()
  {
    // Collapse every run of whitespace into a single space.
    $this->str = preg_replace("#\s+#u", " ", $this->str);
    // Drop whatever whitespace is left at the front.
    $this->str = preg_replace("#^\s+#u", "", $this->str);
    // Drop trailing whitespace and commas together, so that removing a
    // trailing comma can never leave a dangling space behind.
    $this->str = preg_replace("#[\s,]+$#u", "", $this->str);
    return true;
  }
}
|
danielebarchiesi@6
|
102 ?>
|