Mercurial > hg > rr-repo
comparison modules/biblio/includes/Name.php @ 6:a75ead649730
added biblio, admin_menu and reference modules
author | danieleb <danielebarchiesi@me.com> |
---|---|
date | Fri, 20 Sep 2013 11:18:21 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
5:d72257b2ddc2 | 6:a75ead649730 |
---|---|
1 <?php | |
/**
 * Does cutting and matching stuff with a name string.
 * Note that the string has to be UTF8-encoded.
 *
 * The wrapped string is re-normalized (see norm()) every time it is set,
 * chopped or flipped, so getStr() always returns the normalized form.
 */
class HumanNameParser_Name {
  /**
   * The current (normalized) name string.
   *
   * @var string
   */
  private $str;

  /**
   * Builds a name wrapper around the given string.
   *
   * @param string $str a UTF-8 encoded name string.
   * @throws Exception if $str is not valid UTF-8 (raised by setStr()).
   */
  public function __construct($str)
  {
    $this->setStr($str);
  }

  /**
   * Checks encoding, normalizes whitespace/punctuation, and sets the name string.
   *
   * @param string $str a UTF-8 encoded string.
   * @return bool True on success.
   * @throws Exception if $str is not valid UTF-8.
   */
  public function setStr($str)
  {
    if (!drupal_validate_utf8($str)) {
      throw new Exception("Name is not encoded in UTF-8");
    }
    $this->str = $str;
    $this->norm();
    return true;
  }

  /**
   * Returns the current, normalized name string.
   *
   * @return string
   */
  public function getStr()
  {
    return $this->str;
  }

  /**
   * Uses a regex to chop off and return part of the name string.
   *
   * There are two parts: first, it finds the requested submatch; then it
   * replaces the WHOLE regex match (not just the submatch) in $this->str with
   * a single space and normalizes the result.
   *
   * If the regex does not match, or the requested submatch is empty, the name
   * string is left untouched and '' is returned. If the regex matches more
   * than once, an exception is thrown and the name string is left untouched.
   *
   * @param string $regex matches the part of the name string to chop off
   *   (delimited, without flags; "ui" is always appended).
   * @param integer $submatchIndex which of the parenthesized submatches to return.
   * @param string $regexFlags optional extra regex flags.
   * @return string the part of the name string that got chopped off, or ''.
   * @throws Exception if the regex matches the name string more than once.
   */
  public function chopWithRegex($regex, $submatchIndex = 0, $regexFlags = '')
  {
    $regex = $regex . "ui" . $regexFlags; // unicode + case-insensitive
    preg_match($regex, $this->str, $m);
    $subset = (isset($m[$submatchIndex])) ? $m[$submatchIndex] : '';

    // Compare against '' explicitly: a plain truthiness test would treat a
    // legitimately matched "0" as "nothing matched".
    if ($subset === '') {
      return '';
    }

    // Work on a copy so that a failed chop does not leave the object holding
    // a half-chopped string.
    $remainder = preg_replace($regex, ' ', $this->str, -1, $numReplacements);
    if ($numReplacements > 1) {
      throw new Exception("The regex being used to find the name: '$this->str' has multiple matches.");
    }

    $this->str = $remainder;
    $this->norm();
    return $subset;
  }

  /**
   * Flips the front and back parts of a name with one another.
   *
   * Front and back are determined by a specified character somewhere in the
   * middle of the string. The delimiter is matched literally; regex
   * metacharacters in it have no special meaning.
   *
   * @param string $flipAroundChar the character(s) demarcating the two halves you want to flip.
   * @return bool True on success (including when the delimiter does not occur).
   * @throws Exception if the delimiter occurs more than once.
   */
  public function flip($flipAroundChar)
  {
    // Quote the delimiter so characters such as "." or "/" are taken
    // literally instead of being interpreted as (or breaking) the pattern.
    $delimiter = preg_quote($flipAroundChar, '/');
    $substrings = preg_split("/$delimiter/u", $this->str);

    if (count($substrings) == 2) {
      $this->str = $substrings[1] . " " . $substrings[0];
      $this->norm();
    }
    else if (count($substrings) > 2) {
      throw new Exception("Can't flip around multiple '$flipAroundChar' characters in: '$this->str'.");
    }
    return true; // if there's 1 or 0 $flipAroundChar found
  }

  /**
   * Removes extra whitespace and punctuation from $this->str.
   *
   * Converts every run of whitespace to a single " ", removes one trailing
   * comma (together with any whitespace around it), and strips whitespace
   * from both ends.
   *
   * @return bool True on success.
   */
  private function norm()
  {
    // Collapse whitespace runs first so the steps below only ever have to
    // deal with single spaces.
    $str = preg_replace("#\s+#u", " ", $this->str);
    // Remove the trailing comma and surrounding whitespace in one go, so no
    // dangling space is left behind where the comma used to be.
    $str = preg_replace("#\s*,?\s*$#u", "", $str, 1);
    $str = preg_replace("#^\s+#u", "", $str);

    $this->str = $str;
    return true;
  }
}
102 ?> |