diff modules/biblio/includes/Name.php @ 6:a75ead649730

added biblio, admin_menu and reference modules
author danieleb <danielebarchiesi@me.com>
date Fri, 20 Sep 2013 11:18:21 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/modules/biblio/includes/Name.php	Fri Sep 20 11:18:21 2013 +0100
@@ -0,0 +1,102 @@
+<?php
+/**
+ * Holds a name string and does cutting and matching on it.
+ * The string must be UTF-8 encoded; it is re-normalized after every change.
+ */
+class HumanNameParser_Name {
+  private $str;
+
+  /** @throws Exception if $str is not valid UTF-8 */
+  public function __construct($str)
+  {
+    $this->setStr($str);
+  }
+
+  /**
+   * Checks encoding, normalizes whitespace/punctuation, and sets the name string.
+   *
+   * @param string $str a UTF-8 encoded string
+   * @return bool true on success
+   * @throws Exception if $str is not valid UTF-8
+   */
+  public function setStr($str)
+  {
+    if (!drupal_validate_utf8($str)) {
+      throw new Exception("Name is not encoded in UTF-8");
+    }
+    $this->str = $str;
+    $this->norm();
+    return true;
+  }
+
+  /** @return string the current, normalized name string */
+  public function getStr()
+  {
+    return $this->str;
+  }
+
+  /**
+   * Uses a regex to chop off and return part of the name string.
+   *
+   * The whole regex match (not only the submatch) is replaced by a space and the
+   * string is re-normalized; a failed or throwing chop leaves the string as is.
+   *
+   * @param string $regex delimited pattern without flags ("ui" is appended)
+   * @param int $submatchIndex which of the parenthesized submatches to return
+   * @param string $regexFlags optional extra regex flags
+   * @return string the part that got chopped off, or '' if nothing was chopped
+   * @throws Exception if the regex matches more than once
+   */
+  public function chopWithRegex($regex, $submatchIndex = 0, $regexFlags = '')
+  {
+    $regex = $regex . "ui" . $regexFlags; // unicode + case-insensitive
+    preg_match($regex, $this->str, $m);
+    $subset = isset($m[$submatchIndex]) ? $m[$submatchIndex] : '';
+    if ($subset === '') { // not a truthiness test: a matched "0" still counts
+      return '';
+    }
+    $remainder = preg_replace($regex, ' ', $this->str, -1, $numReplacements);
+    if ($numReplacements > 1) {
+      throw new Exception("The regex being used to find the name: '$this->str' has multiple matches.");
+    }
+    $this->str = $remainder; // committed only once the single-match check passed
+    $this->norm();
+    return $subset;
+  }
+
+  /**
+   * Flips the front and back parts of a name with one another.
+   *
+   * @param string $flipAroundChar literal character(s) separating the two halves
+   * @return bool true on success, including when the separator is not found
+   * @throws Exception if the separator occurs more than once
+   */
+  public function flip($flipAroundChar)
+  {
+    // Quote the separator so regex metacharacters ('.', '/', ...) stay literal.
+    $substrings = preg_split('/' . preg_quote($flipAroundChar, '/') . '/u', $this->str);
+    if (count($substrings) > 2) {
+      throw new Exception("Can't flip around multiple '$flipAroundChar' characters in: '$this->str'.");
+    }
+    if (count($substrings) == 2) {
+      $this->str = $substrings[1] . " " . $substrings[0];
+      $this->norm();
+    }
+    return true;
+  }
+
+  /**
+   * Collapses whitespace runs to " ", trims both ends, drops one trailing comma.
+   *
+   * @return bool true on success
+   */
+  private function norm()
+  {
+    $this->str = preg_replace('#\s+#u', ' ', $this->str);
+    $this->str = preg_replace('#^\s+|\s+$#u', '', $this->str);
+    // Strip the comma and any whitespace before it so nothing trails.
+    $this->str = preg_replace('#\s*,$#u', '', $this->str);
+    return true;
+  }
+}
+?>