Mercurial > hg > rr-repo
view modules/biblio/includes/biblio.util.inc @ 6:a75ead649730
added biblio, admin_menu and reference modules
author | danieleb <danielebarchiesi@me.com> |
---|---|
date | Fri, 20 Sep 2013 11:18:21 +0100 |
parents | |
children |
line wrap: on
line source
<?php

/**
 * Builds a normalized form of a title, suitable for use as a sort key.
 *
 * Punctuation is stripped, leading stop words (configurable through the
 * 'biblio_stop_words' variable as a comma separated list) are dropped and
 * the result is truncated to 64 characters.
 *
 * @param string $title
 *   The raw title.
 *
 * @return string
 *   The normalized title, at most 64 characters long. May be an empty string
 *   when the title consists only of punctuation and/or stop words.
 */
function biblio_normalize_title($title) {
  $default_stop_words = 'a,an,the,is,on';
  $stop_words = explode(',', variable_get('biblio_stop_words', $default_stop_words));

  // The error suppression is deliberate: this call only probes whether the
  // PCRE library understands Unicode properties.
  if (!@preg_match('/\pL/u', 'a')) {
    // Probably a broken PCRE library, fall back to the ASCII-only filter.
    $title = trim(_strip_punctuation($title));
  }
  else {
    // Unicode safe filter for the value.
    $title = trim(_strip_punctuation_utf8($title));
  }

  // The Unicode filter keeps punctuation that sits inside words, numbers and
  // URLs; this second pass removes whatever ASCII punctuation is left.
  $title = trim(_strip_punctuation($title));

  if (function_exists('mb_regex_encoding')) {
    mb_regex_encoding("utf-8");
    $title_words = mb_split(' +', $title);
  }
  else {
    $title_words = explode(' ', $title);
  }

  // Drop leading stop words. The isset() guard stops the loop once every word
  // has been consumed; without it an empty entry in the stop word list (for
  // example a trailing comma in the setting) would make the loop spin forever.
  while (isset($title_words[0]) && in_array(drupal_strtolower($title_words[0]), $stop_words, TRUE)) {
    array_shift($title_words);
  }

  return drupal_substr(implode(' ', $title_words), 0, 64);
}

/**
 * Renders the COinS (ContextObject in SPAN) markup for a biblio node.
 *
 * Copyright: Matthias Steffens <mailto:refbase@extracts.de> and the file's
 * original author.
 * Original Author: Richard Karnesky <mailto:karnesky@gmail.com>
 *
 * Adapted for biblio: Ron Jerome
 *
 * @param object $node
 *   A biblio node object; $node->biblio_type selects the metadata format.
 *
 * @return string
 *   An empty <span class="Z3988"> element whose title attribute carries the
 *   OpenURL ContextObject.
 */
function biblio_coins($node) {
  // fmt_info (type)
  $fmt = "info:ofi/fmt:kev:mtx:";
  // 'dissertation' is compatible with the 1.0 spec, but not the 0.1 spec.
  if ($node->biblio_type == 108) {
    $fmt .= "dissertation";
  }
  elseif ($node->biblio_type == 102) {
    $fmt .= "journal";
  }
  elseif ($node->biblio_type == 100 || $node->biblio_type == 101) {
    $fmt .= "book";
  }
  // 'dc' (dublin core) is compatible with the 1.0 spec, but not the 0.1 spec.
  // We default to this, as it is the most generic type.
  else {
    $fmt .= "dc";
  }

  $co = biblio_contextObject($node);
  $coins = "ctx_ver=Z39.88-2004&rft_val_fmt=" . urlencode($fmt);
  foreach ($co as $coKey => $coValue) {
    // The context object numbers additional authors ("rft.au1", "rft.au2",
    // ...) to keep the array keys unique; the number is dropped again here so
    // that the key is simply repeated in the output.
    $coKey = preg_replace("/au[0-9]*/", "au", $coKey);
    // 'urlencode()' differs from 'rawurlencode()' (i.e., RFC1738 encoding)
    // in that spaces are encoded as plus (+) signs.
    $coins .= "&" . $coKey . "=" . urlencode($coValue);
  }

  $coinsSpan = "<span class=\"Z3988\" title=\"" . $coins . "\"></span>";
  return $coinsSpan;
}

/**
 * Collects the OpenURL ContextObject key/value pairs for a biblio node.
 *
 * Copyright: Matthias Steffens <mailto:refbase@extracts.de> and the file's
 * original author.
 * Original Author: Richard Karnesky <mailto:karnesky@gmail.com>
 *
 * Adapted for biblio: Ron Jerome
 *
 * @param object $node
 *   A biblio node object. Only non-empty fields contribute to the result.
 *
 * @return array
 *   Associative array keyed by ContextObject field name ("rft.atitle",
 *   "rft.date", ...). Additional authors are keyed "rft.au<N>", where N is
 *   the contributor's position in $node->biblio_contributors.
 */
function biblio_contextObject($node) {
  $co = array();

  // Disabled in the original implementation, kept for reference:
  //
  // rfr_id
  // $co["rfr_id"] = "info:sid/". ereg_replace("http://", "", $base_url);
  //
  // genre (type)
  // if (isset($node->biblio_type)) {
  //   if ($node->biblio_type == 102)
  //     $co["rft.genre"] = "article";
  //   elseif ($node->biblio_type == 101) $co["rft.genre"] = "bookitem";
  //   elseif ($node->biblio_type == 100) $co["rft.genre"] = "book";
  //   elseif ($node->biblio_type == "Journal") $co["rft.genre"] = "journal";
  // }

  // atitle, btitle, title (title, publication)
  if (($node->biblio_type == 102) || ($node->biblio_type == 101)) {
    if (!empty($node->title)) {
      $co["rft.atitle"] = check_plain($node->title);
    }
    if (!empty($node->biblio_secondary_title)) {
      $co["rft.title"] = check_plain($node->biblio_secondary_title);
      if ($node->biblio_type == 101) {
        $co["rft.btitle"] = check_plain($node->biblio_secondary_title);
      }
    }
  }
  elseif (!empty($node->title)) {
    $co["rft.title"] = check_plain($node->title);
  }
  if (($node->biblio_type == 100) && (!empty($node->biblio_secondary_title))) {
    $co["rft.btitle"] = check_plain($node->biblio_secondary_title);
  }

  // stitle (abbrev_journal)
  if (!empty($node->biblio_short_title)) {
    $co["rft.stitle"] = check_plain($node->biblio_short_title);
  }

  // series (series_title)
  if (!empty($node->biblio_tertiary_title)) {
    $co["rft.series"] = check_plain($node->biblio_tertiary_title);
  }

  // issn
  if (!empty($node->biblio_issn)) {
    $co["rft.issn"] = check_plain($node->biblio_issn);
  }

  // isbn
  if (!empty($node->biblio_isbn)) {
    $co["rft.isbn"] = check_plain($node->biblio_isbn);
  }

  // date (year)
  if (!empty($node->biblio_year)) {
    $co["rft.date"] = check_plain($node->biblio_year);
  }

  // volume
  if (!empty($node->biblio_volume)) {
    $co["rft.volume"] = check_plain($node->biblio_volume);
  }

  // issue
  if (!empty($node->biblio_issue)) {
    $co["rft.issue"] = check_plain($node->biblio_issue);
  }

  // spage, epage, tpages (pages)
  // NOTE: lifted from modsxml.inc.php--should throw some into a new include file
  if (!empty($node->biblio_pages)) {
    if (preg_match("/[0-9] *- *[0-9]/", $node->biblio_pages)) {
      list($pagestart, $pageend) = preg_split('/\s*[-]\s*/', $node->biblio_pages);
      // Ranges that do not ascend are ignored altogether.
      if ($pagestart < $pageend) {
        $co["rft.spage"] = check_plain($pagestart);
        $co["rft.epage"] = check_plain($pageend);
      }
    }
    elseif ($node->biblio_type == 100) {
      // "Book Whole": reduce e.g. "123 pp." to the bare page count.
      $pagetotal = preg_replace('/^(\d+)\s*pp?\.?$/', "\\1", $node->biblio_pages);
      $co["rft.tpages"] = check_plain($pagetotal);
    }
    else {
      $co["rft.spage"] = check_plain($node->biblio_pages);
    }
  }

  // aulast, aufirst, author (author)
  if (!empty($node->biblio_contributors)) {
    // The first contributor is reported through the dedicated name fields.
    if (!empty($node->biblio_contributors[0]['lastname'])) {
      $co["rft.aulast"] = check_plain($node->biblio_contributors[0]['lastname']);
    }
    if (!empty($node->biblio_contributors[0]['firstname'])) {
      $co["rft.aufirst"] = check_plain($node->biblio_contributors[0]['firstname']);
    }
    elseif (!empty($node->biblio_contributors[0]['initials'])) {
      $co["rft.auinit"] = check_plain($node->biblio_contributors[0]['initials']);
    }

    // Remaining contributors of category 1 become "Last, First" entries.
    $contributor_count = count($node->biblio_contributors);
    for ($i = 1; $i < $contributor_count; $i++) {
      if (!isset($node->biblio_contributors[$i])) {
        continue;
      }
      $author = $node->biblio_contributors[$i];
      if (!isset($author['auth_category']) || $author['auth_category'] != 1) {
        continue;
      }

      // Start from a clean slate for every contributor, otherwise a
      // contributor without a last name would inherit (and extend) the name
      // built for the previous one.
      $au = '';
      if (!empty($author['lastname'])) {
        $au = $author['lastname'];
        if (!empty($author['firstname']) || !empty($author['initials'])) {
          $au .= ", ";
        }
      }
      if (!empty($author['firstname'])) {
        $au .= $author['firstname'];
      }
      elseif (!empty($author['initials'])) {
        $au .= $author['initials'];
      }

      if (!empty($au)) {
        $co["rft.au" . $i] = $au;
      }
    }
  }

  // pub (publisher)
  if (!empty($node->biblio_publisher)) {
    $co["rft.pub"] = check_plain($node->biblio_publisher);
  }

  // place
  if (!empty($node->biblio_place_published)) {
    $co["rft.place"] = check_plain($node->biblio_place_published);
  }

  // id (doi, url)
  if (!empty($node->biblio_doi)) {
    $co["rft_id"] = "info:doi/" . check_plain($node->biblio_doi);
  }
  // elseif (!empty($node->biblio_url)) {
  //   $co["rft_id"] = $node->biblio_url;
  // }

  return $co;
}

/**
 * Generates (and, where possible, stores) the COinS markup of biblio nodes.
 *
 * - Empty $node: the markup of every node of type 'biblio' is regenerated
 *   and written to the {biblio} table, then the user is redirected to the
 *   front page.
 * - $node without a revision id: the markup is only attached to the object as
 *   $node->biblio_coins, nothing is written to the database.
 * - Otherwise: the markup is attached to the object and stored in the
 *   {biblio} row of that revision.
 *
 * @param object|null $node
 *   The node to process, passed by reference.
 */
function biblio_coins_generate(& $node) {
  if (empty($node)) {
    // This case has to be tested first: an empty $node never has a vid, so
    // testing for the vid first would make the bulk rebuild unreachable.
    $result = db_query("SELECT nr.*, b.* FROM {node} AS n LEFT JOIN {node_revision} AS nr ON n.vid = nr.vid LEFT JOIN {biblio} AS b ON n.vid = b.vid WHERE n.type = 'biblio' ");
    // A dedicated loop variable is used so that the caller's variable, which
    // is bound by reference, is left untouched.
    foreach ($result as $record) {
      $record->biblio_coins = biblio_coins($record);
      db_update('biblio')
        ->fields(array('biblio_coins' => $record->biblio_coins))
        ->condition('vid', $record->vid)
        ->execute();
    }
    drupal_goto('');
    return;
  }

  $node->biblio_coins = biblio_coins($node);

  if (!isset($node->vid)) {
    // No revision id to key the database row on.
    return;
  }

  db_update('biblio')
    ->fields(array('biblio_coins' => $node->biblio_coins))
    ->condition('vid', $node->vid)
    ->execute();
}

/**
 * Strips ASCII punctuation characters from a string.
 *
 * @param string $text
 *   The text to strip.
 *
 * @return string
 *   The text with every character of the POSIX [:punct:] class removed.
 */
function _strip_punctuation($text) {
  return preg_replace("/[[:punct:]]/", '', $text);
}

/**
 * Copyright (c) 2008, David R. Nadeau, NadeauSoftware.com.
 * All rights reserved.
 *
 * Strip punctuation characters from UTF-8 text.
 *
 * Characters stripped from the text include characters in the following
 * Unicode categories:
 *
 *   Separators
 *   Control characters
 *   Formatting characters
 *   Surrogates
 *   Open and close quotes
 *   Open and close brackets
 *   Dashes
 *   Connectors
 *   Number separators
 *   Spaces
 *   Other punctuation
 *
 * Exceptions are made for punctuation characters that occur within URLs
 * (such as [ ] : ; @ & ? and others), within numbers (such as . , % # '),
 * and within words (such as - and ').
 *
 * Parameters:
 *   text    the UTF-8 text to strip
 *
 * Return values:
 *   the stripped UTF-8 text.
 *
 * See also:
 *   http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page
 */
function _strip_punctuation_utf8( $text ) {
  $urlbrackets    = '\[\]\(\)';
  $urlspacebefore = ':;\'_\*%@&?!' . $urlbrackets;
  $urlspaceafter  = '\.,:;\'\-_\*@&\/\\\\\?!#' . $urlbrackets;
  $urlall         = '\.,:;\'\-_\*%@&\/\\\\\?!#' . $urlbrackets;

  $specialquotes  = '\'"\*<>';

  $fullstop       = '\x{002E}\x{FE52}\x{FF0E}';
  $comma          = '\x{002C}\x{FE50}\x{FF0C}';
  $arabsep        = '\x{066B}\x{066C}';
  $numseparators  = $fullstop . $comma . $arabsep;

  $numbersign     = '\x{0023}\x{FE5F}\x{FF03}';
  $percent        = '\x{066A}\x{0025}\x{066A}\x{FE6A}\x{FF05}\x{2030}\x{2031}';
  $prime          = '\x{2032}\x{2033}\x{2034}\x{2057}';
  $nummodifiers   = $numbersign . $percent . $prime;

  return preg_replace(
    array(
      // Remove separator, control, formatting, surrogate,
      // open/close quotes.
      '/[\p{Z}\p{Cc}\p{Cf}\p{Cs}\p{Pi}\p{Pf}]/u',
      // Remove other punctuation except special cases
      '/\p{Po}(?<![' . $specialquotes . $numseparators . $urlall . $nummodifiers . '])/u',
      // Remove non-URL open/close brackets, except URL brackets.
      '/[\p{Ps}\p{Pe}](?<![' . $urlbrackets . '])/u',
      // Remove special quotes, dashes, connectors, number
      // separators, and URL characters followed by a space
      '/[' . $specialquotes . $numseparators . $urlspaceafter . '\p{Pd}\p{Pc}]+((?= )|$)/u',
      // Remove special quotes, connectors, and URL characters
      // preceded by a space
      '/((?<= )|^)[' . $specialquotes . $urlspacebefore . '\p{Pc}]+/u',
      // Remove dashes preceded by a space, but not followed by a number
      '/((?<= )|^)\p{Pd}+(?![\p{N}\p{Sc}])/u',
      // Remove consecutive spaces
      '/ +/',
    ),
    ' ',
    $text
  );
}

/**
 * Copyright (c) 2008, David R. Nadeau, NadeauSoftware.com.
 * All rights reserved.
 *
 * Strip symbol characters from UTF-8 text.
 *
 * Characters stripped from the text include characters in the following
 * Unicode categories:
 *
 *   Modifier symbols
 *   Private use symbols
 *   Math symbols
 *   Other symbols
 *
 * Exceptions are made for math symbols embedded within numbers (such as
 * + - /), math symbols used within URLs (such as = ~), units of measure
 * symbols, and ideograph parts. Currency symbols are not removed.
 *
 * Parameters:
 *   text    the UTF-8 text to strip
 *
 * Return values:
 *   the stripped UTF-8 text.
 *
 * See also:
 *   http://nadeausoftware.com/articles/2007/09/php_tip_how_strip_symbol_characters_web_page
 */
function _strip_symbols( $text ) {
  $plus   = '\+\x{FE62}\x{FF0B}\x{208A}\x{207A}';
  $minus  = '\x{2012}\x{208B}\x{207B}';

  $units  = '\\x{00B0}\x{2103}\x{2109}\\x{23CD}';
  $units .= '\\x{32CC}-\\x{32CE}';
  $units .= '\\x{3300}-\\x{3357}';
  $units .= '\\x{3371}-\\x{33DF}';
  $units .= '\\x{33FF}';

  $ideo   = '\\x{2E80}-\\x{2EF3}';
  $ideo  .= '\\x{2F00}-\\x{2FD5}';
  $ideo  .= '\\x{2FF0}-\\x{2FFB}';
  $ideo  .= '\\x{3037}-\\x{303F}';
  $ideo  .= '\\x{3190}-\\x{319F}';
  $ideo  .= '\\x{31C0}-\\x{31CF}';
  $ideo  .= '\\x{32C0}-\\x{32CB}';
  $ideo  .= '\\x{3358}-\\x{3370}';
  $ideo  .= '\\x{33E0}-\\x{33FE}';
  $ideo  .= '\\x{A490}-\\x{A4C6}';

  return preg_replace(
    array(
      // Remove modifier and private use symbols.
      '/[\p{Sk}\p{Co}]/u',
      // Remove math symbols except + - = ~ and fraction slash
      '/\p{Sm}(?<![' . $plus . $minus . '=~\x{2044}])/u',
      // Remove + - if space before, no number or currency after
      '/((?<= )|^)[' . $plus . $minus . ']+((?![\p{N}\p{Sc}])|$)/u',
      // Remove = if space before
      '/((?<= )|^)=+/u',
      // Remove + - = ~ if space after
      '/[' . $plus . $minus . '=~]+((?= )|$)/u',
      // Remove other symbols except units and ideograph parts
      '/\p{So}(?<![' . $units . $ideo . '])/u',
      // Remove consecutive white space
      '/ +/',
    ),
    ' ',
    $text
  );
}

/**
 * Remove HTML tags, including invisible text such as style and
 * script code, and embedded objects. Add line breaks around
 * block-level tags to prevent word joining after tag removal.
 *
 * Parameters:
 *   text    the HTML text to strip
 *
 * Return values:
 *   the text with all tags removed.
 */
function _strip_html_tags( $text ) {
  $text = preg_replace(
    array(
      // Remove invisible content
      '@<head[^>]*?>.*?</head>@siu',
      '@<style[^>]*?>.*?</style>@siu',
      '@<script[^>]*?.*?</script>@siu',
      '@<object[^>]*?.*?</object>@siu',
      '@<embed[^>]*?.*?</embed>@siu',
      '@<applet[^>]*?.*?</applet>@siu',
      '@<noframes[^>]*?.*?</noframes>@siu',
      '@<noscript[^>]*?.*?</noscript>@siu',
      '@<noembed[^>]*?.*?</noembed>@siu',
      // Add line breaks before and after blocks
      '@</?((address)|(blockquote)|(center)|(del))@iu',
      '@</?((div)|(h[1-9])|(ins)|(isindex)|(p)|(pre))@iu',
      '@</?((dir)|(dl)|(dt)|(dd)|(li)|(menu)|(ol)|(ul))@iu',
      '@</?((table)|(th)|(td)|(caption))@iu',
      '@</?((form)|(button)|(fieldset)|(legend)|(input))@iu',
      '@</?((label)|(select)|(optgroup)|(option)|(textarea))@iu',
      '@</?((frameset)|(frame)|(iframe))@iu',
    ),
    array(
      // One replacement per "invisible content" pattern above.
      ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
      // Block-level tags: the matched tag start is kept ($0) and is removed
      // by strip_tags() below; only the line break survives.
      "\n\$0", "\n\$0", "\n\$0", "\n\$0", "\n\$0", "\n\$0",
      "\n\$0", "\n\$0",
    ),
    $text
  );
  return strip_tags( $text );
}