/**
 * Look up citation metadata for the given citation description:
 * search Worldcat for OCLC numbers, resolve these to ISBNs via
 * xISBN and then query either xISBN (no API key configured) or
 * Worldcat (API key configured) for the citation.
 *
 * @see Filter::process()
 * @param $citationDescription MetadataDescription
 * @return MetadataDescription the description returned by the
 *  xISBN or Worldcat lookup, or null if no result could be obtained
 */
function &process(&$citationDescription) {
    // Returning by reference requires a variable, even for null.
    $nullVar = null;

    // Get the search strings
    $searchTemplates =& $this->_getSearchTemplates();
    $searchStrings = $this->constructSearchStrings($searchTemplates, $citationDescription);

    // Run the searches, in order, until we have a result
    $matches = array();
    $searchParams = array('qt' => 'worldcat_org_all');
    foreach ($searchStrings as $searchString) {
        $searchParams['q'] = $searchString;

        // Worldcat Web search; results are (mal-formed) XHTML
        if (is_null($result = $this->callWebService(WORLDCAT_WEBSERVICE_SEARCH, $searchParams, XSL_TRANSFORMER_DOCTYPE_STRING))) {
            return $nullVar;
        }

        // parse the OCLC numbers from search results
        String::regexp_match_all('/id="itemid_(\\d+)"/', $result, $matches);
        if (!empty($matches[1])) {
            break;
        }
    }

    // If we don't have an OCLC number, then we cannot get any metadata
    if (empty($matches[1])) {
        return $nullVar;
    }

    // use xISBN because it's free; stop at the first OCLC number
    // that resolves to a list of ISBNs
    foreach ($matches[1] as $oclcId) {
        $isbns = $this->_oclcToIsbns($oclcId);
        if (is_array($isbns)) {
            break;
        }
    }
    if (is_null($isbns)) {
        return $nullVar;
    }

    $apiKey = $this->getApiKey();
    if (empty($apiKey)) {
        // Use the first ISBN if we have multiple
        $citationDescription =& $this->_lookupXIsbn($isbns[0]);
        return $citationDescription;
    } elseif (!empty($isbns[0])) {
        // Worldcat lookup only works with an API key
        if (is_null($citationDescription =& $this->_lookupWorldcat($matches[1][0]))) {
            return $nullVar;
        }

        // Prefer ISBN from xISBN. The elseif guard above already
        // guarantees that the first ISBN is non-empty.
        // NOTE: the statement key used to be misspelled as 'ibsn'.
        $citationDescription->addStatement('isbn', $isbns[0], null, true);
        return $citationDescription;
    }

    // Nothing found
    return $nullVar;
}
/**
 * Helper function: balances dangling XHTML tags in a string.
 *
 * By default a closing tag is appended for opening tags that have
 * no counterpart. When $open is true, an opening tag is prepended
 * for closing tags that have no counterpart instead.
 *
 * Modified from http://milianw.de/code-snippets/close-html-tags
 * by Milian Wolff <*****@*****.**>
 * @param $string string
 * @param $open boolean true to add missing opening tags rather
 *  than missing closing tags
 * @return string
 */
function _closeTags($string, $open = false) {
    // Collect the names of all opening tags.
    String::regexp_match_all("#<([a-z]+)( .*)?(?!/)>#iU", $string, $result);
    $opened = $result[1];

    // Collect the names of all closing tags.
    String::regexp_match_all("#</([a-z]+)>#iU", $string, $result);
    $closed = $result[1];

    $openedCount = count($opened);
    $closedCount = count($closed);

    // Same number of opening and closing tags: nothing to do.
    if ($closedCount == $openedCount) {
        return $string;
    }

    // Work from the innermost (last seen) tag outwards.
    $opened = array_reverse($opened);
    $closed = array_reverse($closed);

    if ($open) {
        // Prepend an opening tag for every unmatched closing tag.
        foreach ($closed as $tagName) {
            $partnerKey = array_search($tagName, $opened);
            if ($partnerKey === false) {
                $string = '<' . $tagName . '>' . $string;
            } else {
                // Consume the matching opening tag.
                unset($opened[$partnerKey]);
            }
        }
        return $string;
    }

    // Append a closing tag for every unmatched opening tag.
    foreach ($opened as $tagName) {
        $partnerKey = array_search($tagName, $closed);
        if ($partnerKey === false) {
            $string .= '</' . $tagName . '>';
        } else {
            // Consume the matching closing tag.
            unset($closed[$partnerKey]);
        }
    }
    return $string;
}
/**
 * Splits a string naming several persons and converts every
 * part into an NLM name description.
 *
 * @param $personsString string
 * @param $title boolean true to parse for title
 * @param $degrees boolean true to parse for degrees
 * @return array the results of _parsePersonString() for every
 *  person found, optionally followed by a single 'et-al' marker
 *  (PERSON_STRING_FILTER_ETAL).
 */
function &_parsePersonsString($personsString, $title, $degrees) {
    // Detect a trailing 'et al' by comparing the string length
    // before and after stripping it.
    $lengthWithEtAl = String::strlen($personsString);
    $personsString = String::regexp_replace('/et ?al$/', '', $personsString);
    $hasEtAl = ($lengthWithEtAl != String::strlen($personsString));

    // Strip surrounding punctuation.
    $personsString = trim($personsString, ':;, ');

    // First attempt: split on the simple separators.
    $personStrings = String::iterativeExplode(array(':', ';'), $personsString);

    // The simple separators did not split anything, so fall back
    // to more complex patterns for multiple-person strings.
    if (count($personStrings) == 1) {
        // Each entry is a pair: the first pattern must match the whole
        // string, the second one extracts the individual names.
        $patternPairs = array(
            array(
                '/^((([^ \\t\\n\\r\\f\\v,.&]{2,}\\s*)+,\\s*([A-Z]\\.\\s*)+),\\s*)+(\\&|\\.\\s\\.\\s\\.)\\s*([^ \\t\\n\\r\\f\\v,.&]{2,}\\s*,\\s*([A-Z]\\.\\s*)+)$/i',
                '/(?:[^ \\t\\n\\r\\f\\v,.&]{2,}\\s*)+,\\s*(?:[A-Z]\\.\\s*)+/i'
            ),
            array(
                '/^((([^ \\t\\n\\r\\f\\v,&]+\\s+)+[^ \\t\\n\\r\\f\\v,&]+\\s*)[,&]\\s*)+(([^ \\t\\n\\r\\f\\v,&]+\\s+)+[^ \\t\\n\\r\\f\\v,&]+)/i',
                '/(?:(?:[^ \\t\\n\\r\\f\\v,&.]+|[^ \\t\\n\\r\\f\\v,&]{2,})\\s+)+(?:[^ \\t\\n\\r\\f\\v,&.]+|[^ \\t\\n\\r\\f\\v,&]{2,})/i'
            )
        );

        $patternFound = false;
        foreach ($patternPairs as $patternPair) {
            $patternFound = String::regexp_match($patternPair[0], $personsString);
            if (!$patternFound) {
                continue;
            }

            // The first matching pattern wins: extract the names.
            $success = String::regexp_match_all($patternPair[1], $personsString, $personStrings);
            assert($success && count($personStrings) == 1);
            $personStrings = $personStrings[0];
            break;
        }

        if (!$patternFound) {
            // No pattern matched: treat the input as a single person.
            $personStrings = array($personsString);
        }
    }

    // Convert every person string.
    $persons = array();
    foreach ($personStrings as $personString) {
        $persons[] =& $this->_parsePersonString($personString, $title, $degrees);
    }

    // Append the et-al marker if one was detected.
    if ($hasEtAl) {
        $persons[] = PERSON_STRING_FILTER_ETAL;
    }

    return $persons;
}
/**
 * Extract the parameter references of the form {$myParameterName}
 * from a locale string.
 * @param $source string
 * @return array the references found (including the surrounding
 *  braces), or an empty array if there are none
 */
function getParameterNames($source) {
    $matches = null;
    String::regexp_match_all('/({\\$[^}]+})/', $source, $matches);

    // Drop the list of whole-pattern matches so that the captured
    // group moves to index 0.
    array_shift($matches);

    return isset($matches[0]) ? $matches[0] : array();
}
/**
 * Calculate the differences between two strings and
 * produce an array with three types of entries: added
 * substrings, deleted substrings and unchanged substrings.
 *
 * The calculation is optimized to identify the common
 * largest substring. The parts before and after that
 * substring are diffed recursively.
 *
 * The return value is an array of the following format:
 *
 * array(
 *   array( diff-type => substring ),
 *   array(...)
 * )
 *
 * whereby diff-type can be one of:
 *  -1 = deletion
 *   0 = common substring
 *   1 = addition
 *
 * @param $originalString string
 * @param $editedString string
 * @return array
 */
static function diff($originalString, $editedString) {
    // Split strings into character arrays (multi-byte compatible).
    // The "s" modifier makes "." match line breaks too. Without it
    // line breaks would be dropped from the character arrays and
    // the array indices would no longer correspond to the offsets
    // passed to String::substr() below.
    $originalStringCharacters = array();
    String::regexp_match_all('/./s', $originalString, $originalStringCharacters);
    if (isset($originalStringCharacters[0])) {
        $originalStringCharacters = $originalStringCharacters[0];
    }

    $editedStringCharacters = array();
    String::regexp_match_all('/./s', $editedString, $editedStringCharacters);
    if (isset($editedStringCharacters[0])) {
        $editedStringCharacters = $editedStringCharacters[0];
    }

    // Determine the length of the strings.
    $originalStringLength = count($originalStringCharacters);
    $editedStringLength = count($editedStringCharacters);

    // Is there anything to compare?
    if ($originalStringLength == 0 && $editedStringLength == 0) {
        return array();
    }

    // Is the original string empty?
    if ($originalStringLength == 0) {
        // Return the edited string as addition.
        return array(array(1 => $editedString));
    }

    // Is the edited string empty?
    if ($editedStringLength == 0) {
        // Return the original string as deletion.
        return array(array(-1 => $originalString));
    }

    // Initialize the local indices:
    // 1) Create a character index for the edited string
    //    (character => list of positions).
    $characterIndex = array();
    for ($characterPosition = 0; $characterPosition < $editedStringLength; $characterPosition++) {
        $characterIndex[$editedStringCharacters[$characterPosition]][] = $characterPosition;
    }

    // 2) Initialize the substring and the length index. Both are
    //    keyed by "<end position in original>-<end position in edited>".
    $substringIndex = $lengthIndex = array();

    // Iterate over the original string to identify
    // the largest common string.
    for ($originalPosition = 0; $originalPosition < $originalStringLength; $originalPosition++) {
        // Find all occurrences of the original character
        // in the target string.
        $comparedCharacter = $originalStringCharacters[$originalPosition];

        // Do we have a commonality between the original string
        // and the edited string?
        if (isset($characterIndex[$comparedCharacter])) {
            // Loop over all commonalities.
            foreach ($characterIndex[$comparedCharacter] as $editedPosition) {
                // Calculate the current and the preceding position
                // ids for indexation. The arithmetic is parenthesized
                // explicitly as mixing "-" and "." without parentheses
                // is deprecated as of PHP 7.4.
                $currentPosition = $originalPosition . '-' . $editedPosition;
                $previousPosition = ($originalPosition - 1) . '-' . ($editedPosition - 1);

                // Does the occurrence in the target string continue
                // an existing common substring or does it start
                // a new one?
                if (isset($substringIndex[$previousPosition])) {
                    // This is a continuation of an existing common
                    // substring...
                    $newSubstring = $substringIndex[$previousPosition] . $comparedCharacter;
                    $newSubstringLength = String::strlen($newSubstring);

                    // Move the substring in the substring index.
                    $substringIndex[$currentPosition] = $newSubstring;
                    unset($substringIndex[$previousPosition]);

                    // Move the substring in the length index.
                    $lengthIndex[$newSubstringLength][$currentPosition] = $newSubstring;
                    unset($lengthIndex[$newSubstringLength - 1][$previousPosition]);
                } else {
                    // Start a new common substring...
                    // Add the substring to the substring index.
                    $substringIndex[$currentPosition] = $comparedCharacter;

                    // Add the substring to the length index.
                    $lengthIndex[1][$currentPosition] = $comparedCharacter;
                }
            }
        }
    }

    // If we have no commonalities at all then mark the original
    // string as deleted and the edited string as added and
    // return.
    if (empty($lengthIndex)) {
        return array(array(-1 => $originalString), array(1 => $editedString));
    }

    // Pop the largest common substrings from the length index.
    // Length keys are created in increasing order so the last
    // key is the largest length.
    end($lengthIndex);
    $largestSubstringLength = key($lengthIndex);

    // Take the first common substring if we have more than
    // one substring with the same length.
    // FIXME: Find a better heuristic for this decision.
    reset($lengthIndex[$largestSubstringLength]);
    $largestSubstringPosition = key($lengthIndex[$largestSubstringLength]);
    list($largestSubstringEndOriginal, $largestSubstringEndEdited) = explode('-', $largestSubstringPosition);
    $largestSubstring = $lengthIndex[$largestSubstringLength][$largestSubstringPosition];

    // Add the largest common substring to the result set
    $diffResult = array(array(0 => $largestSubstring));

    // Prepend the diff of the substrings before the common substring
    // to the result diff (by recursion).
    $precedingSubstringOriginal = String::substr($originalString, 0, $largestSubstringEndOriginal - $largestSubstringLength + 1);
    $precedingSubstringEdited = String::substr($editedString, 0, $largestSubstringEndEdited - $largestSubstringLength + 1);
    $diffResult = array_merge(String::diff($precedingSubstringOriginal, $precedingSubstringEdited), $diffResult);

    // Append the diff of the substrings after the common substring
    // to the result diff (by recursion).
    $succeedingSubstringOriginal = String::substr($originalString, $largestSubstringEndOriginal + 1);
    $succeedingSubstringEdited = String::substr($editedString, $largestSubstringEndEdited + 1);
    $diffResult = array_merge($diffResult, String::diff($succeedingSubstringOriginal, $succeedingSubstringEdited));

    // Return the array representing the diff.
    return $diffResult;
}
/**
 * Extract the names of all {$parameterName} placeholders from a
 * string. Used internally for RT searches.
 * @param $value string
 * @return array the placeholder names without the surrounding
 *  braces and dollar sign
 */
function getParameterNames($value) {
    $found = null;
    String::regexp_match_all('/\\{\\$([a-zA-Z0-9]+)\\}/', $value, $found);

    // Index 0 holds the whole placeholders, index 1 only the
    // captured names.
    $parameterNames = $found[1];
    return $parameterNames;
}
/**
 * Retrieve all currently enabled products within the
 * given context as a two dimensional array:
 *
 *   array(product type => array(product name => version))
 *
 * @param $context array the application context, only
 *  products enabled in that context will be returned. The
 *  array keys are turned into column names, the array itself
 *  is passed on as the query parameters.
 * @return array
 */
function &getCurrentProducts($context) {
    // Build the plugin settings context condition (if any).
    $contextWhereClause = '';
    if (count($context)) {
        $contextColumns = array();
        foreach (array_keys($context) as $contextName) {
            // Transform from camel case to ..._...
            String::regexp_match_all('/[A-Z][a-z]*/', ucfirst($contextName), $words);
            $contextColumns[] = strtolower(implode('_', $words[0]));
        }
        $contextWhereClause = 'AND ((' . implode('_id = ? AND ', $contextColumns) . '_id = ?) OR v.sitewide = 1)';
    }

    $sql = 'SELECT v.* FROM versions v LEFT JOIN plugin_settings ps ON lower(v.product_class_name) = ps.plugin_name AND ps.setting_name = \'enabled\' ' . $contextWhereClause . ' WHERE v.current = 1 AND (ps.setting_value = \'1\' OR v.lazy_load <> 1)';
    $result =& $this->retrieve($sql, $context, false);

    // Index the versions by product type and product name.
    $productArray = array();
    for (; !$result->EOF; $result->MoveNext()) {
        $row =& $result->getRowAssoc(false);
        $productArray[$row['product_type']][$row['product']] =& $this->_returnVersionFromRow($row);
    }

    $result->_close();
    unset($result);

    return $productArray;
}
/**
 * Split SQL content into individual SQL statements.
 *
 * The content is cut at every statement delimiter. A delimiter
 * that occurs while the number of unescaped single quotes is odd
 * is taken to be part of a quoted string and is kept inside the
 * statement.
 *
 * @param $sql string
 * @return array the trimmed, non-empty statements
 */
function &parseStatements(&$sql) {
    $statements = array();
    $pendingStatement = '';
    $quoteCount = 0;
    $escapedQuoteCount = 0;

    // This method for parsing the SQL statements was adapted from one used in phpBB (http://www.phpbb.com/)
    foreach (explode($this->statementDelim, $sql) as $fragment) {
        // Total number of single quotes seen so far in this statement
        $quoteCount += String::substr_count($fragment, "'");

        // Number of those quotes that are escaped
        $escapedQuoteCount += String::regexp_match_all("/(?<!\\\\)(\\\\\\\\)*\\\\'/", $fragment, $matches);

        $pendingStatement .= $fragment;

        if (($quoteCount - $escapedQuoteCount) % 2 != 0) {
            // Odd number of unescaped single quotes: the delimiter
            // belongs to the statement, so put it back and go on.
            $pendingStatement .= $this->statementDelim;
            continue;
        }

        // Even number of unescaped single quotes, so the statement is complete
        $trimmedStatement = trim($pendingStatement);
        if ($trimmedStatement !== '') {
            $statements[] = $trimmedStatement;
        }
        $pendingStatement = '';
        $quoteCount = $escapedQuoteCount = 0;
    }

    return $statements;
}
/**
 * Convert a camel-cased identifier to its lowercase, hyphenated
 * form, e.g. "HandlerClass" to "handler-class" and "myOp" to
 * "my-op".
 * @param $string string
 * @return string
 */
function uncamelize($string) {
    assert(!empty($string));

    // Make sure the first word starts with a capital, too
    // ("myOp" becomes "MyOp").
    $capitalized = ucfirst($string);

    // Split into words, each starting with a capital letter.
    $parts = array();
    String::regexp_match_all('/[A-Z][a-z0-9]*/', $capitalized, $parts);

    // The words must cover the whole string.
    assert(isset($parts[0]) && !empty($parts[0]) && strlen(implode('', $parts[0])) == strlen($capitalized));

    // Join the words with hyphens and lowercase the result.
    $hyphenated = implode('-', $parts[0]);
    return strtolower($hyphenated);
}