/**
 * Break free text down into a normalized list of search keywords.
 *
 * Punctuation is removed or turned into whitespace, the text is
 * lower-cased and split on whitespace. Stopwords, numeric tokens and
 * tokens shorter than the configured minimum word length are dropped.
 * @param $text string|array a string, or an array of lines (joined with newlines)
 * @param $allowWildcards boolean if true "*" becomes the wildcard "%", otherwise a space
 * @return array of keywords, each cut to SEARCH_KEYWORD_MAX_LENGTH characters
 */
static function filterKeywords($text, $allowWildcards = false) {
    $minWordLength = Config::getVar('search', 'min_word_length');
    $stopwordMap = self::_loadStopwords();

    // Multi-line input arrives as an array; flatten it into one string.
    $rawText = is_array($text) ? join("\n", $text) : $text;
    $normalized = Core::cleanVar($rawText);

    // Punctuation that is dropped without replacement.
    $normalized = PKPString::regexp_replace('/[!"\\#\\$%\'\\(\\)\\.\\?@\\[\\]\\^`\\{\\}~]/', '', $normalized);
    // Punctuation that separates words.
    $normalized = PKPString::regexp_replace('/[\\+,:;&\\/<=>\\|\\\\]/', ' ', $normalized);
    // Asterisks are either kept as wildcards or treated as separators.
    $asteriskReplacement = $allowWildcards ? '%' : ' ';
    $normalized = PKPString::regexp_replace('/[\\*]/', $asteriskReplacement, $normalized);
    $normalized = PKPString::strtolower($normalized);

    $tokens = PKPString::regexp_split('/\\s+/', $normalized);

    // FIXME Do not perform further filtering for some fields, e.g., author names?

    $keywords = array();
    foreach ($tokens as $token) {
        // The stopword list is looked up by key.
        if (isset($stopwordMap[$token])) {
            continue;
        }
        if (PKPString::strlen($token) < $minWordLength) {
            continue;
        }
        if (is_numeric($token)) {
            continue;
        }
        $keywords[] = PKPString::substr($token, 0, SEARCH_KEYWORD_MAX_LENGTH);
    }
    return $keywords;
}
/**
 * Generate an unused filename for a library file.
 *
 * The name has the form <base>-<suffix>.<ext>, where <suffix> is the
 * suffix for the given file type. If that name already exists in the
 * current context, a counter is appended to the suffix
 * (<base>-<suffix>-1.<ext>, <base>-<suffix>-2.<ext>, ...) until an
 * unused name is found.
 * @param $type int LIBRARY_FILE_TYPE_...
 * @param $originalFileName string
 * @return string
 */
function generateFileName($type, $originalFileName) {
    $libraryFileDao = DAORegistry::getDAO('LibraryFileDAO');
    $suffix = $this->getFileSuffixFromType($type);
    $ext = $this->getExtension($originalFileName);
    $dotExt = '.' . $ext;
    $dotExtLength = PKPString::strlen($dotExt);

    // Attempt 0 tries the plain type suffix, later attempts append a counter.
    for ($i = 0;; $i++) {
        $fullSuffix = ($i == 0) ? $suffix : $suffix . '-' . $i;

        // Truncate so that "-" plus the suffix still fit into 127 characters.
        $truncated = $this->truncateFileName($originalFileName, 127 - PKPString::strlen($fullSuffix) - 1);

        // Remove the trailing ".ext" from the truncated name. Looking at the
        // end of the truncated name (rather than searching the original name
        // for the extension) stays correct when the extension text also
        // occurs earlier in the name or when the name was shortened.
        $baseName = $truncated;
        if ((string) $ext !== '' && PKPString::substr($truncated, -$dotExtLength) === $dotExt) {
            $baseName = PKPString::substr($truncated, 0, PKPString::strlen($truncated) - $dotExtLength);
        }

        $fileName = $baseName . '-' . $fullSuffix . '.' . $ext;
        if (!$libraryFileDao->filenameExists($this->contextId, $fileName)) {
            return $fileName;
        }
    }
}
/**
 * @see FormValidator::isValid()
 * An empty optional field is always valid. Otherwise the length of the
 * field value is compared against the configured length using the
 * configured comparator; an unknown comparator makes the field invalid.
 * @return boolean
 */
function isValid() {
    // Nothing to measure for an empty optional field.
    if ($this->isEmptyAndOptional()) {
        return true;
    }

    $actualLength = PKPString::strlen($this->getFieldValue());
    $outcome = false;
    switch ($this->_comparator) {
        case '==':
            $outcome = ($actualLength == $this->_length);
            break;
        case '!=':
            $outcome = ($actualLength != $this->_length);
            break;
        case '<':
            $outcome = ($actualLength < $this->_length);
            break;
        case '>':
            $outcome = ($actualLength > $this->_length);
            break;
        case '<=':
            $outcome = ($actualLength <= $this->_length);
            break;
        case '>=':
            $outcome = ($actualLength >= $this->_length);
            break;
    }
    return $outcome;
}
/**
 * Derive a confidence score calculated as the similarity of the
 * original raw citation and the citation text generated from the
 * citation description.
 *
 * The score is the percentage of characters of the original citation
 * that were not deleted in the diff against the generated citation.
 * An empty original citation yields a score of 0.
 * @param $metadataDescription MetadataDescription
 * @return integer filter confidence score (0-100)
 */
function _filterConfidenceScore(&$metadataDescription) {
    // Retrieve the original plain text citation.
    $originalCitation = $this->getOriginalRawCitation();

    // Generate the formatted citation output from the description.
    $citationOutputFilter =& $this->getCitationOutputFilter();
    $generatedCitation = $citationOutputFilter->execute($metadataDescription);

    // Strip formatting and the Google Scholar tag so that we get a plain
    // text string that is comparable with the raw citation.
    $generatedCitation = trim(str_replace(GOOGLE_SCHOLAR_TAG, '', strip_tags($generatedCitation)));

    // Compare the original to the generated citation.
    $citationDiff = PKPString::diff($originalCitation, $generatedCitation);

    // Calculate similarity as the number of deleted characters in relation to the
    // number of characters in the original citation. This intentionally excludes
    // additions as these can represent useful data like a DOI or an external link.
    $deletedCharacters = 0;
    foreach ($citationDiff as $diffPart) {
        // Identify deletions (diff parts keyed with -1).
        if (key($diffPart) == -1) {
            $deletedCharacters += PKPString::strlen(current($diffPart));
        }
    }

    $originalCharacters = PKPString::strlen($originalCitation);
    if ($originalCharacters == 0) {
        // Without an original citation there is nothing to compare against;
        // avoid dividing by zero and report no confidence.
        $filterConfidenceScore = 0;
        return $filterConfidenceScore;
    }

    $partOfCommonCharacters = ($originalCharacters - $deletedCharacters) / $originalCharacters;
    $filterConfidenceScore = (int) round(min($partOfCommonCharacters * 100, 100));
    return $filterConfidenceScore;
}
/**
 * Converts a string with a single person
 * to an NLM name description.
 *
 * The string is first stripped of an optional leading title and optional
 * trailing degrees (both are stored in the 'suffix' statement). The
 * remainder is matched against a list of name patterns, ordered from most
 * to least specific; the first pattern that matches determines the
 * given names, initials, surname, prefix and suffix.
 *
 * TODO: add initials from all given names to initials
 *       element
 *
 * @param $personString string
 * @param $title boolean true to parse for title
 * @param $degrees boolean true to parse for degrees
 * @return MetadataDescription an NLM name description. NOTE(review): the
 *  original documentation mentioned a null return for unparseable strings,
 *  but the only return statement in this method returns the description.
 */
function &_parsePersonString($personString, $title, $degrees) {
    // Expressions to parse person strings, ported from CiteULike person
    // plugin, see http://svn.citeulike.org/svn/plugins/person.tcl
    static $personRegex = array(
        // One or more honorifics, each followed by whitespace.
        'title' => '(?:His (?:Excellency|Honou?r)\\s+|Her (?:Excellency|Honou?r)\\s+|The Right Honou?rable\\s+|The Honou?rable\\s+|Right Honou?rable\\s+|The Rt\\.? Hon\\.?\\s+|The Hon\\.?\\s+|Rt\\.? Hon\\.?\\s+|Mr\\.?\\s+|Ms\\.?\\s+|M\\/s\\.?\\s+|Mrs\\.?\\s+|Miss\\.?\\s+|Dr\\.?\\s+|Sir\\s+|Dame\\s+|Prof\\.?\\s+|Professor\\s+|Doctor\\s+|Mister\\s+|Mme\\.?\\s+|Mast(?:\\.|er)?\\s+|Lord\\s+|Lady\\s+|Madam(?:e)?\\s+|Priv\\.-Doz\\.\\s+)+',
        // One or more comma-separated groups of capital letters and dots.
        'degrees' => '(,\\s+(?:[A-Z\\.]+))+',
        // Up to four capital letters, optionally separated by dots, hyphens or spaces.
        'initials' => '(?:(?:[A-Z]\\.){1,3}[A-Z]\\.?)|(?:(?:[A-Z]\\.\\s){1,3}[A-Z]\\.?)|(?:[A-Z]{1,4})|(?:(?:[A-Z]\\.-?){1,4})|(?:(?:[A-Z]\\.-?){1,3}[A-Z]\\.?)|(?:(?:[A-Z]-){1,3}[A-Z])|(?:(?:[A-Z]\\s){1,3}[A-Z]\\.?)|(?:(?:[A-Z]-){1,3}[A-Z]\\.?)',
        // Surname prefixes such as "de", "van der", "von", "St.".
        'prefix' => 'Dell(?:[a|e])?(?:\\s|$)|Dalle(?:\\s|$)|D[a|e]ll\'(?:\\s|$)|Dela(?:\\s|$)|Del(?:\\s|$)|[Dd]e(?:\\s|$)(?:La(?:\\s|$)|Los(?:\\s|$))?|[Dd]e(?:\\s|$)|[Dd][a|i|u](?:\\s|$)|L[a|e|o](?:\\s|$)|[D|L|O]\'|St\\.?(?:\\s|$)|San(?:\\s|$)|[Dd]en(?:\\s|$)|[Vv]on(?:\\s|$)(?:[Dd]er(?:\\s|$))?|(?:[Ll][ea](?:\\s|$))?[Vv]an(?:\\s|$)(?:[Dd]e(?:n|r)?(?:\\s|$))?',
        // At least two characters that are not whitespace or one of ",.;()",
        // optionally as two such parts joined by a hyphen.
        'givenName' => '(?:[^ \\t\\n\\r\\f\\v,.;()]{2,}|[^ \\t\\n\\r\\f\\v,.;()]{2,}\\-[^ \\t\\n\\r\\f\\v,.;()]{2,})');

    // The expressions for given name, suffix and surname are the same
    $personRegex['surname'] = $personRegex['suffix'] = $personRegex['givenName'];
    // A double surname is one or more space-separated surname parts.
    $personRegex['double-surname'] = "(?:" . $personRegex['surname'] . "\\s)*" . $personRegex['surname'];

    // Shortcut for prefixed surname
    $personRegexPrefixedSurname = "(?P<prefix>(?:" . $personRegex['prefix'] . ")?)(?P<surname>" . $personRegex['surname'] . ")";
    $personRegexPrefixedDoubleSurname = "(?P<prefix>(?:" . $personRegex['prefix'] . ")?)(?P<surname>" . $personRegex['double-surname'] . ")";

    // Instantiate the target person description
    $personDescription = new MetadataDescription('lib.pkp.plugins.metadata.nlm30.schema.Nlm30NameSchema', $this->_assocType);

    // Clean the person string
    $personString = trim($personString);

    // 1. Extract title and degree from the person string and use this as suffix
    $suffixString = '';
    $results = array();
    if ($title && PKPString::regexp_match_get('/^(' . $personRegex['title'] . ')/i', $personString, $results)) {
        // Keep the title (without surrounding punctuation) and cut it off the string.
        $suffixString = trim($results[1], ',:; ');
        $personString = PKPString::regexp_replace('/^(' . $personRegex['title'] . ')/i', '', $personString);
    }

    if ($degrees && PKPString::regexp_match_get('/(' . $personRegex['degrees'] . ')$/i', $personString, $results)) {
        $degreesArray = explode(',', trim($results[1], ','));
        foreach ($degreesArray as $key => $degree) {
            $degreesArray[$key] = PKPString::trimPunctuation($degree);
        }
        // NOTE(review): when no title was found, $suffixString is still empty
        // here, so the resulting suffix starts with " - ". Confirm whether
        // that is intended.
        $suffixString .= ' - ' . implode('; ', $degreesArray);
        $personString = PKPString::regexp_replace('/(' . $personRegex['degrees'] . ')$/i', '', $personString);
    }

    if (!empty($suffixString)) {
        $personDescription->addStatement('suffix', $suffixString);
    }

    // Space initials when followed by a given name or last name.
    $personString = PKPString::regexp_replace('/([A-Z])\\.([A-Z][a-z])/', '\\1. \\2', $personString);

    // 2. Extract names and initials from the person string

    // The parser expressions are ordered by specificity. The most specific expressions
    // come first. Only if these specific expressions don't work will we turn to less
    // specific ones. This avoids parsing errors. It also explains why we don't use the
    // ?-quantifier for optional elements like initials or middle name where they could
    // be misinterpreted.
    $personExpressions = array(
        // [prefix] surname
        '/^' . $personRegexPrefixedSurname . '$/i',
        // initials [prefix] surname
        '/^(?P<initials>' . $personRegex['initials'] . ')\\s' . $personRegexPrefixedSurname . '$/',
        // [prefix] surname[,] initials
        '/^' . $personRegexPrefixedSurname . ',?\\s(?P<initials>' . $personRegex['initials'] . ')$/',
        // [prefix] double-surname, given-name initials
        '/^' . $personRegexPrefixedDoubleSurname . ',\\s(?P<givenName>' . $personRegex['givenName'] . ')\\s(?P<initials>' . $personRegex['initials'] . ')$/',
        // given-name initials [prefix] surname
        '/^(?P<givenName>' . $personRegex['givenName'] . ')\\s(?P<initials>' . $personRegex['initials'] . ')\\s' . $personRegexPrefixedSurname . '$/',
        // [prefix] double-surname, given-names initials
        '/^' . $personRegexPrefixedDoubleSurname . ',\\s(?P<givenName>(?:' . $personRegex['givenName'] . '\\s)+)(?P<initials>' . $personRegex['initials'] . ')$/',
        // given-names initials [prefix] surname
        '/^(?P<givenName>(?:' . $personRegex['givenName'] . '\\s)+)(?P<initials>' . $personRegex['initials'] . ')\\s' . $personRegexPrefixedSurname . '$/',
        // [prefix] double-surname, given-names
        '/^' . $personRegexPrefixedDoubleSurname . ',(?P<givenName>(?:\\s' . $personRegex['givenName'] . ')+)$/',
        // given-names [prefix] surname
        '/^(?P<givenName>(?:' . $personRegex['givenName'] . '\\s)+)' . $personRegexPrefixedSurname . '$/',
        // surname [suffix], [initials] (given-names) [prefix]
        '/^\\s*(?P<surname>' . $personRegex['surname'] . ')(?P<suffix>(?:\\s+' . $personRegex['suffix'] . ')?)\\s*,\\s*(?P<initials>(?:' . $personRegex['initials'] . ')?)\\s*\\((?P<givenName>(?:\\s*' . $personRegex['givenName'] . ')+)\\s*\\)\\s*(?P<prefix>(?:' . $personRegex['prefix'] . ')?)$/',
        // given-name.double-surname
        '/^(?P<givenName>' . $personRegex['givenName'] . ')\\.(?P<surname>' . $personRegex['double-surname'] . ')$/',
        // Catch-all: treat the whole string as surname
        '/^(?P<surname>.*)$/');

    $results = array();
    foreach ($personExpressions as $expressionId => $personExpression) {
        if ($nameFound = PKPString::regexp_match_get($personExpression, $personString, $results)) {
            // Given names
            if (!empty($results['givenName'])) {
                // Split given names
                $givenNames = explode(' ', trim($results['givenName']));
                foreach ($givenNames as $givenName) {
                    $personDescription->addStatement('given-names', $givenName);
                    unset($givenName);
                }
            }

            // Initials (will also be saved as given names)
            if (!empty($results['initials'])) {
                // Remove separators so that only the letters remain, then
                // add every letter as its own given name.
                $results['initials'] = str_replace(array('.', '-', ' '), array('', '', ''), $results['initials']);
                for ($initialNum = 0; $initialNum < PKPString::strlen($results['initials']); $initialNum++) {
                    $initial = $results['initials'][$initialNum];
                    $personDescription->addStatement('given-names', $initial);
                    unset($initial);
                }
            }

            // Surname
            if (!empty($results['surname'])) {
                // Correct all-upper surname
                if (strtoupper($results['surname']) == $results['surname']) {
                    $results['surname'] = ucwords(strtolower($results['surname']));
                }
                $personDescription->addStatement('surname', $results['surname']);
            }

            // Prefix/Suffix
            foreach (array('prefix', 'suffix') as $propertyName) {
                if (!empty($results[$propertyName])) {
                    $results[$propertyName] = trim($results[$propertyName]);
                    $personDescription->addStatement($propertyName, $results[$propertyName]);
                }
            }

            // Only the first (most specific) matching expression is used.
            break;
        }
    }

    return $personDescription;
}
/**
 * Checks whether the given value has the shape of an ISBN-13, i.e.
 * whether it is a string of exactly 13 decimal digits.
 *
 * Unlike a generic numeric check this rejects signs, decimal points,
 * exponents and surrounding whitespace. The check digit is not verified.
 * @param $isbn mixed
 * @return boolean
 */
function isValidIsbn($isbn) {
    // The D modifier keeps "$" from matching before a trailing newline.
    return is_string($isbn) && preg_match('/^[0-9]{13}$/D', $isbn) === 1;
}
/**
 * Build search strings by filling search templates with data taken
 * from a citation description.
 *
 * Every template is filled twice: once with the first author and once
 * with the first editor of the citation. The results are trimmed of
 * punctuation, de-duplicated and cleaned of empty entries.
 *
 * Supported placeholders:
 *  %aulast%: surname of the first author (or editor)
 *  %au%: full name of the first author (or editor)
 *  %title%: the article-title if the description has one, otherwise the source
 *  %date%: the first four characters of the date statement
 *  %isbn%: ISBN
 * @param $searchTemplates an array of templates
 * @param $citationDescription MetadataDescription
 * @return array
 */
function constructSearchStrings(&$searchTemplates, &$citationDescription) {
    // Filter that turns a name description into a person string
    import('lib.pkp.plugins.metadata.nlm30.filter.Nlm30NameSchemaPersonStringFilter');
    $nameFlattener = new Nlm30NameSchemaPersonStringFilter();

    // Surname and full name of the first author, then of the first editor
    $firstPersons = array();
    $personGroups = array(
        'person-group[@person-group-type="author"]',
        'person-group[@person-group-type="editor"]'
    );
    foreach ($personGroups as $personGroup) {
        $surname = $fullName = '';
        $persons = $citationDescription->getStatement($personGroup);
        if (is_array($persons) && count($persons)) {
            $surname = (string) $persons[0]->getStatement('surname');
            $fullName = $nameFlattener->execute($persons[0]);
        }
        $firstPersons[] = array($surname, $fullName);
    }

    // Title: prefer the article title, fall back to the source
    if ($citationDescription->hasStatement('article-title')) {
        $title = (string) $citationDescription->getStatement('article-title');
    } else {
        $title = (string) $citationDescription->getStatement('source');
    }

    // Year: the leading four characters of the date
    $year = (string) $citationDescription->getStatement('date');
    if (PKPString::strlen($year) > 4) {
        $year = PKPString::substr($year, 0, 4);
    }

    $isbn = (string) $citationDescription->getStatement('isbn');

    // Fill each template for the author first, then for the editor
    $placeholders = array('%aulast%', '%au%', '%title%', '%date%', '%isbn%');
    $searchStrings = array();
    foreach ($searchTemplates as $searchTemplate) {
        foreach ($firstPersons as $firstPerson) {
            list($surname, $fullName) = $firstPerson;
            $values = array($surname, $fullName, $title, $year, $isbn);
            $searchStrings[] = str_replace($placeholders, $values, $searchTemplate);
        }
    }

    // Remove empty or duplicate searches
    $searchStrings = array_map(array('PKPString', 'trimPunctuation'), $searchStrings);
    $searchStrings = array_unique($searchStrings);
    $searchStrings = arrayClean($searchStrings);
    return $searchStrings;
}
/**
 * Turn a single NLM name description into a person string by filling
 * the placeholders %surname%, %initials%, %firstname%, %prefix% and
 * %suffix% of this filter's template.
 *
 * The first given name is used as first name if it is longer than one
 * character; all remaining given names are reduced to dotted initials.
 * Empty "()" left over in the template are removed.
 * @param $personDescription MetadataDescription|'et-al'
 * @return string
 */
function _flattenPersonDescription(&$personDescription) {
    // The et-al marker is passed as a plain string
    if (is_string($personDescription) && $personDescription == PERSON_STRING_FILTER_ETAL) {
        return 'et al';
    }

    $surname = (string) $personDescription->getStatement('surname');

    // First name and initials
    $firstName = '';
    $initials = '';
    $givenNames = $personDescription->getStatement('given-names');
    if (is_array($givenNames) && count($givenNames)) {
        if (PKPString::strlen($givenNames[0]) > 1) {
            $firstName = array_shift($givenNames);
        }
        foreach ($givenNames as $givenName) {
            $initials .= PKPString::substr($givenName, 0, 1) . '.';
        }
    }

    // Non-empty initials, prefix and suffix are preceded by a space
    $initials = empty($initials) ? $initials : ' ' . $initials;

    $prefix = (string) $personDescription->getStatement('prefix');
    $prefix = empty($prefix) ? $prefix : ' ' . $prefix;

    $suffix = (string) $personDescription->getStatement('suffix');
    $suffix = empty($suffix) ? $suffix : ' ' . $suffix;

    // Fill placeholders in person template (substituted in this order).
    $placeholders = array('%surname%', '%initials%', '%firstname%', '%prefix%', '%suffix%');
    $values = array($surname, $initials, $firstName, $prefix, $suffix);
    $filled = str_replace($placeholders, $values, $this->getTemplate());

    // Remove empty brackets and trailing/leading whitespace
    return trim(str_replace('()', '', $filled));
}
/**
 * @function abntDateFormatWithDay Format a date as lower-case "day month year",
 * abbreviating the month name (followed by a dot) when the full month
 * name is longer than four characters.
 * @param $string string a numeric timestamp or a date string understood by strtotime()
 * @return string
 */
function abntDateFormatWithDay($string) {
    // Numeric input is used as a timestamp, anything else is parsed as a date string.
    $timestamp = is_numeric($string) ? (int) $string : strtotime($string);

    // Short month names are written out in full, longer ones are abbreviated.
    $monthName = strftime("%B", $timestamp);
    $format = (PKPString::strlen($monthName) > 4) ? "%d %b. %Y" : "%d %B %Y";

    return PKPString::strtolower(strftime($format, $timestamp));
}
/**
 * Builds a citation description from meta-data retrieved from PubMed.
 *
 * The eFetch service is queried for the article meta-data (title,
 * source, volume, issue, authors, pages, date, publication type, DOI)
 * and the eLink service for a full text link. Elements that are missing
 * from the PubMed response are left out of the meta-data.
 * @param $pmid string
 * @return MetadataDescription or null if the eFetch call returned no result
 */
function &_lookup($pmid) {
    $nullVar = null;

    // Use eFetch to get XML metadata for the given PMID
    $lookupParams = array('db' => 'pubmed', 'mode' => 'xml', 'tool' => 'pkp-wal', 'id' => $pmid);
    if (!is_null($this->getEmail())) {
        $lookupParams['email'] = $this->getEmail();
    }

    // Call the eFetch URL and get an XML result
    if (is_null($resultDOM = $this->callWebService(PUBMED_WEBSERVICE_EFETCH, $lookupParams))) {
        return $nullVar;
    }

    $metadata = array('pub-id[@pub-id-type="pmid"]' => $pmid);

    // Properties that map one-to-one to the first element with a given
    // tag name. A property is only set when the element exists.
    $simpleProperties = array(
        'article-title' => 'ArticleTitle',
        'source' => 'MedlineTA',
        'volume' => 'Volume',
        'issue' => 'Issue'
    );
    foreach ($simpleProperties as $propertyName => $tagName) {
        $propertyValue = $this->_getFirstElementText($resultDOM, $tagName);
        if (!is_null($propertyValue)) {
            $metadata[$propertyName] = $propertyValue;
        }
    }

    // Get list of author full names
    foreach ($resultDOM->getElementsByTagName("Author") as $authorNode) {
        if (!isset($metadata['person-group[@person-group-type="author"]'])) {
            $metadata['person-group[@person-group-type="author"]'] = array();
        }

        // Instantiate an NLM name description
        $authorDescription = new MetadataDescription('lib.pkp.plugins.metadata.nlm30.schema.Nlm30NameSchema', ASSOC_TYPE_AUTHOR);

        // Surname (an author element does not necessarily have one)
        $surname = $this->_getFirstElementText($authorNode, 'LastName');
        if (!is_null($surname)) {
            $authorDescription->addStatement('surname', $surname);
        }

        // Given names: use FirstName and fall back to ForeName
        $givenNamesString = $this->_getFirstElementText($authorNode, 'FirstName');
        if (is_null($givenNamesString)) {
            $givenNamesString = $this->_getFirstElementText($authorNode, 'ForeName');
        }
        if (!empty($givenNamesString)) {
            foreach (explode(' ', $givenNamesString) as $givenName) {
                $authorDescription->addStatement('given-names', PKPString::trimPunctuation($givenName));
            }
        }

        // Suffix
        $suffix = $this->_getFirstElementText($authorNode, 'Suffix');
        if (!is_null($suffix)) {
            $authorDescription->addStatement('suffix', $suffix);
        }

        // Include collective names
        // FIXME: This corresponds to an NLM-citation <collab> tag and should be part of the Metadata implementation
        /*if ($resultDOM->getElementsByTagName("CollectiveName")->length > 0 && $authorNode->getElementsByTagName("CollectiveName")->item(0)->textContent != '') {
        }*/

        $metadata['person-group[@person-group-type="author"]'][] =& $authorDescription;
        unset($authorDescription);
    }

    // Extract pagination
    $pagination = $this->_getFirstElementText($resultDOM, 'MedlinePgn');
    if (!is_null($pagination) && PKPString::regexp_match_get("/^[:p\\.\\s]*(?P<fpage>[Ee]?\\d+)(-(?P<lpage>\\d+))?/", $pagination, $pages)) {
        $fPage = (int) $pages['fpage'];
        $metadata['fpage'] = $fPage;
        if (!empty($pages['lpage'])) {
            $lPage = (int) $pages['lpage'];

            // Deal with shortcuts like '382-7': replace the trailing digits
            // of the first page by the digits given for the last page.
            if ($lPage < $fPage) {
                $lPage = (int) (PKPString::substr($pages['fpage'], 0, -PKPString::strlen($pages['lpage'])) . $pages['lpage']);
            }

            $metadata['lpage'] = $lPage;
        }
    }

    // Get publication date (can be in several places in PubMed).
    $dateNode = null;
    $articleDateNodes = $resultDOM->getElementsByTagName('ArticleDate');
    if ($articleDateNodes->length > 0) {
        $dateNode = $articleDateNodes->item(0);
    } else {
        $pubDateNodes = $resultDOM->getElementsByTagName('PubDate');
        if ($pubDateNodes->length > 0) {
            $dateNode = $pubDateNodes->item(0);
        }
    }

    // Retrieve the data parts and assemble date.
    if (!is_null($dateNode)) {
        $publicationDate = '';
        $requiresNormalization = false;
        foreach (array('Year' => 4, 'Month' => 2, 'Day' => 2) as $dateElement => $padding) {
            $dateElementNodes = $dateNode->getElementsByTagName($dateElement);
            if ($dateElementNodes->length > 0) {
                if (!empty($publicationDate)) {
                    $publicationDate .= '-';
                }
                $dateElementFirstNode = $dateElementNodes->item(0);
                $datePart = str_pad($dateElementFirstNode->textContent, $padding, '0', STR_PAD_LEFT);
                // Non-numeric parts (e.g. month names) need normalization.
                if (!is_numeric($datePart)) {
                    $requiresNormalization = true;
                }
                $publicationDate .= $datePart;
            } else {
                // Stop at the first missing date part.
                break;
            }
        }

        // Normalize the date to NLM standard if necessary.
        if ($requiresNormalization) {
            $dateFilter = new DateStringNormalizerFilter();
            $publicationDate = $dateFilter->execute($publicationDate);
        }

        if (!empty($publicationDate)) {
            $metadata['date'] = $publicationDate;
        }
    }

    // Get publication type
    $publicationTypeNodes = $resultDOM->getElementsByTagName('PublicationType');
    if ($publicationTypeNodes->length > 0) {
        foreach ($publicationTypeNodes as $publicationType) {
            // The vast majority of items on PubMed are articles so catch these...
            if (PKPString::strpos(PKPString::strtolower($publicationType->textContent), 'article') !== false) {
                $metadata['[@publication-type]'] = NLM30_PUBLICATION_TYPE_JOURNAL;
                break;
            }
        }
    }

    // Get DOI if it exists
    $articleIdNodes = $resultDOM->getElementsByTagName('ArticleId');
    foreach ($articleIdNodes as $idNode) {
        if ($idNode->getAttribute('IdType') == 'doi') {
            $metadata['pub-id[@pub-id-type="doi"]'] = $idNode->textContent;
        }
    }

    // Use eLink utility to find fulltext links
    $lookupParams = array('dbfrom' => 'pubmed', 'cmd' => 'llinks', 'tool' => 'pkp-wal', 'id' => $pmid);
    if (!is_null($resultDOM = $this->callWebService(PUBMED_WEBSERVICE_ELINK, $lookupParams))) {
        // Get a list of possible links
        $links = array();
        foreach ($resultDOM->getElementsByTagName("ObjUrl") as $linkOut) {
            $attributes = '';
            foreach ($linkOut->getElementsByTagName("Attribute") as $attribute) {
                $attributes .= PKPString::strtolower($attribute->textContent) . ' / ';
            }

            // Only add links to open access resources
            if (PKPString::strpos($attributes, "subscription") === false && PKPString::strpos($attributes, "membership") === false && PKPString::strpos($attributes, "fee") === false && $attributes != "") {
                $linkUrl = $this->_getFirstElementText($linkOut, 'Url');
                if (!is_null($linkUrl)) {
                    $links[] = $linkUrl;
                }
            }
        }

        // Take the first link if we have any left (presumably pubmed returns them in preferential order)
        if (isset($links[0])) {
            $metadata['uri'] = $links[0];
        }
    }

    return $this->getNlm30CitationDescriptionFromMetadataArray($metadata);
}

/**
 * Return the text content of the first element with the given tag
 * name below the given node.
 * @param $parentNode object a DOM node providing getElementsByTagName()
 * @param $tagName string
 * @return string|null null if there is no such element
 */
function _getFirstElementText($parentNode, $tagName) {
    $nodes = $parentNode->getElementsByTagName($tagName);
    if ($nodes->length > 0) {
        return $nodes->item(0)->textContent;
    }
    return null;
}
/**
 * Retrieve auto-suggestions from the faceting service.
 *
 * The last search term of the user input is used as facet prefix, the
 * rest of the input pre-filters the facet results. Every term returned
 * by the service is appended to the remaining user input to form a
 * suggestion.
 * @param $url string
 * @param $searchRequest SolrSearchRequest
 * @param $userInput string
 * @param $fieldName string
 * @return array The generated suggestions.
 */
function _getFacetingAutosuggestions($url, $searchRequest, $userInput, $fieldName) {
    // Replace special characters in the user input by spaces. Every
    // character is replaced by exactly one space so that string offsets
    // stay aligned with the original input.
    $searchTerms = str_replace(array('"', '(', ')', '+', '-', '|', '&', '!'), ' ', $userInput);

    // Cut off the last search term.
    $searchTerms = explode(' ', $searchTerms);
    $facetPrefix = array_pop($searchTerms);
    // Compare against the empty string so that a prefix of "0" is not
    // mistaken for missing input.
    if ((string) $facetPrefix === '') {
        return array();
    }

    // Use the remaining search query to pre-filter
    // facet results. This may be an invalid query
    // but edismax will deal gracefully with syntax
    // errors.
    $userInput = PKPString::substr($userInput, 0, -PKPString::strlen($facetPrefix));
    switch ($fieldName) {
        case 'query':
            // The 'query' filter goes against all fields.
            $articleSearch = new ArticleSearch();
            $solrFields = array_values($articleSearch->getIndexFieldMap());
            break;

        case 'indexTerms':
            // The 'index terms' filter goes against keyword index fields.
            $solrFields = array('discipline', 'subject', 'type', 'coverage');
            break;

        default:
            // All other filters can be used directly.
            $solrFields = array($fieldName);
    }
    $solrFieldString = implode('|', $solrFields);
    $searchRequest->addQueryFieldPhrase($solrFieldString, $userInput);

    // Construct the main query.
    $params = $this->_getSearchQueryParameters($searchRequest);
    if (!isset($params['q'])) {
        // Use a catch-all query in case we have no limiting
        // search.
        $params['q'] = '*:*';
    }
    if ($fieldName == 'query') {
        $params['facet.field'] = 'default_spell';
    } else {
        $params['facet.field'] = $fieldName . '_spell';
    }
    $facetPrefixLc = PKPString::strtolower($facetPrefix);
    $params['facet.prefix'] = $facetPrefixLc;

    // Make the request.
    $response = $this->_makeRequest($url, $params);
    if (!is_a($response, 'DOMXPath')) {
        return array();
    }

    // Extract term suggestions.
    $nodeList = $response->query('//lst[@name="facet_fields"]/lst/int/@name');
    if ($nodeList->length == 0) {
        return array();
    }
    $termSuggestions = array();
    foreach ($nodeList as $childNode) {
        $termSuggestions[] = $childNode->value;
    }

    // Add the term suggestion to the remaining user input.
    $suggestions = array();
    $facetPrefixLcLength = PKPString::strlen($facetPrefixLc);
    foreach ($termSuggestions as $termSuggestion) {
        // Restore case if possible: swap the matched lower-case prefix
        // for the prefix as the user typed it.
        if (strpos($termSuggestion, $facetPrefixLc) === 0) {
            $termSuggestion = $facetPrefix . PKPString::substr($termSuggestion, $facetPrefixLcLength);
        }
        $suggestions[] = $userInput . $termSuggestion;
    }
    return $suggestions;
}
/**
 * Parse an XML file using the specified handler.
 * If no handler has been specified, XMLParserDOMHandler is used by default, returning a tree structure representing the document.
 *
 * The file is read chunk by chunk through a FileWrapper. XML parse
 * errors are recorded via addError() and do not abort parsing.
 * @param $file string full path to the XML file
 * @param $dataCallback mixed Optional callback for data handling: function dataCallback($operation, $wrapper, $data = null); called with the operations 'open', 'parse' (once per chunk) and 'close'
 * @return object actual return type depends on the handler; false if the resource could not be opened
 */
function &parse($file, $dataCallback = null) {
    $parser =& $this->createParser();

    if (!isset($this->handler)) {
        // Use default handler for parsing
        $handler = new XMLParserDOMHandler();
        $this->setHandler($handler);
    }

    xml_set_object($parser, $this->handler);
    xml_set_element_handler($parser, "startElement", "endElement");
    xml_set_character_data_handler($parser, "characterData");

    import('lib.pkp.classes.file.FileWrapper');
    $wrapper =& FileWrapper::wrapper($file);

    // Without a wrapper there is nothing to open. Check this before the
    // wrapper is used and release the parser before giving up.
    if (!$wrapper) {
        $this->destroyParser($parser);
        $result = false;
        return $result;
    }

    // Handle responses of various types
    while (true) {
        $newWrapper = $wrapper->open();
        if (is_object($newWrapper)) {
            // Follow a redirect
            unset($wrapper);
            $wrapper =& $newWrapper;
            unset($newWrapper);
        } elseif (!$newWrapper) {
            // Could not open resource -- error.
            // Release the parser so that it does not leak.
            $this->destroyParser($parser);
            $returner = false;
            return $returner;
        } else {
            // OK, we've found the end result
            break;
        }
    }

    if ($dataCallback) {
        call_user_func($dataCallback, 'open', $wrapper);
    }

    while (!$wrapper->eof() && ($data = $wrapper->read()) !== false) {

        // if the string contains non-UTF8 characters, convert it to UTF-8 for parsing
        if (Config::getVar('i18n', 'charset_normalization') == 'On' && !PKPString::utf8_compliant($data)) {

            $utf8_last = PKPString::substr($data, PKPString::strlen($data) - 1);

            // if the string ends in a "bad" UTF-8 character, maybe it's truncated
            while (!$wrapper->eof() && PKPString::utf8_bad_find($utf8_last) === 0) {
                // read another chunk of data
                $data .= $wrapper->read();
                $utf8_last = PKPString::substr($data, PKPString::strlen($data) - 1);
            }

            $data = PKPString::utf8_normalize($data);

            // strip any invalid UTF-8 sequences
            $data = PKPString::utf8_bad_strip($data);

            // convert named entities to numeric entities
            $data = strtr($data, PKPString::getHTMLEntities());
        }

        // strip any invalid ASCII control characters
        $data = PKPString::utf8_strip_ascii_ctrl($data);

        if ($dataCallback) {
            call_user_func($dataCallback, 'parse', $wrapper, $data);
        }

        // The third argument tells the parser whether this is the last chunk.
        if (!xml_parse($parser, $data, $wrapper->eof())) {
            $this->addError(xml_error_string(xml_get_error_code($parser)));
        }
    }

    if ($dataCallback) {
        call_user_func($dataCallback, 'close', $wrapper);
    }

    $wrapper->close();
    $result = $this->handler->getResult();
    $this->destroyParser($parser);
    // Only a default handler created by this call is destroyed here.
    if (isset($handler)) {
        $handler->destroy();
    }
    return $result;
}
/**
 * Shorten a filename so that it is at most $length characters long.
 *
 * Names that already fit are returned unchanged. Longer names are cut
 * at the end of the name part and the extension is re-appended; the
 * result is finally cut to $length characters.
 * @param $fileName string
 * @param $length int maximum length of the returned name
 * @return string
 */
function truncateFileName($fileName, $length = 127) {
    if (PKPString::strlen($fileName) > $length) {
        $extension = $this->getExtension($fileName);
        // Room left for the name part once "." and the extension are accounted for.
        $stemLength = $length - 1 - PKPString::strlen($extension);
        $shortened = PKPString::substr($fileName, 0, $stemLength) . '.' . $extension;
        return PKPString::substr($shortened, 0, $length);
    }
    return $fileName;
}