/**
 * Index the text of an article file so that it can be searched.
 * Nothing is indexed when the file cannot be retrieved, when no parser
 * is available for it, or when the parser cannot be opened.
 * @param $articleId int
 * @param $type int
 * @param $fileId int
 */
function updateFileIndex($articleId, $type, $fileId) {
	import('classes.file.ArticleFileManager');
	$articleFileManager = new ArticleFileManager($articleId);
	$articleFile =& $articleFileManager->getFile($fileId);
	if (!isset($articleFile)) {
		return;
	}

	$fileParser =& SearchFileParser::fromFile($articleFile);
	if (!isset($fileParser) || !$fileParser->open()) {
		return;
	}

	// The search object is only created once the parser has been opened.
	$articleSearchDao =& DAORegistry::getDAO('ArticleSearchDAO');
	$objectId = $articleSearchDao->insertObject($articleId, $type, $fileId);

	// The same $position variable is handed to every indexing call.
	$position = 0;
	for ($chunk = $fileParser->read(); $chunk !== false; $chunk = $fileParser->read()) {
		ArticleSearchIndex::indexObjectKeywords($objectId, $chunk, $position);
	}
	$fileParser->close();
}
/**
 * Index the text of a monograph file so that it can be searched.
 * Nothing is indexed when the latest revision cannot be retrieved, when
 * no parser is available for it, or when the parser cannot be opened.
 * @param $monographId int
 * @param $type int
 * @param $fileId int
 */
function updateFileIndex($monographId, $type, $fileId) {
	$submissionFileDao =& DAORegistry::getDAO('SubmissionFileDAO'); /* @var $submissionFileDao SubmissionFileDAO */
	$latestRevision =& $submissionFileDao->getLatestRevision($fileId);
	if (!isset($latestRevision)) {
		return;
	}

	$fileParser =& SearchFileParser::fromFile($latestRevision);
	if (!isset($fileParser) || !$fileParser->open()) {
		return;
	}

	// The search object is only created once the parser has been opened.
	$monographSearchDao =& DAORegistry::getDAO('MonographSearchDAO');
	$objectId = $monographSearchDao->insertObject($monographId, $type, $fileId);

	// The same $position variable is handed to every indexing call.
	$position = 0;
	for ($chunk = $fileParser->read(); $chunk !== false; $chunk = $fileParser->read()) {
		MonographSearchIndex::indexObjectKeywords($objectId, $chunk, $position);
	}
	$fileParser->close();
}
/**
 * @see OAIMetadataFormat#toXml
 *
 * Render an OAI record as an NLM Journal Publishing 2.3 <article> document:
 * journal and article metadata, optional full text (only when access is
 * not restricted) and the citation list produced by the NLM filter.
 *
 * TODO:
 *  <copyright-holder>
 *  In Isabelle's mapping document:
 *   Article order in the issue's Table of Contents
 *
 * @param $record object Record exposing 'article', 'journal', 'section' and 'issue' through getData()
 * @param $format mixed Not used by this implementation
 * @return string The XML, starting with <article ...> and ending with </article>
 */
function toXml(&$record, $format = null) {
	$article =& $record->getData('article');
	$journal =& $record->getData('journal');
	$section =& $record->getData('section');
	$issue =& $record->getData('issue');

	// Cache issue ordering information. The static variables survive
	// between calls, so the map is only rebuilt when the issue changes.
	static $issueId;
	static $sectionSeq;
	if (!isset($issueId) || $issueId != $issue->getId()) {
		$sectionDao = DAORegistry::getDAO('SectionDAO');
		$issueId = $issue->getId();
		$sections = $sectionDao->getByIssueId($issueId);
		$sectionSeq = array();
		$i = 0;
		foreach ($sections as $thisSection) {
			$sectionSeq[$thisSection->getId()] = $i++;
		}
		unset($sections);
	}

	// A section missing from the cached ordering is treated as position 0
	// (the value the arithmetic below used to get from an undefined index).
	$sectionId = $section->getId();
	$sectionOrder = isset($sectionSeq[$sectionId]) ? $sectionSeq[$sectionId] : 0;

	$printIssn = $journal->getSetting('printIssn');
	$onlineIssn = $journal->getSetting('onlineIssn');
	$primaryLocale = $article->getLanguage() != '' ? $article->getLanguage() : $journal->getPrimaryLocale();
	$publisherInstitution = $journal->getSetting('publisherInstitution');

	// Prefer the article's own publication date, then the issue's.
	$datePublished = $article->getDatePublished();
	if (!$datePublished) {
		$datePublished = $issue->getDatePublished();
	}
	if ($datePublished) {
		$datePublished = strtotime($datePublished);
	}

	$response = "<article\n" .
		"\txmlns=\"http://dtd.nlm.nih.gov/publishing/2.3\"\n" .
		"\txmlns:xlink=\"http://www.w3.org/1999/xlink\"\n" .
		"\txmlns:mml=\"http://www.w3.org/1998/Math/MathML\"\n" .
		"\txmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" .
		"\txsi:schemaLocation=\"http://dtd.nlm.nih.gov/publishing/2.3\n" .
		"\thttp://dtd.nlm.nih.gov/publishing/2.3/xsd/journalpublishing.xsd\"\n" .
		(($s = $section->getLocalizedIdentifyType()) != '' ? "\tarticle-type=\"" . htmlspecialchars(Core::cleanVar($s)) . "\"\n" : '') .
		"\txml:lang=\"" . strtoupper(substr($primaryLocale, 0, 2)) . "\">\n" .
		"\t<front>\n" .
		"\t\t<journal-meta>\n" .
		"\t\t\t<journal-id journal-id-type=\"other\">" . htmlspecialchars(Core::cleanVar(($s = Config::getVar('oai', 'nlm_journal_id')) != '' ? $s : $journal->getPath())) . "</journal-id>\n" .
		"\t\t\t<journal-title>" . htmlspecialchars(Core::cleanVar($journal->getLocalizedName())) . "</journal-title>\n";

	// Include translated journal titles
	foreach ((array) $journal->getName(null) as $locale => $title) {
		if ($locale == $primaryLocale) {
			continue;
		}
		$response .= "\t\t\t<trans-title xml:lang=\"" . strtoupper(substr($locale, 0, 2)) . "\">" . htmlspecialchars(Core::cleanVar($title)) . "</trans-title>\n";
	}

	$response .=
		(!empty($onlineIssn) ? "\t\t\t<issn pub-type=\"epub\">" . htmlspecialchars(Core::cleanVar($onlineIssn)) . "</issn>\n" : '') .
		(!empty($printIssn) ? "\t\t\t<issn pub-type=\"ppub\">" . htmlspecialchars(Core::cleanVar($printIssn)) . "</issn>\n" : '') .
		($publisherInstitution != '' ? "\t\t\t<publisher><publisher-name>" . htmlspecialchars(Core::cleanVar($publisherInstitution)) . "</publisher-name></publisher>\n" : '') .
		"\t\t</journal-meta>\n" .
		"\t\t<article-meta>\n" .
		"\t\t\t<article-id pub-id-type=\"other\">" . htmlspecialchars(Core::cleanVar($article->getBestArticleId())) . "</article-id>\n" .
		(($s = $article->getPubId('doi')) ? "\t\t\t<article-id pub-id-type=\"doi\">" . htmlspecialchars(Core::cleanVar($s)) . "</article-id>\n" : '') .
		"\t\t\t<article-categories><subj-group subj-group-type=\"heading\"><subject>" . htmlspecialchars(Core::cleanVar($section->getLocalizedTitle())) . "</subject></subj-group></article-categories>\n" .
		"\t\t\t<title-group>\n" .
		"\t\t\t\t<article-title>" . htmlspecialchars(Core::cleanVar(strip_tags($article->getLocalizedTitle()))) . "</article-title>\n";

	// Include translated article titles
	foreach ((array) $article->getTitle(null) as $locale => $title) {
		if ($locale == $primaryLocale) {
			continue;
		}
		$response .= "\t\t\t\t<trans-title xml:lang=\"" . strtoupper(substr($locale, 0, 2)) . "\">" . htmlspecialchars(Core::cleanVar(strip_tags($title))) . "</trans-title>\n";
	}

	$response .=
		"\t\t\t</title-group>\n" .
		"\t\t\t<contrib-group>\n";

	// Include authors
	foreach ($article->getAuthors() as $author) {
		$response .=
			"\t\t\t\t<contrib " . ($author->getPrimaryContact() ? 'corresp="yes" ' : '') . "contrib-type=\"author\">\n" .
			"\t\t\t\t\t<name name-style=\"western\">\n" .
			"\t\t\t\t\t\t<surname>" . htmlspecialchars(Core::cleanVar($author->getLastName())) . "</surname>\n" .
			"\t\t\t\t\t\t<given-names>" . htmlspecialchars(Core::cleanVar($author->getFirstName()) . (($s = $author->getMiddleName()) != '' ? " {$s}" : '')) . "</given-names>\n" .
			"\t\t\t\t\t</name>\n" .
			(($s = $author->getLocalizedAffiliation()) != '' ? "\t\t\t\t\t<aff>" . htmlspecialchars(Core::cleanVar($s)) . "</aff>\n" : '') .
			"\t\t\t\t\t<email>" . htmlspecialchars(Core::cleanVar($author->getEmail())) . "</email>\n" .
			(($s = $author->getUrl()) != '' ? "\t\t\t\t\t<uri>" . htmlspecialchars(Core::cleanVar($s)) . "</uri>\n" : '') .
			"\t\t\t\t</contrib>\n";
	}

	// Include editorships (optimized)
	$response .= $this->getEditorialInfo($journal->getId());

	$response .= "\t\t\t</contrib-group>\n";

	if ($datePublished) {
		$response .=
			"\t\t\t<pub-date pub-type=\"epub\">\n" .
			"\t\t\t\t<day>" . strftime('%d', $datePublished) . "</day>\n" .
			"\t\t\t\t<month>" . strftime('%m', $datePublished) . "</month>\n" .
			"\t\t\t\t<year>" . strftime('%Y', $datePublished) . "</year>\n" .
			"\t\t\t</pub-date>\n";
	}

	$response .=
		($issue->getShowYear() ? "\t\t\t<pub-date pub-type=\"collection\"><year>" . htmlspecialchars(Core::cleanVar($issue->getYear())) . "</year></pub-date>\n" : '') .
		($issue->getShowVolume() ? "\t\t\t<volume>" . htmlspecialchars(Core::cleanVar($issue->getVolume())) . "</volume>\n" : '') .
		($issue->getShowNumber() ? "\t\t\t<issue seq=\"" . htmlspecialchars(Core::cleanVar($sectionOrder * 100 + $article->getSequence())) . "\">" . htmlspecialchars(Core::cleanVar($issue->getNumber())) . "</issue>\n" : '') .
		"\t\t\t<issue-id pub-id-type=\"other\">" . htmlspecialchars(Core::cleanVar($issue->getBestIssueId())) . "</issue-id>\n" .
		($issue->getShowTitle() ? "\t\t\t<issue-title>" . htmlspecialchars(Core::cleanVar($issue->getLocalizedTitle())) . "</issue-title>\n" : '');

	// Include page info, if available and parseable.
	// First pattern: a single page ("p. 12"); second: a range ("pp. 12-34").
	$matches = null;
	if (PKPString::regexp_match_get('/^[Pp][Pp]?[.]?[ ]?(\\d+)$/', $article->getPages(), $matches)) {
		$matchedPage = htmlspecialchars(Core::cleanVar($matches[1]));
		$response .= "\t\t\t\t<fpage>{$matchedPage}</fpage><lpage>{$matchedPage}</lpage>\n";
		$pageCount = 1;
	} elseif (PKPString::regexp_match_get('/^[Pp][Pp]?[.]?[ ]?(\\d+)[ ]?(-|–)[ ]?([Pp][Pp]?[.]?[ ]?)?(\\d+)$/', $article->getPages(), $matches)) {
		$matchedPageFrom = htmlspecialchars(Core::cleanVar($matches[1]));
		$matchedPageTo = htmlspecialchars(Core::cleanVar($matches[4]));
		$response .=
			"\t\t\t\t<fpage>{$matchedPageFrom}</fpage>\n" .
			"\t\t\t\t<lpage>{$matchedPageTo}</lpage>\n";
		$pageCount = $matchedPageTo - $matchedPageFrom + 1;
	}

	// The license URL and copyright year are escaped like every other value
	// written into the document, so that e.g. an "&" in the URL cannot
	// produce malformed XML.
	$licenseUrl = $article->getLicenseURL();
	$response .=
		"\t\t\t<permissions>\n" .
		"\t\t\t\t<copyright-statement>" . htmlspecialchars(__('submission.copyrightStatement', array('copyrightYear' => $article->getCopyrightYear(), 'copyrightHolder' => $article->getLocalizedCopyrightHolder()))) . "</copyright-statement>\n" .
		($datePublished ? "\t\t\t\t<copyright-year>" . htmlspecialchars((string) $article->getCopyrightYear()) . "</copyright-year>\n" : '') .
		"\t\t\t\t<license xlink:href=\"" . htmlspecialchars((string) $licenseUrl) . "\">\n" .
		(($s = Application::getCCLicenseBadge($licenseUrl)) ? "\t\t\t\t\t<license-p>" . strip_tags($s) . "</license-p>\n" : '') .
		"\t\t\t\t</license>\n" .
		"\t\t\t</permissions>\n" .
		"\t\t\t<self-uri xlink:href=\"" . htmlspecialchars(Core::cleanVar(Request::url($journal->getPath(), 'article', 'view', $article->getBestArticleId()))) . "\" />\n";

	// Include galley links
	foreach ($article->getGalleys() as $galley) {
		$response .= "\t\t\t<self-uri content-type=\"" . htmlspecialchars(Core::cleanVar($galley->getFileType())) . "\" xlink:href=\"" . htmlspecialchars(Core::cleanVar(Request::url($journal->getPath(), 'article', 'view', array($article->getBestArticleId(), $galley->getId())))) . "\" />\n";
	}

	// Include abstract(s)
	$abstract = htmlspecialchars(Core::cleanVar(strip_tags($article->getLocalizedAbstract())));
	if (!empty($abstract)) {
		$abstract = "<p>{$abstract}</p>";
		$response .= "\t\t\t<abstract xml:lang=\"" . strtoupper(substr($primaryLocale, 0, 2)) . "\">{$abstract}</abstract>\n";
	}
	if (is_array($article->getAbstract(null))) {
		foreach ($article->getAbstract(null) as $locale => $abstract) {
			if ($locale == $primaryLocale || empty($abstract)) {
				continue;
			}
			$abstract = htmlspecialchars(Core::cleanVar(strip_tags($abstract)));
			if (empty($abstract)) {
				continue;
			}
			$abstract = "<p>{$abstract}</p>";
			$response .= "\t\t\t<abstract-trans xml:lang=\"" . strtoupper(substr($locale, 0, 2)) . "\">{$abstract}</abstract-trans>\n";
		}
	}

	// Keywords: each locale's subject string is split on ';'.
	$subjects = array();
	if (is_array($article->getSubject(null))) {
		foreach ($article->getSubject(null) as $locale => $subject) {
			$s = array_map('trim', explode(';', Core::cleanVar($subject)));
			if (!empty($s)) {
				$subjects[$locale] = $s;
			}
		}
	}
	if (!empty($subjects)) {
		foreach ($subjects as $locale => $s) {
			$response .= "\t\t\t<kwd-group xml:lang=\"" . strtoupper(substr($locale, 0, 2)) . "\">\n";
			foreach ($s as $subject) {
				$response .= "\t\t\t\t<kwd>" . htmlspecialchars($subject) . "</kwd>\n";
			}
			$response .= "\t\t\t</kwd-group>\n";
		}
	}

	$response .=
		(isset($pageCount) ? "\t\t\t<counts><page-count count=\"" . (int) $pageCount . "\" /></counts>\n" : '') .
		"\t\t</article-meta>\n" .
		"\t</front>\n";

	// Include body text (for search indexing only)
	import('classes.search.ArticleSearchIndex');
	$text = '';

	// Give precedence to HTML galleys, as they're quickest to parse.
	// A stable partition replaces the former create_function() comparator,
	// whose source had an unbalanced parenthesis (so it could never compile)
	// and which is no longer available on PHP 8.
	$htmlGalleys = array();
	$otherGalleys = array();
	foreach ($article->getGalleys() as $candidate) {
		if ($candidate->getFileType() == 'text/html') {
			$htmlGalleys[] = $candidate;
		} else {
			$otherGalleys[] = $candidate;
		}
	}
	$galleys = array_merge($htmlGalleys, $otherGalleys);

	// Determine any access limitations. If there are, do not
	// provide the full-text.
	import('classes.issue.IssueAction');
	$issueAction = new IssueAction();
	$subscriptionRequired = $issueAction->subscriptionRequired($issue);
	$isSubscribedDomain = $issueAction->subscribedDomain($journal, $issue->getId(), $article->getId());

	if (!$subscriptionRequired || $isSubscribedDomain) {
		foreach ($galleys as $galley) {
			$parser =& SearchFileParser::fromFile($galley);
			if ($parser && $parser->open()) {
				while (($s = $parser->read()) !== false) {
					$text .= $s;
				}
				$parser->close();
			}

			if ($galley->getFileType() == 'text/html') {
				$text = strip_tags($text);
			}
			unset($galley);

			// Use the first parseable galley.
			if (!empty($text)) {
				break;
			}
		}
	}
	if (!empty($text)) {
		// NOTE(review): cleanVar is applied twice, as in the previous
		// implementation; presumably once would do - confirm before removing.
		$response .= "\t<body><p>" . htmlspecialchars(Core::cleanVar(Core::cleanVar($text))) . "</p></body>\n";
	}

	// Add NLM citation info
	$filterDao = DAORegistry::getDAO('FilterDAO'); /* @var $filterDao FilterDAO */
	$nlmFilters = $filterDao->getObjectsByGroup('submission=>nlm23-article-xml');
	assert(count($nlmFilters) == 1);
	$nlmFilter = array_pop($nlmFilters);
	$nlmXmlDom = new DOMDocument();
	$nlmXmlDom->loadXML($nlmFilter->execute($article));
	$documentElement = $nlmXmlDom->documentElement;

	// Work-around for hasChildNodes being stupid about whitespace:
	// only element children count as content.
	$hasChildren = false;
	if (isset($documentElement->childNodes)) {
		foreach ($documentElement->childNodes as $c) {
			if ($c->nodeType == XML_ELEMENT_NODE) {
				$hasChildren = true;
				break;
			}
		}
	}

	// If there were any citations, include them.
	if ($hasChildren) {
		$innerXml = $nlmXmlDom->saveXML($documentElement);
		$response .= "<back>{$innerXml}</back>\n";
	}

	$response .= "</article>";

	return $response;
}
/**
 * Build the Erudit <article> element for one galley of an article.
 *
 * The element contains an <admin> section (article, journal, issue,
 * publisher and DTD information plus the copyright notice), a
 * <frontmatter> section (title, authors, abstracts, keywords) and a
 * <body> holding the galley text when the file can be parsed.
 *
 * @param $doc object XML document the nodes are created in
 * @param $journal object
 * @param $issue object
 * @param $article object
 * @param $galley object
 * @return object The root <article> node
 */
function &generateArticleDom(&$doc, &$journal, &$issue, &$article, &$galley) {
	$unavailableString = Locale::translate('plugins.importexport.erudit.unavailable');

	$root =& XMLCustomWriter::createElement($doc, 'article');
	XMLCustomWriter::setAttribute($root, 'idprop', $journal->getJournalId() . '-' . $issue->getIssueId() . '-' . $article->getArticleId() . '-' . $galley->getGalleyId(), false);
	XMLCustomWriter::setAttribute($root, 'arttype', 'article');

	// Fall back to 'en' whenever the article has no language. This uses
	// the same truthiness rule for the root element and the keyword group
	// (previously an empty string was written to the root element as-is).
	$lang = $article->getLanguage();
	if (empty($lang)) {
		$lang = 'en';
	}
	XMLCustomWriter::setAttribute($root, 'lang', $lang);
	XMLCustomWriter::setAttribute($root, 'processing', 'cart');

	/* --- admin --- */

	$adminNode =& XMLCustomWriter::createElement($doc, 'admin');
	XMLCustomWriter::appendChild($root, $adminNode);

	/* --- articleinfo --- */

	$articleInfoNode =& XMLCustomWriter::createElement($doc, 'articleinfo');
	XMLCustomWriter::appendChild($adminNode, $articleInfoNode);

	// The first public ID should be a full URL to the article.
	$urlIdNode =& XMLCustomWriter::createChildWithText($doc, $articleInfoNode, 'idpublic', Request::url($journal->getPath(), 'article', 'view', array($article->getArticleId(), $galley->getGalleyId())));
	XMLCustomWriter::setAttribute($urlIdNode, 'scheme', 'sici');

	/* --- journal --- */

	$journalNode =& XMLCustomWriter::createElement($doc, 'journal');
	XMLCustomWriter::appendChild($adminNode, $journalNode);
	XMLCustomWriter::setAttribute($journalNode, 'id', 'ojs-' . $journal->getPath());
	XMLCustomWriter::createChildWithText($doc, $journalNode, 'jtitle', $journal->getJournalTitle());
	XMLCustomWriter::createChildWithText($doc, $journalNode, 'jshorttitle', $journal->getLocalizedSetting('initials'), false);

	if (!($printIssn = $journal->getSetting('printIssn'))) {
		$printIssn = $unavailableString;
	}
	XMLCustomWriter::createChildWithText($doc, $journalNode, 'idissn', $printIssn);

	if (!($onlineIssn = $journal->getSetting('onlineIssn'))) {
		$onlineIssn = $unavailableString;
	}
	XMLCustomWriter::createChildWithText($doc, $journalNode, 'iddigissn', $onlineIssn);

	/* --- issue --- */

	$issueNode =& XMLCustomWriter::createElement($doc, 'issue');
	XMLCustomWriter::appendChild($adminNode, $issueNode);
	XMLCustomWriter::setAttribute($issueNode, 'id', 'ojs-' . $issue->getBestIssueId());
	XMLCustomWriter::createChildWithText($doc, $issueNode, 'volume', $issue->getVolume(), false);
	XMLCustomWriter::createChildWithText($doc, $issueNode, 'issueno', $issue->getNumber(), false);

	$pubNode =& XMLCustomWriter::createElement($doc, 'pub');
	XMLCustomWriter::appendChild($issueNode, $pubNode);
	XMLCustomWriter::createChildWithText($doc, $pubNode, 'year', $issue->getYear());

	$digPubNode =& XMLCustomWriter::createElement($doc, 'digpub');
	XMLCustomWriter::appendChild($issueNode, $digPubNode);
	XMLCustomWriter::createChildWithText($doc, $digPubNode, 'date', EruditExportDom::formatDate($issue->getDatePublished()));

	/* --- Publisher & DTD --- */

	// Use the journal's publisher setting and only fall back to the
	// "unavailable" text when it is empty. The value is taken by value:
	// it used to be bound by reference and then unconditionally overwritten,
	// so the configured publisher never reached the export.
	$publisherInstitution = $journal->getSetting('publisherInstitution');
	if (empty($publisherInstitution)) {
		$publisherInstitution = $unavailableString;
	}

	$publisherNode =& XMLCustomWriter::createElement($doc, 'publisher');
	XMLCustomWriter::setAttribute($publisherNode, 'id', 'ojs-' . $journal->getJournalId() . '-' . $issue->getIssueId() . '-' . $article->getArticleId());
	XMLCustomWriter::appendChild($adminNode, $publisherNode);
	XMLCustomWriter::createChildWithText($doc, $publisherNode, 'orgname', $publisherInstitution);

	$digprodNode =& XMLCustomWriter::createElement($doc, 'digprod');
	XMLCustomWriter::createChildWithText($doc, $digprodNode, 'orgname', $publisherInstitution);
	XMLCustomWriter::setAttribute($digprodNode, 'id', 'ojs-prod-' . $journal->getJournalId() . '-' . $issue->getIssueId() . '-' . $article->getArticleId());
	XMLCustomWriter::appendChild($adminNode, $digprodNode);

	$digdistNode =& XMLCustomWriter::createElement($doc, 'digdist');
	XMLCustomWriter::createChildWithText($doc, $digdistNode, 'orgname', $publisherInstitution);
	XMLCustomWriter::setAttribute($digdistNode, 'id', 'ojs-dist-' . $journal->getJournalId() . '-' . $issue->getIssueId() . '-' . $article->getArticleId());
	XMLCustomWriter::appendChild($adminNode, $digdistNode);

	$dtdNode =& XMLCustomWriter::createElement($doc, 'dtd');
	XMLCustomWriter::appendChild($adminNode, $dtdNode);
	XMLCustomWriter::setAttribute($dtdNode, 'name', 'Erudit Article');
	XMLCustomWriter::setAttribute($dtdNode, 'version', '3.0.0');

	/* --- copyright --- */

	$copyright = $journal->getLocalizedSetting('copyrightNotice');
	XMLCustomWriter::createChildWithText($doc, $adminNode, 'copyright', empty($copyright) ? $unavailableString : $copyright);

	/* --- frontmatter --- */

	$frontMatterNode =& XMLCustomWriter::createElement($doc, 'frontmatter');
	XMLCustomWriter::appendChild($root, $frontMatterNode);

	$titleGroupNode =& XMLCustomWriter::createElement($doc, 'titlegr');
	XMLCustomWriter::appendChild($frontMatterNode, $titleGroupNode);
	XMLCustomWriter::createChildWithText($doc, $titleGroupNode, 'title', strip_tags($article->getArticleTitle()));

	/* --- authorgr --- */

	$authorGroupNode =& XMLCustomWriter::createElement($doc, 'authorgr');
	XMLCustomWriter::appendChild($frontMatterNode, $authorGroupNode);

	// Authors are numbered from 1; the number is the last part of the id.
	$authorNum = 1;
	foreach ($article->getAuthors() as $author) {
		$authorNode =& XMLCustomWriter::createElement($doc, 'author');
		XMLCustomWriter::appendChild($authorGroupNode, $authorNode);
		XMLCustomWriter::setAttribute($authorNode, 'id', 'ojs-' . $journal->getJournalId() . '-' . $issue->getIssueId() . '-' . $article->getArticleId() . '-' . $galley->getGalleyId() . '-' . $authorNum);

		$persNameNode =& XMLCustomWriter::createElement($doc, 'persname');
		XMLCustomWriter::appendChild($authorNode, $persNameNode);

		// Opatan Inc.
		foreach ((array) $author->getFirstName(null) as $locale => $firstName) {
			$firstName = strip_tags($firstName);
			$firstNameNode =& XMLCustomWriter::createElement($doc, 'firstname');
			XMLCustomWriter::setAttribute($firstNameNode, 'lang', $locale);
			XMLCustomWriter::appendChild($persNameNode, $firstNameNode);
			XMLCustomWriter::createChildWithText($doc, $firstNameNode, 'blocktext', $firstName);
			unset($firstNameNode);
		}

		// Opatan Inc.
		foreach ((array) $author->getMiddleName(null) as $locale => $middleName) {
			$middleName = strip_tags($middleName);
			$middleNameNode =& XMLCustomWriter::createElement($doc, 'middlename');
			XMLCustomWriter::setAttribute($middleNameNode, 'lang', $locale);
			XMLCustomWriter::appendChild($persNameNode, $middleNameNode);
			XMLCustomWriter::createChildWithText($doc, $middleNameNode, 'blocktext', $middleName);
			unset($middleNameNode);
		}

		// Opatan Inc.
		foreach ((array) $author->getLastName(null) as $locale => $lastName) {
			$lastName = strip_tags($lastName);
			$lastNameNode =& XMLCustomWriter::createElement($doc, 'familyname');
			XMLCustomWriter::setAttribute($lastNameNode, 'lang', $locale);
			XMLCustomWriter::appendChild($persNameNode, $lastNameNode);
			XMLCustomWriter::createChildWithText($doc, $lastNameNode, 'blocktext', $lastName);
			unset($lastNameNode);
		}

		// Opatan Inc.
		foreach ((array) $author->getAffiliation(null) as $locale => $affiliation) {
			$affiliation = strip_tags($affiliation);
			$affiliationNode =& XMLCustomWriter::createElement($doc, 'affiliation');
			XMLCustomWriter::setAttribute($affiliationNode, 'lang', $locale);
			XMLCustomWriter::appendChild($authorNode, $affiliationNode);
			XMLCustomWriter::createChildWithText($doc, $affiliationNode, 'blocktext', $affiliation, false);
		}

		$authorNum++;
	}

	/* --- abstract and keywords --- */

	foreach ((array) $article->getAbstract(null) as $locale => $abstract) {
		$abstract = strip_tags($abstract);
		$abstractNode =& XMLCustomWriter::createElement($doc, 'abstract');
		XMLCustomWriter::setAttribute($abstractNode, 'lang', $locale);
		XMLCustomWriter::appendChild($frontMatterNode, $abstractNode);
		XMLCustomWriter::createChildWithText($doc, $abstractNode, 'blocktext', $abstract);
		unset($abstractNode);
	}

	// Keywords come from a single ';'-separated subject string.
	if ($keywords = $article->getArticleSubject()) {
		$keywordGroupNode =& XMLCustomWriter::createElement($doc, 'keywordgr');
		XMLCustomWriter::setAttribute($keywordGroupNode, 'lang', $lang);
		foreach (explode(';', $keywords) as $keyword) {
			XMLCustomWriter::createChildWithText($doc, $keywordGroupNode, 'keyword', trim($keyword), false);
		}
		XMLCustomWriter::appendChild($frontMatterNode, $keywordGroupNode);
	}

	/* --- body --- */

	$bodyNode =& XMLCustomWriter::createElement($doc, 'body');
	XMLCustomWriter::appendChild($root, $bodyNode);

	import('file.ArticleFileManager');
	// Plain assignment: "=& new" is a parse error on PHP 7 and later.
	$articleFileManager = new ArticleFileManager($article->getArticleId());
	$file =& $articleFileManager->getFile($galley->getFileId());

	// Only ask for a parser when the galley file could be retrieved;
	// otherwise the <body> element is simply left empty.
	if (isset($file)) {
		$parser =& SearchFileParser::fromFile($file);
	}
	if (isset($parser) && $parser->open()) {
		// File supports text indexing.
		$textNode =& XMLCustomWriter::createElement($doc, 'text');
		XMLCustomWriter::appendChild($bodyNode, $textNode);

		// Blank lines are skipped; every other line becomes a <blocktext>.
		while (($line = $parser->read()) !== false) {
			$line = trim($line);
			if ($line != '') {
				XMLCustomWriter::createChildWithText($doc, $textNode, 'blocktext', $line, false);
			}
		}
		$parser->close();
	}

	return $root;
}
/**
 * Constructor (legacy class-named form).
 * Hands the file path to the parent SearchFileParser constructor and
 * then records the parser type on this instance.
 * @param $type mixed Value stored in $this->type
 * @param $filePath string Passed unchanged to the parent constructor
 */
function SearchHelperParser($type, $filePath) {
	// The parent is initialised first, then the type is stored.
	parent::SearchFileParser($filePath);

	$this->type = $type;
}
/**
 * Notify the indexing back-end that an article file has changed.
 *
 * Registered hooks are called first. The built-in database index is
 * only updated when the hook call returns false or null.
 *
 * @see ArticleSearchIndex::articleMetadataChanged()
 *
 * @param $articleId int
 * @param $type int
 * @param $fileId int
 */
function articleFileChanged($articleId, $type, $fileId) {
	// Check whether a search plug-in jumps in.
	$hookResult =& HookRegistry::call('ArticleSearchIndex::articleFileChanged', array($articleId, $type, $fileId));

	// Any result other than false/null means the default database
	// search implementation is not used.
	if ($hookResult !== false && !is_null($hookResult)) {
		return;
	}

	import('classes.file.ArticleFileManager');
	$articleFileManager = new ArticleFileManager($articleId);
	$articleFile =& $articleFileManager->getFile($fileId);
	if (!isset($articleFile)) {
		return;
	}

	$fileParser =& SearchFileParser::fromFile($articleFile);
	if (!isset($fileParser) || !$fileParser->open()) {
		return;
	}

	// The search object is only created once the parser has been opened.
	$articleSearchDao =& DAORegistry::getDAO('ArticleSearchDAO');
	$objectId = $articleSearchDao->insertObject($articleId, $type, $fileId);

	// The same $position variable is handed to every indexing call.
	$position = 0;
	for ($chunk = $fileParser->read(); $chunk !== false; $chunk = $fileParser->read()) {
		$this->_indexObjectKeywords($objectId, $chunk, $position);
	}
	$fileParser->close();
}
/**
 * Build the text parser matching a file object.
 * The file's type and path are read from the object and handed to
 * SearchFileParser::fromFileType(), whose result is returned.
 * @param $file [Article|Paper]File
 * @return SearchFileParser
 */
function &fromFile(&$file) {
	$fileType = $file->getFileType();
	$filePath = $file->getFilePath();

	// Returned through a variable because the function returns by reference.
	$parser =& SearchFileParser::fromFileType($fileType, $filePath);
	return $parser;
}
/**
 * Notify the indexing back-end that a submission file has changed.
 *
 * Registered hooks are called first. The built-in database index is
 * only updated when the hook call returns false or null.
 *
 * @see ArticleSearchIndex::articleMetadataChanged()
 *
 * @param $articleId int
 * @param $type int
 * @param $fileId int
 */
function submissionFileChanged($articleId, $type, $fileId) {
	// Check whether a search plug-in jumps in.
	$hookResult = HookRegistry::call('ArticleSearchIndex::submissionFileChanged', array($articleId, $type, $fileId));

	// Any result other than false/null means the default database
	// search implementation is not used.
	if ($hookResult !== false && !is_null($hookResult)) {
		return;
	}

	$submissionFileDao = DAORegistry::getDAO('SubmissionFileDAO'); /* @var $submissionFileDao SubmissionFileDAO */
	$latestRevision = $submissionFileDao->getLatestRevision($fileId);
	if (!isset($latestRevision)) {
		return;
	}

	$fileParser = SearchFileParser::fromFile($latestRevision);
	if (!isset($fileParser) || !$fileParser->open()) {
		return;
	}

	// The search object is only created once the parser has been opened.
	$articleSearchDao = DAORegistry::getDAO('ArticleSearchDAO');
	$objectId = $articleSearchDao->insertObject($articleId, $type, $fileId);

	// The same $position variable is handed to every indexing call.
	$position = 0;
	for ($chunk = $fileParser->read(); $chunk !== false; $chunk = $fileParser->read()) {
		self::_indexObjectKeywords($objectId, $chunk, $position);
	}
	$fileParser->close();
}
/**
 * Constructor.
 * Hands the file path to the parent constructor and then records the
 * parser type on this instance.
 * @param $type mixed Value stored in $this->type
 * @param $filePath string Passed unchanged to the parent constructor
 */
function __construct($type, $filePath) {
	// The parent is initialised first, then the type is stored.
	parent::__construct($filePath);

	$this->type = $type;
}
/**
 * @see OAIMetadataFormat#toXml
 *
 * Render an OAI record for a conference paper as an NLM Journal
 * Publishing 2.3 <article> document: conference and paper metadata plus,
 * when the proceedings may be viewed, the text of the first parseable
 * galley.
 *
 * @param $record object Record exposing 'conference', 'schedConf', 'paper', 'track' and 'galleys' through getData()
 * @param $format mixed Not used by this implementation
 * @return string The XML, starting with <article ...> and ending with </article>
 */
function toXml(&$record, $format = null) {
	$conference =& $record->getData('conference');
	$schedConf =& $record->getData('schedConf');
	$paper =& $record->getData('paper');
	$track =& $record->getData('track');
	$galleys =& $record->getData('galleys');

	$primaryLocale = $conference->getPrimaryLocale();

	// If possible, use the paper presentation date for the paper date fields.
	// Otherwise, use the date published (i.e. the date it was marked "completed"
	// in the workflow).
	if ($datePublished = $paper->getStartTime()) {
		$datePublished = strtotime($datePublished);
	} else {
		$datePublished = strtotime($paper->getDatePublished());
	}

	$response = "<article\n" .
		"\txmlns=\"http://dtd.nlm.nih.gov/publishing/2.3\"\n" .
		"\txmlns:xlink=\"http://www.w3.org/1999/xlink\"\n" .
		"\txmlns:mml=\"http://www.w3.org/1998/Math/MathML\"\n" .
		"\txmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" .
		"\txsi:schemaLocation=\"http://dtd.nlm.nih.gov/publishing/2.3\n" .
		"\thttp://dtd.nlm.nih.gov/publishing/2.3/xsd/journalpublishing.xsd\"\n" .
		(($s = $track->getLocalizedIdentifyType()) != '' ? "\tarticle-type=\"" . htmlspecialchars(Core::cleanVar($s)) . "\"\n" : '') .
		"\txml:lang=\"" . strtoupper(substr($primaryLocale, 0, 2)) . "\">\n" .
		"\t<front>\n" .
		"\t\t<journal-meta>\n" .
		"\t\t\t<journal-id journal-id-type=\"other\">" . htmlspecialchars(Core::cleanVar(($s = Config::getVar('oai', 'nlm_journal_id')) != '' ? $s : $conference->getPath() . '-' . $schedConf->getPath())) . "</journal-id>\n" .
		"\t\t\t<journal-title>" . htmlspecialchars(Core::cleanVar($schedConf->getLocalizedName())) . "</journal-title>\n";

	// Include translated scheduled conference titles
	foreach ((array) $schedConf->getTitle(null) as $locale => $title) {
		if ($locale == $primaryLocale) {
			continue;
		}
		$response .= "\t\t\t<trans-title xml:lang=\"" . strtoupper(substr($locale, 0, 2)) . "\">" . htmlspecialchars(Core::cleanVar($title)) . "</trans-title>\n";
	}

	$response .=
		"\t\t</journal-meta>\n" .
		"\t\t<article-meta>\n" .
		"\t\t\t<article-id pub-id-type=\"other\">" . htmlspecialchars(Core::cleanVar($paper->getId())) . "</article-id>\n" .
		"\t\t\t<article-categories><subj-group subj-group-type=\"heading\"><subject>" . htmlspecialchars(Core::cleanVar($track->getLocalizedTitle())) . "</subject></subj-group></article-categories>\n" .
		"\t\t\t<title-group>\n" .
		"\t\t\t\t<article-title>" . htmlspecialchars(Core::cleanVar(strip_tags($paper->getLocalizedTitle()))) . "</article-title>\n";

	// Include translated paper titles
	foreach ((array) $paper->getTitle(null) as $locale => $title) {
		if ($locale == $primaryLocale) {
			continue;
		}
		$response .= "\t\t\t\t<trans-title xml:lang=\"" . strtoupper(substr($locale, 0, 2)) . "\">" . htmlspecialchars(Core::cleanVar(strip_tags($title))) . "</trans-title>\n";
	}

	$response .=
		"\t\t\t</title-group>\n" .
		"\t\t\t<contrib-group>\n";

	// Include authors
	foreach ($paper->getAuthors() as $author) {
		$response .=
			"\t\t\t\t<contrib " . ($author->getPrimaryContact() ? 'corresp="yes" ' : '') . "contrib-type=\"author\">\n" .
			"\t\t\t\t\t<name name-style=\"western\">\n" .
			"\t\t\t\t\t\t<surname>" . htmlspecialchars(Core::cleanVar($author->getLastName())) . "</surname>\n" .
			"\t\t\t\t\t\t<given-names>" . htmlspecialchars(Core::cleanVar($author->getFirstName()) . (($s = $author->getMiddleName()) != '' ? " {$s}" : '')) . "</given-names>\n" .
			"\t\t\t\t\t</name>\n" .
			(($s = $author->getLocalizedAffiliation()) != '' ? "\t\t\t\t\t<aff>" . htmlspecialchars(Core::cleanVar($s)) . "</aff>\n" : '') .
			"\t\t\t\t\t<email>" . htmlspecialchars(Core::cleanVar($author->getEmail())) . "</email>\n" .
			(($s = $author->getUrl()) != '' ? "\t\t\t\t\t<uri>" . htmlspecialchars(Core::cleanVar($s)) . "</uri>\n" : '') .
			"\t\t\t\t</contrib>\n";
	}

	// Include editorships (optimized)
	$response .= $this->getEditorialInfo($conference->getId());

	$response .= "\t\t\t</contrib-group>\n";

	// Only emit date elements when a usable timestamp exists. Without this
	// guard a missing or unparseable date was rendered as 1 January 1970.
	if ($datePublished) {
		$response .=
			"\t\t\t<pub-date pub-type=\"epub\">\n" .
			"\t\t\t\t<day>" . strftime('%d', $datePublished) . "</day>\n" .
			"\t\t\t\t<month>" . strftime('%m', $datePublished) . "</month>\n" .
			"\t\t\t\t<year>" . strftime('%Y', $datePublished) . "</year>\n" .
			"\t\t\t</pub-date>\n";
	}

	$response .=
		"\t\t\t<permissions>\n" .
		(($s = $conference->getLocalizedSetting('copyrightNotice')) != '' ? "\t\t\t\t<copyright-statement>" . htmlspecialchars(Core::cleanVar($s)) . "</copyright-statement>\n" : '') .
		($datePublished ? "\t\t\t\t<copyright-year>" . strftime('%Y', $datePublished) . "</copyright-year>\n" : '') .
		"\t\t\t</permissions>\n" .
		"\t\t\t<self-uri xlink:href=\"" . htmlspecialchars(Core::cleanVar(Request::url($conference->getPath(), $schedConf->getPath(), 'paper', 'view', $paper->getId()))) . "\" />\n";

	// Include galley links
	foreach ($paper->getGalleys() as $galley) {
		$response .= "\t\t\t<self-uri content-type=\"" . htmlspecialchars(Core::cleanVar($galley->getFileType())) . "\" xlink:href=\"" . htmlspecialchars(Core::cleanVar(Request::url($conference->getPath(), $schedConf->getPath(), 'paper', 'view', array($paper->getId(), $galley->getId())))) . "\" />\n";
	}

	// Include abstract(s)
	$abstract = htmlspecialchars(Core::cleanVar(strip_tags($paper->getLocalizedAbstract())));
	if (!empty($abstract)) {
		$abstract = "<p>{$abstract}</p>";
		$response .= "\t\t\t<abstract xml:lang=\"" . strtoupper(substr($primaryLocale, 0, 2)) . "\">{$abstract}</abstract>\n";
	}
	if (is_array($paper->getAbstract(null))) {
		foreach ($paper->getAbstract(null) as $locale => $abstract) {
			if ($locale == $primaryLocale || empty($abstract)) {
				continue;
			}
			$abstract = htmlspecialchars(Core::cleanVar(strip_tags($abstract)));
			if (empty($abstract)) {
				continue;
			}
			$abstract = "<p>{$abstract}</p>";
			$response .= "\t\t\t<abstract-trans xml:lang=\"" . strtoupper(substr($locale, 0, 2)) . "\">{$abstract}</abstract-trans>\n";
		}
	}

	// Keywords: each locale's subject string is split on ';'.
	$subjects = array();
	if (is_array($paper->getSubject(null))) {
		foreach ($paper->getSubject(null) as $locale => $subject) {
			$s = array_map('trim', explode(';', Core::cleanVar($subject)));
			if (!empty($s)) {
				$subjects[$locale] = $s;
			}
		}
	}
	if (!empty($subjects)) {
		foreach ($subjects as $locale => $s) {
			$response .= "\t\t\t<kwd-group xml:lang=\"" . strtoupper(substr($locale, 0, 2)) . "\">\n";
			foreach ($s as $subject) {
				$response .= "\t\t\t\t<kwd>" . htmlspecialchars($subject) . "</kwd>\n";
			}
			$response .= "\t\t\t</kwd-group>\n";
		}
	}

	// Conference location: "City, Country", either part alone, or nothing.
	$locationCity = $schedConf->getSetting('locationCity');
	$locationCountry = $schedConf->getSetting('locationCountry');
	if (empty($locationCity) && empty($locationCountry)) {
		$confLoc = '';
	} elseif (empty($locationCity) && !empty($locationCountry)) {
		$confLoc = $locationCountry;
	} elseif (empty($locationCountry)) {
		$confLoc = $locationCity;
	} else {
		$confLoc = "{$locationCity}, {$locationCountry}";
	}

	// NOTE(review): 'startDate' is passed to strftime() as a timestamp;
	// presumably the setting is stored that way - confirm.
	$response .=
		"\t\t\t<conference>\n" .
		"\t\t\t\t<conf-date>" . strftime('%Y-%m-%d', $schedConf->getSetting('startDate')) . "</conf-date>\n" .
		"\t\t\t\t<conf-name>" . htmlspecialchars(Core::cleanVar($schedConf->getLocalizedName())) . "</conf-name>\n" .
		"\t\t\t\t<conf-acronym>" . htmlspecialchars(Core::cleanVar($schedConf->getLocalizedAcronym())) . "</conf-acronym>\n" .
		(!empty($confLoc) ? "\t\t\t\t<conf-loc>" . htmlspecialchars(Core::cleanVar($confLoc)) . "</conf-loc>\n" : '') .
		"\t\t\t</conference>\n" .
		"\t\t</article-meta>\n" .
		"\t</front>\n";

	// Include body text (for search indexing only)
	import('classes.search.PaperSearchIndex');
	$text = '';

	// Give precedence to HTML galleys, as they're quickest to parse.
	// A stable partition replaces the former create_function() comparator,
	// which is no longer available on PHP 8. The reordered list is written
	// back to $galleys, as the in-place sort did.
	if (is_array($galleys)) {
		$htmlGalleys = array();
		$otherGalleys = array();
		foreach ($galleys as $candidate) {
			if ($candidate->isHtmlGalley()) {
				$htmlGalleys[] = $candidate;
			} else {
				$otherGalleys[] = $candidate;
			}
		}
		$galleys = array_merge($htmlGalleys, $otherGalleys);
	}

	// Determine any access limitations. If there are, do not
	// provide the full-text.
	import('classes.schedConf.SchedConfAction');
	$mayViewProceedings = SchedConfAction::mayViewProceedings($schedConf);

	if ($mayViewProceedings && is_array($galleys)) {
		foreach ($galleys as $galley) {
			$parser =& SearchFileParser::fromFile($galley);
			if ($parser && $parser->open()) {
				while (($s = $parser->read()) !== false) {
					$text .= $s;
				}
				$parser->close();
			}

			if ($galley->isHtmlGalley()) {
				$text = strip_tags($text);
			}
			unset($galley);

			// Use the first parseable galley.
			if (!empty($text)) {
				break;
			}
		}
	}
	if (!empty($text)) {
		// NOTE(review): cleanVar is applied twice, as in the previous
		// implementation; presumably once would do - confirm before removing.
		$response .= "\t<body><p>" . htmlspecialchars(Core::cleanVar(Core::cleanVar($text))) . "</p></body>\n";
	}

	$response .= "</article>";

	return $response;
}