/**
 * Renders the index view, optionally with the IDs of all published
 * documents of one year.
 *
 * The request parameter 'year' is only honoured when it consists of exactly
 * four digits; any other value leaves the view without an ID list.
 */
public function listAction()
{
    $this->indexAction();

    $requestedYear = trim($this->_getParam('year'));
    $isFourDigitYear = preg_match('/^\\d{4}$/', $requestedYear) === 1;

    if ($isFourDigitYear) {
        $finder = new Opus_DocumentFinder();
        $finder->setServerState('published');
        // Range is passed as (requested year, following year).
        $finder->setServerDatePublishedRange($requestedYear, $requestedYear + 1);
        $this->view->ids = $finder->ids();
    }

    return $this->render('index');
}
/**
 * Builds a report of published documents with missing metadata and renders
 * the index view.
 *
 * A document counts one error when it has no 'projects' collection and one
 * error when it has neither an MSC subject nor an 'msc' collection. For
 * every document with at least one error, its ID is recorded under each of
 * its authors (keyed by "name <email>", or just "name" without an email).
 *
 * The request parameter 'year' must consist of exactly four digits,
 * otherwise the view is rendered without any report data.
 */
public function listAction()
{
    $this->indexAction();

    $year = trim($this->_getParam('year'));
    if (preg_match('/^\\d{4}$/', $year) !== 1) {
        return $this->render('index');
    }

    $stats = array();
    $emailToDocIds = array();

    $finder = new Opus_DocumentFinder();
    $finder->setServerState('published');
    // The year is validated above, but the restriction of the finder to
    // that year is currently disabled:
    // $finder->setServerDatePublishedRange($year, $year + 1);

    foreach ($finder->ids() as $docId) {
        $document = new Opus_Document($docId);
        $authorEmails = $this->_getAuthorEmails($document);

        $missingProject = $this->_countCollectionRole($document, 'projects') === 0;
        $missingMsc = $this->_countSubjectMsc($document) === 0
            && $this->_countCollectionRole($document, 'msc') === 0;

        // The referee check (count($document->getPersonReferee()) === 0)
        // is disabled, so 'missingReferee' always stays empty.
        $docStats = array(
            'id' => $docId,
            'authorEmails' => $authorEmails,
            'errors' => (int) $missingProject + (int) $missingMsc,
            'missingProject' => $missingProject ? true : '',
            'missingMsc' => $missingMsc ? true : '',
            'missingReferee' => '',
        );

        if ($docStats['errors'] > 0) {
            foreach ($authorEmails as $author) {
                $email = trim($author['email']);
                $name = trim($author['name']);
                $key = empty($email) ? $name : $name . " <{$email}>";

                if (!isset($emailToDocIds[$key])) {
                    $emailToDocIds[$key] = array(
                        'email' => $email,
                        'name' => $name,
                        'ids' => array(),
                    );
                }

                // Keep the ID list of each author duplicate-free and sorted.
                $idsOfAuthor = $emailToDocIds[$key]['ids'];
                $idsOfAuthor[] = $docId;
                $idsOfAuthor = array_unique($idsOfAuthor);
                sort($idsOfAuthor);
                $emailToDocIds[$key]['ids'] = $idsOfAuthor;
            }
        }

        $stats[] = $docStats;
    }

    $this->view->stats = $stats;
    $this->view->email2docid = $emailToDocIds;

    return $this->render('index');
}
/**
 * Verifies for every published database document that it is present in the
 * Solr index and that the serverDateModified value of the index entry
 * matches the database.
 *
 * Outcome per document ID:
 *  - no hit in the index: counted as inconsistency, reindexing is attempted
 *  - exactly one hit with a differing serverDateModified: counted as
 *    inconsistency, reindexing is attempted
 *  - more than one hit: only an error is logged
 */
private function checkDatabase()
{
    $finder = new Opus_DocumentFinder();
    $finder->setServerState('published');
    $ids = $finder->ids();

    $this->logger->info('checking ' . $finder->count() . ' published documents for consistency.');

    foreach ($ids as $id) {
        try {
            $doc = new Opus_Document($id);
        } catch (Opus_Model_NotFoundException $e) {
            // ignore: document was deleted from database in meantime
            continue;
        }

        $dbTimestamp = $doc->getServerDateModified()->getUnixTimestamp();

        // look up the document in the index by its ID
        $query = new Opus_SolrSearch_Query(Opus_SolrSearch_Query::DOC_ID);
        $query->setField('id', $id);
        $resultList = $this->searcher->search($query, $this->validateDocIds);
        $results = $resultList->getResults();
        $hits = $resultList->getNumberOfHits();

        if ($hits == 0) {
            $this->logger->info("inconsistency found for document {$id}: document is in database, but is not in Solr index.");
            $this->numOfInconsistencies++;
            if ($this->forceReindexing($doc)) {
                $this->numOfUpdates++;
            }
            continue;
        }

        if ($hits != 1) {
            $this->logger->err('unexpected state: document with id ' . $id . ' exists multiple times in index.');
            continue;
        }

        if ($results[0]->getServerDateModified() == $dbTimestamp) {
            // index entry is up to date
            continue;
        }

        $this->numOfInconsistencies++;
        $this->logger->info("inconsistency found for document {$id}: mismatch between values of server_date_modified in database and Solr index.");
        if ($this->forceReindexing($doc)) {
            $this->numOfUpdates++;
        }
    }
}
/**
 * Initialises the result list and, if requested, drops every result whose
 * document is not a published document in the database.
 *
 * @param array   $results        Results returned by the index; each entry
 *                                must provide getId().
 * @param mixed   $numberOfHits   Stored unchanged (it is NOT reduced when
 *                                results are dropped by the validation).
 * @param mixed   $queryTime      Stored unchanged.
 * @param array   $facets         Stored unchanged.
 * @param boolean $validateDocIds If true, only results whose ID is found by
 *                                a finder restricted to server state
 *                                'published' are kept.
 * @param mixed   $log            Optional logger providing err(); when null
 *                                inconsistencies are dropped silently.
 */
public function __construct($results = array(), $numberOfHits = 0, $queryTime = 0, $facets = array(), $validateDocIds = true, $log = null)
{
    $this->log = $log;
    $this->numberOfHits = $numberOfHits;
    $this->queryTime = $queryTime;
    $this->facets = $facets;
    $this->results = array();

    if (empty($results)) {
        return;
    }

    if (!$validateDocIds) {
        $this->results = $results;
        return;
    }

    // make sure that documents returned from index exist in database
    $docIds = array();
    foreach ($results as $result) {
        $docIds[] = $result->getId();
    }

    $finder = new Opus_DocumentFinder();
    $finder->setServerState('published');
    $finder->setIdSubset($docIds);
    $docIdsDB = $finder->ids();

    $notInDB = 0;
    foreach ($results as $result) {
        // loose comparison on purpose: IDs from index and database may
        // differ in type (integer vs. numeric string)
        if (in_array($result->getId(), $docIdsDB)) {
            $this->results[] = $result;
        } else {
            $notInDB++;
        }
    }

    if ($notInDB > 0 && !is_null($this->log)) {
        // $this->results only holds the validated documents, so the number
        // returned by the index is the validated count plus the dropped ones.
        $inDB = count($this->results);
        $resultsSize = $inDB + $notInDB;
        $this->log->err("found inconsistency between database and solr index: index returns {$resultsSize} documents, but only " . $inDB . " found in database");
    }
}
/**
 * Implements response for OAI-PMH verb 'ListSets'.
 *
 * Collects the two fixed bibliography sets, one set per document type of
 * the published documents, one set per OAI-enabled collection role and one
 * subset per OAI set name of such a role. Names that do not match the
 * allowed setSpec characters are logged as errors and left out. Each
 * collected set is appended to the XML document as an 'Opus_Sets' element
 * with the attributes 'Type' and 'TypeName'.
 *
 * @return void
 */
private function __handleListSets()
{
    $repIdentifier = $this->_configuration->getRepositoryIdentifier();
    $this->_proc->setParameter('', 'repIdentifier', $repIdentifier);
    $this->_xml->appendChild($this->_xml->createElement('Documents'));

    $sets = array(
        'bibliography:true' => 'Set for bibliographic entries',
        'bibliography:false' => 'Set for non-bibliographic entries',
    );

    $logger = $this->getLogger();
    $setSpecPattern = '[A-Za-z0-9\\-_\\.!~\\*\'\\(\\)]+';
    $setSpecRegex = "/^{$setSpecPattern}\$/";

    // one set per document type of the published documents
    $finder = new Opus_DocumentFinder();
    $finder->setServerState('published');
    foreach ($finder->groupedTypesPlusCount() as $doctype => $ignoredRow) {
        if (!preg_match($setSpecRegex, $doctype)) {
            $msg = "Invalid SetSpec (doctype='{$doctype}'). Allowed characters are [{$setSpecPattern}].";
            $logger->err("OAI-PMH: {$msg}");
            continue;
        }
        $sets['doc-type:' . $doctype] = "Set for document type '{$doctype}'";
    }

    // one set per OAI-enabled collection role, plus its subsets
    foreach (Opus_CollectionRole::fetchAllOaiEnabledRoles() as $roleRow) {
        $oaiName = $roleRow['oai_name'];

        // document types have already been handled above
        if ($oaiName == 'doc-type') {
            continue;
        }

        if (!preg_match($setSpecRegex, $oaiName)) {
            $msg = "Invalid SetSpec (oai_name='" . $oaiName . "'). "
                . " Please check collection role " . $roleRow['id'] . ". "
                . " Allowed characters are {$setSpecPattern}.";
            $logger->err("OAI-PMH: {$msg}");
            continue;
        }

        $sets[$oaiName] = "Set for collection '" . $oaiName . "'";

        $role = new Opus_CollectionRole($roleRow['id']);
        foreach ($role->getOaiSetNames() as $subset) {
            $oaiSubset = $subset['oai_subset'];

            if (!preg_match($setSpecRegex, $oaiSubset)) {
                $msg = "Invalid SetSpec (oai_name='" . $oaiSubset . "')."
                    . " Please check collection " . $subset['id'] . ". "
                    . " Allowed characters are [{$setSpecPattern}].";
                $logger->err("OAI-PMH: {$msg}");
                continue;
            }

            $sets["{$oaiName}:" . $oaiSubset] = "Subset '" . $oaiSubset . "'"
                . " for collection '" . $oaiName . "'"
                . ': "' . trim($subset['name']) . '"';
        }
    }

    // serialise every collected set as an Opus_Sets element
    foreach ($sets as $type => $name) {
        $setElement = $this->_xml->createElement('Opus_Sets');

        $attributes = array('Type' => $type, 'TypeName' => $name);
        foreach ($attributes as $attributeName => $attributeValue) {
            $attribute = $this->_xml->createAttribute($attributeName);
            $attribute->appendChild($this->_xml->createTextNode($attributeValue));
            $setElement->appendChild($attribute);
        }

        $this->_xml->documentElement->appendChild($setElement);
    }
}
/**
 * Regression test for OPUSVIER-849
 *
 * The hit count of the catch-all search is determined by an index search,
 * whereas the document count on the start page is determined by the
 * DocumentFinder. Ideally both numbers are equal; if they differ, database
 * and search index are inconsistent (which should not happen in the tests).
 */
public function testStartPageContainsTotalNumOfDocs()
{
    // returns the text of the first child of the element with the given ID
    $readFirstChildText = function ($html, $elementId) {
        $dom = new DOMDocument();
        $dom->loadHTML($html);
        return $dom->getElementById($elementId)->firstChild->textContent;
    };

    // get total number of documents from all doc search
    $this->dispatch('/solrsearch/index/search/searchtype/all');
    $numOfHits = $readFirstChildText($this->getResponse()->getBody(), 'search-result-numofhits');

    $docsInIndex = $this->getDocsInSearchIndex();
    $numOfIndexDocs = $docsInIndex->getNumberOfHits();
    $this->assertEquals($numOfIndexDocs, $numOfHits);

    // get total number of documents shown on the start page
    $this->getResponse()->clearBody();
    $this->dispatch('/home');
    $numOfDocs = $readFirstChildText($this->getResponse()->getBody(), 'solrsearch-totalnumofdocs');

    $docFinder = new Opus_DocumentFinder();
    $docFinder->setServerState('published');
    $numOfDbDocs = $docFinder->count();
    $this->assertEquals($numOfDbDocs, $numOfDocs);

    if ($numOfDbDocs != $numOfIndexDocs) {
        // determine the document IDs that exist in the index but not in
        // the database, and vice versa, to get a helpful failure message
        $idsIndex = array();
        foreach ($docsInIndex->getResults() as $result) {
            $idsIndex[] = $result->getId();
        }
        $idsDb = $docFinder->ids();

        $idsIndexOnly = array_diff($idsIndex, $idsDb);
        $this->assertEquals(
            0,
            count($idsIndexOnly),
            'Document IDs in search index, but not in database: ' . var_export($idsIndexOnly, true)
        );

        $idsDbOnly = array_diff($idsDb, $idsIndex);
        $this->assertEquals(
            0,
            count($idsDbOnly),
            'Document IDs in database, but not in search index: ' . var_export($idsDbOnly, true)
        );

        $this->assertEquals(
            $numOfDbDocs,
            $numOfIndexDocs,
            "number of docs in database ({$numOfDbDocs}) and search index ({$numOfIndexDocs}) differ from each other"
        );
    }

    $this->assertEquals($numOfDocs, $numOfHits);
}
/**
 * Counts the published documents whose publication date lies before the
 * year following $thresholdYear.
 *
 * @param int $thresholdYear Last year to include; the finder is limited
 *                           with $thresholdYear + 1 as upper bound.
 * @return mixed Result of Opus_DocumentFinder::count().
 */
public function getNumDocsUntil($thresholdYear)
{
    $upperBound = $thresholdYear + 1;

    $publishedDocs = new Opus_DocumentFinder();
    $publishedDocs->setServerState('published');
    $publishedDocs->setServerDatePublishedBefore($upperBound);

    return $publishedDocs->count();
}
* LICENCE
 * OPUS is free software; you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2 of the Licence, or any later version.
 * OPUS is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details. You should have received a copy of the GNU General Public License
 * along with OPUS; if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * @category    Application
 * @author      Thoralf Klein <*****@*****.**>
 * @copyright   Copyright (c) 2013, OPUS 4 development team
 * @license     http://www.gnu.org/licenses/gpl.html General Public License
 * @version     $Id$
 */

/**
 * Basic publication statistics (based on server_published_date)
 *
 * Prints one line per publication year of the published documents, in
 * ascending order: the running total followed by the count of that year.
 */

$yearFinder = new Opus_DocumentFinder();
$yearFinder->setServerState('published');
$years = $yearFinder->groupedServerYearPublished();
sort($years);

$cumSum = 0;
foreach ($years as $year) {
    // a fresh finder per year, limited to the range (year, year + 1)
    $docsOfYear = new Opus_DocumentFinder();
    $docsOfYear->setServerState('published');
    $docsOfYear->setServerDatePublishedRange($year, $year + 1);
    $count = $docsOfYear->count();

    $cumSum += $count;
    echo 'year ' . $year . ': ' . $cumSum . ' (' . $count . ")\n";
}

exit;
/**
 * Creates a document finder limited to the review server state and checks
 * that the current user may use it.
 *
 * Users with access to module 'admin' and users with access to module
 * 'review' get the finder; everybody else is rejected. Restricting a
 * reviewer to documents carrying the own user ID as enrichment
 * 'reviewer.user_id' is implemented but switched off by a local flag.
 *
 * @return Opus_DocumentFinder
 * @throws Application_Exception if the user has neither admin nor review access
 */
protected function _prepareDocumentFinder()
{
    $finder = new Opus_DocumentFinder();
    $finder->setServerState(self::$_reviewServerState);

    $logger = $this->getLogger();
    $userId = $this->_loggedUser->getUserId();
    $userSuffix = " (user_id: {$userId})";

    // Flag for limiting reviewers to their own documents (disabled).
    $onlyReviewerByUserId = false;

    $realm = Opus_Security_Realm::getInstance();

    if ($realm->checkModule('admin')) {
        // no additional constraint for admins
        $logger->debug("Review: Showing all unpublished documents to admin" . $userSuffix);
        return $finder;
    }

    if (!$realm->checkModule('review')) {
        $message = 'Review: Access to unpublished documents denied.';
        $logger->err($message . $userSuffix);
        throw new Application_Exception($message);
    }

    if ($onlyReviewerByUserId) {
        $message = "Review: Showing only documents belonging to reviewer";
        $finder->setEnrichmentKeyValue('reviewer.user_id', $userId);
    } else {
        $message = "Review: Showing all unpublished documents to reviewer";
    }
    $logger->debug($message . $userSuffix);

    return $finder;
}
* OPUS is free software; you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2 of the Licence, or any later version.
 * OPUS is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details. You should have received a copy of the GNU General Public License
 * along with OPUS; if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * @category    Application
 * @author      Thoralf Klein <*****@*****.**>
 * @copyright   Copyright (c) 2011, OPUS 4 development team
 * @license     http://www.gnu.org/licenses/gpl.html General Public License
 * @version     $Id$
 */

// Bootstrapping
require_once dirname(__FILE__) . '/../common/bootstrap.php';

// Permanently deletes documents in server state 'temporary' that were last
// modified before the date two days ago.
$date = new DateTime();
$dateString = $date->sub(new DateInterval('P2D'))->format('Y-m-d');

$f = new Opus_DocumentFinder();
$f->setServerState('temporary')->setServerDateModifiedBefore($dateString);

foreach ($f->ids() as $id) {
    try {
        $d = new Opus_Document($id);
    } catch (Opus_Model_NotFoundException $e) {
        // document was removed from the database after the ID list was
        // fetched; nothing left to delete
        continue;
    }

    // re-check the state on the loaded document before deleting it
    if ($d->getServerState() == 'temporary') {
        echo "deleting document: {$id}\n";
        $d->deletePermanent();
    } else {
        // terminate the message with a newline so that consecutive
        // messages do not run together on one output line
        echo "NOT deleting document: {$id} because it has server state " . $d->getServerState() . "\n";
    }
}
/**
 * With 'hitsperpage/all' the admin document list for state 'unpublished'
 * must contain one 'span.title' element per unpublished document.
 */
public function testShowAllHits()
{
    $finder = new Opus_DocumentFinder();
    $finder->setServerState('unpublished');
    $expectedTitleCount = $finder->count();

    $this->dispatch('/admin/documents/index/state/unpublished/hitsperpage/all');

    $this->assertQueryCount('span.title', $expectedTitleCount);
}
/**
 * Returns the IDs of published documents, optionally limited to an ID range.
 *
 * A bound that is null is not applied to the finder.
 *
 * @param mixed $start Start of ID range or null for no lower bound
 * @param mixed $end   End of ID range or null for no upper bound
 * @return array Array of document IDs
 */
private function getDocumentIds($start, $end)
{
    $publishedDocs = new Opus_DocumentFinder();
    $publishedDocs->setServerState('published');

    if ($start !== null) {
        $publishedDocs->setIdRangeStart($start);
    }

    if ($end !== null) {
        $publishedDocs->setIdRangeEnd($end);
    }

    $ids = $publishedDocs->ids();

    return $ids;
}
/**
 * Returns the document types grouped over all published documents, as
 * delivered by Opus_DocumentFinder::groupedTypes().
 *
 * @deprecated
 */
public static function fetchDocumentTypes()
{
    $publishedDocs = new Opus_DocumentFinder();
    $publishedDocs->setServerState('published');

    $documentTypes = $publishedDocs->groupedTypes();

    return $documentTypes;
}
/**
 * Returns the number of documents in server state 'published'.
 *
 * The finder counts the documents itself instead of fetching the complete
 * ID list only to count its entries.
 *
 * @return int Number of published documents
 */
private function getPublishedDocumentCount()
{
    $finder = new Opus_DocumentFinder();
    $finder->setServerState('published');

    // cast keeps the integer return value callers got from count($ids)
    return (int) $finder->count();
}
/**
 * After a job created by the index maintenance model has been run, the log
 * file must be readable and report the number of checked documents, the
 * absence of inconsistencies and the completion of the operation.
 */
public function testReadLogfileWithNonEmptyFile()
{
    $this->enableAsyncMode();

    // determine the expected document count before the job runs
    $publishedDocs = new Opus_DocumentFinder();
    $publishedDocs->setServerState('published');
    $numOfPublishedDocs = $publishedDocs->count();

    $model = new Admin_Model_IndexMaintenance();
    $model->createJob();
    $this->runJobImmediately();

    $logdata = $model->readLogFile();
    $this->assertNotNull($logdata);

    $content = $logdata->getContent();
    $this->assertNotNull($content);
    $this->assertNotNull($logdata->getModifiedDate());

    $this->assertContains(
        "checking {$numOfPublishedDocs} published documents for consistency.",
        $content,
        "content of logfile:\n" . $content
    );
    $this->assertContains('No inconsistency was detected.', $content);
    $this->assertContains('Completed operation after ', $content);
}
* @author Sascha Szott <*****@*****.**> * @copyright Copyright (c) 2008-2012, OPUS 4 development team * @license http://www.gnu.org/licenses/gpl.html General Public License * @version $Id: find_missing_published_docs_in_searchindex.php 12011 2013-07-05 19:39:58Z sszott $ */ /** * * Dieses Skript gibt alle IDs der Dokumente zurück, die im Server State * published sind, aber aufgrund eines Fehlers nicht im Index repräsentiert sind. * * Siehe dazu auch die Story OPUSVIER-2368. * */ $numOfErrors = 0; $finder = new Opus_DocumentFinder(); $finder->setServerState('published'); foreach ($finder->ids() as $docId) { // check if document with id $docId is already persisted in search index $query = new Opus_SolrSearch_Query(Opus_SolrSearch_Query::DOC_ID); $query->setField('id', $docId); $query->setReturnIdsOnly(true); $query->setRows(Opus_SolrSearch_Query::MAX_ROWS); $searcher = new Opus_SolrSearch_Searcher(); if ($searcher->search($query)->getNumberOfHits() != 1) { echo "document # {$docId} is not stored in search index\n"; $numOfErrors++; } } if ($numOfErrors > 0) { echo "{$numOfErrors} missing documents were found\n"; } else {
/**
 * Selects the 'home' entry of the main menu and passes the number of
 * published documents to the view as 'totalNumOfDocs'.
 */
public function indexAction()
{
    $this->_helper->mainMenu('home');

    $publishedDocs = new Opus_DocumentFinder();
    $publishedDocs->setServerState('published');
    $totalNumOfDocs = $publishedDocs->count();

    $this->view->totalNumOfDocs = $totalNumOfDocs;
}
} $documentType = @$options['doctype'] ? $options['doctype'] : false; $thesisPublisherId = @$options['publisherid'] ?: null; $thesisGrantorId = @$options['grantorid'] ?: null; $dryrun = isset($options['dryrun']); try { $dnbInstitute = new Opus_DnbInstitute($thesisPublisherId); } catch (Opus_Model_NotFoundException $omnfe) { _log("Opus_DnbInstitute with ID <{$thesisPublisherId}> does not exist.\nExiting..."); exit; } if ($dryrun) { _log("TEST RUN: NO DATA WILL BE MODIFIED"); } $docFinder = new Opus_DocumentFinder(); $docIds = $docFinder->setServerState('published'); if ($documentType != false) { $docFinder->setType($documentType); } $docIds = $docFinder->ids(); _log(count($docIds) . " documents " . ($documentType != false ? "of type '{$documentType}' " : '') . "found"); foreach ($docIds as $docId) { try { $doc = new Opus_Document($docId); if (count($doc->getFile()) == 0) { _log("Document <{$docId}> has no files, skipping.."); continue; } if (!is_null($thesisPublisherId)) { $thesisPublisher = $doc->getThesisPublisher(); if (empty($thesisPublisher)) {
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more * details. You should have received a copy of the GNU General Public License * along with OPUS; if not, write to the Free Software Foundation, Inc., 51 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * @category Application * @author Sascha Szott <*****@*****.**> * @copyright Copyright (c) 2008-2011, OPUS 4 development team * @license http://www.gnu.org/licenses/gpl.html General Public License * @version $Id$ */ /** * Releases all documents in server state unpublished. */ $docFinder = new Opus_DocumentFinder(); $docFinder->setServerState('unpublished'); foreach ($docFinder->ids() as $id) { $d = null; try { $d = new Opus_Document($id); } catch (Opus_Model_NotFoundException $e) { // document with id $id does not exist continue; } if (!is_null($d)) { $date = new Opus_Date(); $date->setNow(); $d->setServerState('published'); $d->setServerDatePublished($date); $d->store(); echo "publishing of document with id {$id} was successful\n";