/**
 * Renders the index view; for a valid four-digit 'year' request parameter
 * the ids of the published documents found for that year range are
 * additionally assigned to the view.
 *
 * @return mixed Result of render('index').
 */
public function listAction()
 {
     $this->indexAction();
     $requestedYear = trim($this->_getParam('year'));
     $isFourDigitYear = preg_match('/^\\d{4}$/', $requestedYear) > 0;
     if (!$isFourDigitYear) {
         // no usable year given: show the plain index view
         return $this->render('index');
     }
     $finder = new Opus_DocumentFinder();
     $finder->setServerState('published');
     // range passed to the finder: requested year up to the following year
     $finder->setServerDatePublishedRange($requestedYear, $requestedYear + 1);
     $this->view->ids = $finder->ids();
     return $this->render('index');
 }
Esempio n. 2
0
 /**
  * Renders the index view with per-document completeness statistics.
  *
  * Requires a four-digit 'year' request parameter; otherwise the index view
  * is rendered without statistics. For every published document it records
  * whether a 'projects' collection and an MSC classification (subject or
  * 'msc' collection) are present, and groups the ids of incomplete
  * documents by author.
  *
  * NOTE(review): the year is only validated here; restricting the finder to
  * that year is currently disabled, so all published documents are
  * inspected.
  *
  * @return mixed Result of render('index').
  */
 public function listAction()
 {
     $this->indexAction();
     $year = trim($this->_getParam('year'));
     if (preg_match('/^\\d{4}$/', $year) < 1) {
         return $this->render('index');
     }
     $stats = array();
     $emailToDocIds = array();
     $f = new Opus_DocumentFinder();
     $f->setServerState('published');
     foreach ($f->ids() as $docId) {
         $document = new Opus_Document($docId);
         // 'missingReferee' is kept in the record although the referee check
         // is currently disabled, so it always stays ''.
         $docStats = array('id' => $docId, 'authorEmails' => $this->_getAuthorEmails($document), 'errors' => 0, 'missingProject' => '', 'missingMsc' => '', 'missingReferee' => '');
         if ($this->_countCollectionRole($document, 'projects') === 0) {
             $docStats['errors']++;
             $docStats['missingProject'] = true;
         }
         // MSC counts as missing only if neither a subject nor a collection exists
         if ($this->_countSubjectMsc($document) === 0 && $this->_countCollectionRole($document, 'msc') === 0) {
             $docStats['errors']++;
             $docStats['missingMsc'] = true;
         }
         if ($docStats['errors'] > 0) {
             foreach ($docStats['authorEmails'] as $author) {
                 $email = trim($author['email']);
                 $name = trim($author['name']);
                 // authors are keyed by "name" or "name <email>"
                 $key = $name;
                 if (!empty($email)) {
                     $key .= " <{$email}>";
                 }
                 if (!array_key_exists($key, $emailToDocIds)) {
                     $emailToDocIds[$key] = array('email' => $email, 'name' => $name, 'ids' => array());
                 }
                 $emailToDocIds[$key]['ids'][] = $docId;
             }
         }
         $stats[] = $docStats;
     }
     // Deduplicate and sort every author's id list once, after collecting,
     // instead of re-running array_unique()/sort() on each single append.
     foreach ($emailToDocIds as $key => $entry) {
         $ids = array_unique($entry['ids']);
         sort($ids);
         $emailToDocIds[$key]['ids'] = $ids;
     }
     $this->view->stats = $stats;
     $this->view->email2docid = $emailToDocIds;
     return $this->render('index');
 }
Esempio n. 3
0
 /**
  * Walks over all database documents in server state 'published' and
  * verifies that each one is present exactly once in the Solr index with a
  * matching serverDateModified value.
  *
  * A missing or outdated index entry is counted as an inconsistency and a
  * reindexing is attempted; successful attempts are counted as updates.
  * A document found several times in the index is only logged as an error.
  */
 private function checkDatabase()
 {
     $finder = new Opus_DocumentFinder();
     $finder->setServerState('published');
     $ids = $finder->ids();
     $this->logger->info('checking ' . $finder->count() . ' published documents for consistency.');
     foreach ($ids as $id) {
         try {
             $doc = new Opus_Document($id);
         } catch (Opus_Model_NotFoundException $e) {
             // the document vanished from the database in the meantime
             continue;
         }
         $serverDataModified = $doc->getServerDateModified()->getUnixTimestamp();

         // look the document up in the index by its id
         $query = new Opus_SolrSearch_Query(Opus_SolrSearch_Query::DOC_ID);
         $query->setField('id', $id);
         $resultList = $this->searcher->search($query, $this->validateDocIds);
         $results = $resultList->getResults();

         // case 1: not indexed at all
         if ($resultList->getNumberOfHits() == 0) {
             $this->logger->info("inconsistency found for document {$id}: document is in database, but is not in Solr index.");
             $this->numOfInconsistencies++;
             if ($this->forceReindexing($doc)) {
                 $this->numOfUpdates++;
             }
             continue;
         }

         // case 2: indexed more than once
         if ($resultList->getNumberOfHits() != 1) {
             $this->logger->err('unexpected state: document with id ' . $id . ' exists multiple times in index.');
             continue;
         }

         // case 3: indexed exactly once, compare modification timestamps
         if ($results[0]->getServerDateModified() == $serverDataModified) {
             continue;
         }
         $this->numOfInconsistencies++;
         $this->logger->info("inconsistency found for document {$id}: mismatch between values of server_date_modified in database and Solr index.");
         if ($this->forceReindexing($doc)) {
             $this->numOfUpdates++;
         }
     }
 }
Esempio n. 4
0
 /**
  * Builds the result list from the given search results and metadata.
  *
  * When $validateDocIds is set and results were passed, only those results
  * are kept whose id belongs to a database document in server state
  * 'published'; a mismatch is reported through $log (if given). The stored
  * number of hits is not adjusted by this filtering.
  *
  * @param array   $results        Result objects; each must provide getId().
  * @param int     $numberOfHits   Stored unchanged as number of hits.
  * @param int     $queryTime      Stored unchanged as query time.
  * @param array   $facets         Stored unchanged.
  * @param boolean $validateDocIds Whether results are checked against the database.
  * @param mixed   $log            Optional logger; only err() is called on it.
  */
 public function __construct($results = array(), $numberOfHits = 0, $queryTime = 0, $facets = array(), $validateDocIds = true, $log = null)
 {
     $this->log = $log;
     $this->numberOfHits = $numberOfHits;
     $this->queryTime = $queryTime;
     $this->facets = $facets;
     $this->results = array();
     if (empty($results)) {
         return;
     }
     if (!$validateDocIds) {
         $this->results = $results;
         return;
     }
     // make sure that documents returned from index exist in database
     $docIds = array();
     foreach ($results as $result) {
         $docIds[] = $result->getId();
     }
     $finder = new Opus_DocumentFinder();
     $finder->setServerState('published');
     $finder->setIdSubset($docIds);
     $docIdsDB = $finder->ids();
     $notInDB = 0;
     foreach ($results as $result) {
         if (in_array($result->getId(), $docIdsDB)) {
             $this->results[] = $result;
         } else {
             $notInDB++;
         }
     }
     if ($notInDB > 0 && !is_null($this->log)) {
         // $this->results holds only the documents confirmed by the database;
         // the index delivered those plus the ones that were filtered out.
         $inDB = count($this->results);
         $resultsSize = $inDB + $notInDB;
         $this->log->err("found inconsistency between database and solr index: index returns {$resultsSize} documents, but only " . $inDB . " found in database");
     }
 }
 /**
  * Implements response for OAI-PMH verb 'ListSets'.
  *
  * Collects the set specs (two fixed bibliography sets, one set per document
  * type of the published documents, and one set per OAI-enabled collection
  * role plus its subsets) and appends one 'Opus_Sets' element per set to the
  * 'Documents' root element of the XML document. Names that do not match
  * the allowed set spec characters are logged and skipped.
  *
  * @return void
  */
 private function __handleListSets()
 {
     $repIdentifier = $this->_configuration->getRepositoryIdentifier();
     $this->_proc->setParameter('', 'repIdentifier', $repIdentifier);
     $this->_xml->appendChild($this->_xml->createElement('Documents'));

     $sets = array('bibliography:true' => 'Set for bibliographic entries', 'bibliography:false' => 'Set for non-bibliographic entries');
     $logger = $this->getLogger();
     $setSpecPattern = '[A-Za-z0-9\\-_\\.!~\\*\'\\(\\)]+';
     // anchored regular expression used for every validation below
     $setSpecRegex = "/^{$setSpecPattern}\$/";

     // one set per document type
     $finder = new Opus_DocumentFinder();
     $finder->setServerState('published');
     foreach ($finder->groupedTypesPlusCount() as $doctype => $row) {
         if (!preg_match($setSpecRegex, $doctype)) {
             $msg = "Invalid SetSpec (doctype='" . $doctype . "')." . " Allowed characters are [{$setSpecPattern}].";
             $logger->err("OAI-PMH: {$msg}");
             continue;
         }
         $sets['doc-type:' . $doctype] = "Set for document type '{$doctype}'";
     }

     // one set per OAI-enabled collection role, followed by its subsets
     foreach (Opus_CollectionRole::fetchAllOaiEnabledRoles() as $result) {
         $roleOaiName = $result['oai_name'];
         if ($roleOaiName == 'doc-type') {
             // document types were already handled above
             continue;
         }
         if (!preg_match($setSpecRegex, $roleOaiName)) {
             $msg = "Invalid SetSpec (oai_name='" . $roleOaiName . "'). " . " Please check collection role " . $result['id'] . ". " . " Allowed characters are {$setSpecPattern}.";
             $logger->err("OAI-PMH: {$msg}");
             continue;
         }
         $setSpec = $roleOaiName;
         $sets[$setSpec] = "Set for collection '" . $roleOaiName . "'";
         $role = new Opus_CollectionRole($result['id']);
         foreach ($role->getOaiSetNames() as $subset) {
             $subsetOaiName = $subset['oai_subset'];
             if (!preg_match($setSpecRegex, $subsetOaiName)) {
                 $msg = "Invalid SetSpec (oai_name='" . $subsetOaiName . "')." . " Please check collection " . $subset['id'] . ". " . " Allowed characters are [{$setSpecPattern}].";
                 $logger->err("OAI-PMH: {$msg}");
                 continue;
             }
             $subSetSpec = "{$setSpec}:" . $subsetOaiName;
             $sets[$subSetSpec] = "Subset '" . $subsetOaiName . "'" . " for collection '" . $roleOaiName . "'" . ': "' . trim($subset['name']) . '"';
         }
     }

     // serialize every collected set as <Opus_Sets Type="..." TypeName="..."/>
     foreach ($sets as $type => $name) {
         $setElement = $this->_xml->createElement('Opus_Sets');
         foreach (array('Type' => $type, 'TypeName' => $name) as $attrName => $attrValue) {
             $attribute = $this->_xml->createAttribute($attrName);
             $attribute->appendChild($this->_xml->createTextNode($attrValue));
             $setElement->appendChild($attribute);
         }
         $this->_xml->documentElement->appendChild($setElement);
     }
 }
 /**
  * Regression test for OPUSVIER-849: the document total shown on the start
  * page must agree with the hit count of the catch-all search.
  */
 public function testStartPageContainsTotalNumOfDocs()
 {
     // hit count shown by the catch-all search page
     $this->dispatch('/solrsearch/index/search/searchtype/all');
     $searchPage = new DOMDocument();
     $searchPage->loadHTML($this->getResponse()->getBody());
     $hitsNode = $searchPage->getElementById('search-result-numofhits');
     $numOfHits = $hitsNode->firstChild->textContent;
     $docsInIndex = $this->getDocsInSearchIndex();
     $numOfIndexDocs = $docsInIndex->getNumberOfHits();
     $this->assertEquals($numOfIndexDocs, $numOfHits);

     // document total shown on the start page
     $this->getResponse()->clearBody();
     $this->dispatch('/home');
     $homePage = new DOMDocument();
     $homePage->loadHTML($this->getResponse()->getBody());
     $totalNode = $homePage->getElementById('solrsearch-totalnumofdocs');
     $numOfDocs = $totalNode->firstChild->textContent;
     $publishedFinder = new Opus_DocumentFinder();
     $publishedFinder->setServerState('published');
     $numOfDbDocs = $publishedFinder->count();
     $this->assertEquals($numOfDbDocs, $numOfDocs);

     // Short explanation: the catch-all search derives its document count
     // from an index search, whereas the start page obtains its number from
     // the DocumentFinder. Ideally both numbers are equal. If they differ,
     // database and search index are inconsistent (which should not happen
     // while running the tests).
     if ($numOfDbDocs != $numOfIndexDocs) {
         // determine the ids that exist in the index but not in the
         // database, and those that exist in the database but not in the index
         $idsIndex = array();
         foreach ($docsInIndex->getResults() as $result) {
             $idsIndex[] = $result->getId();
         }
         $idsDb = $publishedFinder->ids();
         $idsIndexOnly = array_diff($idsIndex, $idsDb);
         $this->assertEquals(0, count($idsIndexOnly), 'Document IDs in search index, but not in database: ' . var_export($idsIndexOnly, true));
         $idsDbOnly = array_diff($idsDb, $idsIndex);
         $this->assertEquals(0, count($idsDbOnly), 'Document IDs in database, but not in search index: ' . var_export($idsDbOnly, true));
         $this->assertEquals($numOfDbDocs, $numOfIndexDocs, "number of docs in database ({$numOfDbDocs}) and search index ({$numOfIndexDocs}) differ from each other");
     }
     $this->assertEquals($numOfDocs, $numOfHits);
 }
Esempio n. 7
0
 /**
  * Counts the documents in server state 'published' whose publication date
  * lies before the value $thresholdYear + 1.
  *
  * NOTE(review): presumably this means documents published up to and
  * including $thresholdYear - confirm against
  * Opus_DocumentFinder::setServerDatePublishedBefore().
  *
  * @param mixed $thresholdYear Year value; incremented by one before it is
  *                             handed to the finder.
  * @return mixed Result of Opus_DocumentFinder::count().
  */
 public function getNumDocsUntil($thresholdYear)
 {
     $publishedFinder = new Opus_DocumentFinder();
     $publishedFinder->setServerState('published');
     $exclusiveUpperBound = $thresholdYear + 1;
     $publishedFinder->setServerDatePublishedBefore($exclusiveUpperBound);
     return $publishedFinder->count();
 }
 * LICENCE
 * OPUS is free software; you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2 of the Licence, or any later version.
 * OPUS is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details. You should have received a copy of the GNU General Public License
 * along with OPUS; if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * @category    Application
 * @author      Thoralf Klein <*****@*****.**>
 * @copyright   Copyright (c) 2013, OPUS 4 development team
 * @license     http://www.gnu.org/licenses/gpl.html General Public License
 * @version     $Id$
 */
/**
 * Basic publication statistics (based on server_published_date).
 *
 * Prints one line per publication year with the cumulative and the yearly
 * number of published documents, then terminates the script.
 */
$yearFinder = new Opus_DocumentFinder();
$yearFinder->setServerState('published');
$years = $yearFinder->groupedServerYearPublished();
sort($years);

$cumSum = 0;
foreach ($years as $year) {
    // a fresh finder per year, restricted to that year's range
    $countFinder = new Opus_DocumentFinder();
    $countFinder->setServerState('published');
    $countFinder->setServerDatePublishedRange($year, $year + 1);
    $count = $countFinder->count();
    $cumSum += $count;
    echo "year {$year}: {$cumSum} ({$count})\n";
}
exit;
 /**
  * Creates a document finder restricted to the review server state and
  * checks that the current user may see those documents.
  *
  * Users with access to module 'admin' or 'review' get the finder back;
  * everybody else is rejected.
  *
  * @return Opus_DocumentFinder
  * @throws Application_Exception If neither 'admin' nor 'review' is accessible.
  */
 protected function _prepareDocumentFinder()
 {
     $finder = new Opus_DocumentFinder();
     $finder->setServerState(self::$_reviewServerState);
     $logger = $this->getLogger();
     $userId = $this->_loggedUser->getUserId();
     // hard-wired switch: restricting reviewers to their own documents is off
     $onlyReviewerByUserId = false;

     // admins see everything, no further constraint needed
     if (Opus_Security_Realm::getInstance()->checkModule('admin')) {
         $message = "Review: Showing all unpublished documents to admin";
         $logger->debug($message . " (user_id: {$userId})");
         return $finder;
     }

     // neither admin nor reviewer: deny access
     if (!Opus_Security_Realm::getInstance()->checkModule('review')) {
         $message = 'Review: Access to unpublished documents denied.';
         $logger->err($message . " (user_id: {$userId})");
         throw new Application_Exception($message);
     }

     // reviewer: optionally narrow the finder to documents assigned to the user
     if ($onlyReviewerByUserId) {
         $message = "Review: Showing only documents belonging to reviewer";
         $finder->setEnrichmentKeyValue('reviewer.user_id', $userId);
     } else {
         $message = "Review: Showing all unpublished documents to reviewer";
     }
     $logger->debug($message . " (user_id: {$userId})");
     return $finder;
 }
 * OPUS is free software; you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2 of the Licence, or any later version.
 * OPUS is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details. You should have received a copy of the GNU General Public License 
 * along with OPUS; if not, write to the Free Software Foundation, Inc., 51 
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * @category    Application
 * @author      Thoralf Klein <*****@*****.**>
 * @copyright   Copyright (c) 2011, OPUS 4 development team
 * @license     http://www.gnu.org/licenses/gpl.html General Public License
 * @version     $Id$
 */
// Bootstrapping
require_once dirname(__FILE__) . '/../common/bootstrap.php';
// Permanently delete documents that are in server state 'temporary' and
// were last modified before the date two days ago.
$date = new DateTime();
$dateString = $date->sub(new DateInterval('P2D'))->format('Y-m-d');
$f = new Opus_DocumentFinder();
$f->setServerState('temporary')->setServerDateModifiedBefore($dateString);
foreach ($f->ids() as $id) {
    $d = new Opus_Document($id);
    // re-check the state on the loaded document before deleting it
    if ($d->getServerState() == 'temporary') {
        echo "deleting document: {$id}\n";
        $d->deletePermanent();
    } else {
        // terminate the line so that consecutive messages do not run together
        echo "NOT deleting document: {$id} because it has server state " . $d->getServerState() . "\n";
    }
}
 /**
  * With hitsperpage 'all' the unpublished listing must contain one
  * 'span.title' element per unpublished document.
  */
 public function testShowAllHits()
 {
     $finder = new Opus_DocumentFinder();
     $expectedTitleCount = $finder->setServerState('unpublished')->count();
     $this->dispatch('/admin/documents/index/state/unpublished/hitsperpage/all');
     $this->assertQueryCount('span.title', $expectedTitleCount);
 }
 /**
  * Returns IDs for published documents, optionally limited to an ID range.
  *
  * A bound that is null is not applied to the finder.
  *
  * @param mixed $start Start of ID range, or null for no lower bound
  * @param mixed $end   End of ID range, or null for no upper bound
  * @return array Array of document IDs
  */
 private function getDocumentIds($start, $end)
 {
     $publishedFinder = new Opus_DocumentFinder();
     $publishedFinder->setServerState('published');
     if ($start !== null) {
         $publishedFinder->setIdRangeStart($start);
     }
     if ($end !== null) {
         $publishedFinder->setIdRangeEnd($end);
     }
     $documentIds = $publishedFinder->ids();
     return $documentIds;
 }
Esempio n. 13
0
 /**
  * Fetch the grouped document types of all documents in server state
  * 'published', as delivered by Opus_DocumentFinder::groupedTypes().
  *
  * @deprecated
  */
 public static function fetchDocumentTypes()
 {
     $publishedFinder = new Opus_DocumentFinder();
     return $publishedFinder->setServerState('published')->groupedTypes();
 }
 /**
  * Returns the number of ids the finder delivers for server state
  * 'published'.
  *
  * @return int
  */
 private function getPublishedDocumentCount()
 {
     $publishedFinder = new Opus_DocumentFinder();
     $publishedFinder->setServerState('published');
     $publishedIds = $publishedFinder->ids();
     return count($publishedIds);
 }
 /**
  * Runs an index maintenance job in async mode and checks that the log file
  * it produced is readable and reports a clean consistency check.
  */
 public function testReadLogfileWithNonEmptyFile()
 {
     $this->enableAsyncMode();

     // expected number of documents mentioned in the log
     $publishedFinder = new Opus_DocumentFinder();
     $numOfPublishedDocs = $publishedFinder->setServerState('published')->count();

     // create the job and execute it right away
     $maintenance = new Admin_Model_IndexMaintenance();
     $maintenance->createJob();
     $this->runJobImmediately();

     $logdata = $maintenance->readLogFile();
     $this->assertNotNull($logdata);
     $this->assertNotNull($logdata->getContent());
     $this->assertNotNull($logdata->getModifiedDate());
     $this->assertContains("checking {$numOfPublishedDocs} published documents for consistency.", $logdata->getContent(), "content of logfile:\n" . $logdata->getContent());
     $this->assertContains('No inconsistency was detected.', $logdata->getContent());
     $this->assertContains('Completed operation after ', $logdata->getContent());
 }
 * @author      Sascha Szott <*****@*****.**>
 * @copyright   Copyright (c) 2008-2012, OPUS 4 development team
 * @license     http://www.gnu.org/licenses/gpl.html General Public License
 * @version     $Id: find_missing_published_docs_in_searchindex.php 12011 2013-07-05 19:39:58Z sszott $
 */
/**
 *
 * This script prints the IDs of all documents that are in server state
 * published but, due to an error, are not represented in the index.
 *
 * See also story OPUSVIER-2368.
 *
 */
$numOfErrors = 0;
$finder = new Opus_DocumentFinder();
$finder->setServerState('published');
foreach ($finder->ids() as $docId) {
    // look the document up in the search index by its id
    $query = new Opus_SolrSearch_Query(Opus_SolrSearch_Query::DOC_ID);
    $query->setField('id', $docId);
    $query->setReturnIdsOnly(true);
    $query->setRows(Opus_SolrSearch_Query::MAX_ROWS);
    $searcher = new Opus_SolrSearch_Searcher();
    $hits = $searcher->search($query)->getNumberOfHits();
    if ($hits == 1) {
        // exactly one index entry: nothing to report
        continue;
    }
    echo "document # {$docId} is not stored in search index\n";
    $numOfErrors++;
}
if ($numOfErrors > 0) {
    echo "{$numOfErrors} missing documents were found\n";
} else {
Esempio n. 17
0
 /**
  * Selects the 'home' main menu entry and hands the number of published
  * documents to the view as totalNumOfDocs.
  */
 public function indexAction()
 {
     $this->_helper->mainMenu('home');
     $publishedFinder = new Opus_DocumentFinder();
     $this->view->totalNumOfDocs = $publishedFinder->setServerState('published')->count();
 }
Esempio n. 18
0
}
// Read the command line options; an absent or empty option falls back to its
// default. empty() copes with missing keys, so no error suppression is needed.
$documentType = !empty($options['doctype']) ? $options['doctype'] : false;
$thesisPublisherId = !empty($options['publisherid']) ? $options['publisherid'] : null;
$thesisGrantorId = !empty($options['grantorid']) ? $options['grantorid'] : null;
$dryrun = isset($options['dryrun']);
// Abort early if the requested publisher institute cannot be loaded.
try {
    $dnbInstitute = new Opus_DnbInstitute($thesisPublisherId);
} catch (Opus_Model_NotFoundException $omnfe) {
    _log("Opus_DnbInstitute with ID <{$thesisPublisherId}> does not exist.\nExiting...");
    exit;
}
if ($dryrun) {
    _log("TEST RUN: NO DATA WILL BE MODIFIED");
}
// Select the published documents, optionally restricted to one document type.
$docFinder = new Opus_DocumentFinder();
$docFinder->setServerState('published');
if ($documentType !== false) {
    $docFinder->setType($documentType);
}
$docIds = $docFinder->ids();
_log(count($docIds) . " documents " . ($documentType !== false ? "of type '{$documentType}' " : '') . "found");
foreach ($docIds as $docId) {
    try {
        $doc = new Opus_Document($docId);
        if (count($doc->getFile()) == 0) {
            _log("Document <{$docId}> has no files, skipping..");
            continue;
        }
        if (!is_null($thesisPublisherId)) {
            $thesisPublisher = $doc->getThesisPublisher();
            if (empty($thesisPublisher)) {
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details. You should have received a copy of the GNU General Public License
 * along with OPUS; if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * @category    Application
 * @author      Sascha Szott <*****@*****.**>
 * @copyright   Copyright (c) 2008-2011, OPUS 4 development team
 * @license     http://www.gnu.org/licenses/gpl.html General Public License
 * @version     $Id$
 */
/**
 * Releases all documents in server state unpublished.
 */
$docFinder = new Opus_DocumentFinder();
$docFinder->setServerState('unpublished');
foreach ($docFinder->ids() as $id) {
    $d = null;
    try {
        $d = new Opus_Document($id);
    } catch (Opus_Model_NotFoundException $e) {
        // document with id $id does not exist
        continue;
    }
    if (!is_null($d)) {
        $date = new Opus_Date();
        $date->setNow();
        $d->setServerState('published');
        $d->setServerDatePublished($date);
        $d->store();
        echo "publishing of document with id {$id} was successful\n";