/**
 * Renders the index page. When the request carries a four-digit "year"
 * parameter, the IDs of the published documents matched by the finder's
 * server-date-published range (year, year + 1) are exposed to the view.
 *
 * @return mixed Result of rendering the "index" view script.
 */
public function listAction()
{
    $this->indexAction();

    $requestedYear = trim($this->_getParam('year'));

    // Anything that is not exactly four digits just shows the plain index.
    if (preg_match('/^\\d{4}$/', $requestedYear) < 1) {
        return $this->render('index');
    }

    $finder = new Opus_DocumentFinder();
    $finder->setServerState('published');
    $finder->setServerDatePublishedRange($requestedYear, $requestedYear + 1);
    $this->view->ids = $finder->ids();

    return $this->render('index');
}
/**
 * Builds a report of published documents with incomplete metadata and
 * groups the affected document IDs by author.
 *
 * A document counts as erroneous when it has no 'projects' collection, or
 * when it has neither an MSC subject nor an 'msc' collection. The report is
 * only produced for a syntactically valid four-digit "year" parameter.
 *
 * NOTE: the year is only validated; the date-range filter on the finder is
 * disabled in the code, so all published documents are inspected. The
 * referee check is disabled as well, so 'missingReferee' always stays ''.
 *
 * @return mixed Result of rendering the "index" view script.
 */
public function listAction()
{
    $this->indexAction();

    $year = trim($this->_getParam('year'));
    $yearIsValid = preg_match('/^\\d{4}$/', $year) >= 1;
    if (!$yearIsValid) {
        return $this->render('index');
    }

    $report = array();
    $byRecipient = array();

    $finder = new Opus_DocumentFinder();
    $finder->setServerState('published');
    // disabled: $finder->setServerDatePublishedRange($year, $year + 1);

    foreach ($finder->ids() as $docId) {
        $document = new Opus_Document($docId);
        $authors = $this->_getAuthorEmails($document);

        $lacksProject = $this->_countCollectionRole($document, 'projects') === 0;
        $lacksMsc = $this->_countSubjectMsc($document) === 0
            && $this->_countCollectionRole($document, 'msc') === 0;
        // disabled: referee check via count($document->getPersonReferee()) === 0

        $errorCount = ($lacksProject ? 1 : 0) + ($lacksMsc ? 1 : 0);

        // Flags are '' when fine and true when missing, as the view expects.
        $report[] = array(
            'id' => $docId,
            'authorEmails' => $authors,
            'errors' => $errorCount,
            'missingProject' => $lacksProject ? true : '',
            'missingMsc' => $lacksMsc ? true : '',
            'missingReferee' => '',
        );

        if ($errorCount === 0) {
            continue;
        }

        foreach ($authors as $author) {
            $email = trim($author['email']);
            $name = trim($author['name']);
            $recipient = empty($email) ? $name : $name . " <{$email}>";

            if (!isset($byRecipient[$recipient])) {
                $byRecipient[$recipient] = array('email' => $email, 'name' => $name, 'ids' => array());
            }

            // The same author may appear twice on one document: keep the
            // ID list free of duplicates and sorted.
            $byRecipient[$recipient]['ids'][] = $docId;
            $byRecipient[$recipient]['ids'] = array_unique($byRecipient[$recipient]['ids']);
            sort($byRecipient[$recipient]['ids']);
        }
    }

    $this->view->stats = $report;
    $this->view->email2docid = $byRecipient;

    return $this->render('index');
}
/**
 * Redirects OPUS 3.x file URLs to the corresponding OPUS 4.0 file URLs
 * using HTTP status 301 (moved permanently).
 *
 * Reads the request parameters 'opus3id' and 'filename'. If either is
 * missing, or the opus3 id maps to no document or to more than one, the
 * user is redirected to the home page with a 'failure' message instead.
 *
 * @return mixed Result of the redirect helper.
 */
public function opus3fileAction()
{
    $request = $this->getRequest();
    $docid = $request->getParam('opus3id');
    $filename = $request->getParam('filename');

    if (empty($docid) || empty($filename)) {
        return $this->_redirectToAndExit('index', array('failure' => 'missing argument'), 'index', 'home');
    }

    $f = new Opus_DocumentFinder();
    $ids = $f->setIdentifierTypeValue('opus3-id', $docid)->ids();

    if (count($ids) < 1) {
        return $this->_redirectToAndExit('index', array('failure' => 'given opus3id is unknown'), 'index', 'home');
    }

    if (count($ids) > 1) {
        return $this->_redirectToAndExit('index', array('failure' => 'given opus3id is not unique'), 'index', 'home');
    }

    // The file name is user input: percent-encode every path segment so that
    // spaces, '%', '#', '?' etc. yield a valid Location URL. Slashes are
    // preserved in case the name carries a sub-path.
    $encodedFilename = implode('/', array_map('rawurlencode', explode('/', $filename)));

    return $this->_redirect(
        $request->getBaseUrl() . '/files/' . $ids[0] . '/' . $encodedFilename,
        array('prependBase' => false, 'code' => 301)
    );
}
/**
 * Refreshes all documents linked to the given collection or to any
 * collection in its subtree: their XML cache entries are removed and their
 * ServerDateModified is set to the current time.
 *
 * @param Opus_Collection $model Starting point of the subtree; nothing
 *                               happens for null or a model without ID.
 */
protected function updateDocuments($model)
{
    // TODO explain why this is right
    if ($model === null || $model->getId() === null) {
        return;
    }

    $subtreeIdSelect = Opus_Db_TableGateway::getInstance('Opus_Db_Collections')
        ->selectSubtreeById($model->getId(), 'id');

    $finder = new Opus_DocumentFinder();
    $finder->setCollectionId($subtreeIdSelect);

    // drop the cached XML of every affected document
    $cache = new Opus_Model_Xml_Cache();
    $cache->removeAllEntriesWhereSubSelect($finder->getSelectIds());

    // stamp the affected documents as modified "now"
    $now = new Opus_Date();
    $now->setNow();
    Opus_Document::setServerDateModifiedByIds($now, $finder->ids());
}
/**
 * Guards against URN collisions before the identifier is stored.
 *
 * Only identifiers of type 'urn' with a value are checked. A collision is
 * reported when
 *  - this is a new record and the URN is already used by any document,
 *  - the URN is used by more than one document, or
 *  - the URN is used by exactly one document that is not this identifier's
 *    parent.
 *
 * @return mixed Result of the parent implementation.
 * @throws Opus_Identifier_UrnAlreadyExistsException on collision.
 */
protected function _preStore()
{
    $type = $this->getType();
    $value = $this->getValue();

    if (!isset($type) || !isset($value) || $type !== 'urn') {
        return parent::_preStore();
    }

    $finder = new Opus_DocumentFinder();
    $docIds = $finder->setIdentifierTypeValue('urn', $value)->ids();
    $numOwners = count($docIds);

    $errorMsg = "urn collision (documents " . implode(",", $docIds) . ")";

    $collides = ($this->isNewRecord() && $numOwners > 0)
        || $numOwners > 1
        || ($numOwners == 1 && !in_array($this->getParentId(), $docIds));

    if ($collides) {
        throw new Opus_Identifier_UrnAlreadyExistsException($errorMsg);
    }

    return parent::_preStore();
}
/**
 * Checks, for each database document in server state "published", whether
 * it exists in the Solr index, and compares the value of serverDateModified
 * between database and index.
 *
 * Every mismatch increments $this->numOfInconsistencies and triggers
 * forceReindexing(); each successful reindex increments $this->numOfUpdates.
 * A document found more than once in the index is only logged as an error.
 */
private function checkDatabase()
{
    $finder = new Opus_DocumentFinder();
    $finder->setServerState('published');
    $ids = $finder->ids();

    // Report the size of the list that is actually iterated instead of
    // running a second count query, which could disagree with $ids.
    $this->logger->info('checking ' . count($ids) . ' published documents for consistency.');

    foreach ($ids as $id) {
        try {
            $doc = new Opus_Document($id);
        } catch (Opus_Model_NotFoundException $e) {
            // ignore: document was deleted from database in meantime
            continue;
        }

        $serverDateModified = $doc->getServerDateModified()->getUnixTimestamp();

        // retrieve document from index and compare serverDateModified fields
        $query = new Opus_SolrSearch_Query(Opus_SolrSearch_Query::DOC_ID);
        $query->setField('id', $id);
        $resultList = $this->searcher->search($query, $this->validateDocIds);
        $results = $resultList->getResults();
        $numOfHits = $resultList->getNumberOfHits();

        if ($numOfHits == 0) {
            $this->logger->info("inconsistency found for document {$id}: document is in database, but is not in Solr index.");
            $this->numOfInconsistencies++;
            if ($this->forceReindexing($doc)) {
                $this->numOfUpdates++;
            }
        } elseif ($numOfHits == 1) {
            if ($results[0]->getServerDateModified() != $serverDateModified) {
                $this->numOfInconsistencies++;
                $this->logger->info("inconsistency found for document {$id}: mismatch between values of server_date_modified in database and Solr index.");
                if ($this->forceReindexing($doc)) {
                    $this->numOfUpdates++;
                }
            }
        } else {
            $this->logger->err('unexpected state: document with id ' . $id . ' exists multiple times in index.');
        }
    }
}
/**
 * Creates a result list from the raw search results.
 *
 * When $validateDocIds is true, only results whose document ID belongs to a
 * published document in the database are kept; any difference is logged as
 * an inconsistency (if a logger was given). Otherwise the results are taken
 * over unchanged.
 *
 * @param array   $results        Search results; each must offer getId().
 * @param int     $numberOfHits   Number of hits reported by the search.
 * @param int     $queryTime      Query time reported by the search.
 * @param array   $facets         Facets reported by the search.
 * @param boolean $validateDocIds Whether to cross-check IDs with the database.
 * @param mixed   $log            Optional logger offering err().
 */
public function __construct($results = array(), $numberOfHits = 0, $queryTime = 0, $facets = array(), $validateDocIds = true, $log = null)
{
    $this->log = $log;
    $this->numberOfHits = $numberOfHits;
    $this->queryTime = $queryTime;
    $this->facets = $facets;
    $this->results = array();

    if (empty($results)) {
        return;
    }

    if (!$validateDocIds) {
        $this->results = $results;
        return;
    }

    // make sure that documents returned from index exist in database
    $docIds = array();
    foreach ($results as $result) {
        $docIds[] = $result->getId();
    }

    $finder = new Opus_DocumentFinder();
    $finder->setServerState('published');
    $finder->setIdSubset($docIds);
    $docIdsDB = $finder->ids();

    $notInDB = 0;
    foreach ($results as $result) {
        if (in_array($result->getId(), $docIdsDB)) {
            $this->results[] = $result;
        } else {
            $notInDB++;
        }
    }

    if ($notInDB > 0 && !is_null($this->log)) {
        // $this->results already holds only the documents found in the
        // database; the index returned those plus the rejected ones.
        $inDB = count($this->results);
        $resultsSize = $inDB + $notInDB;
        $this->log->err("found inconsistency between database and solr index: index returns {$resultsSize} documents, but only " . $inDB . " found in database");
    }
}
* details. You should have received a copy of the GNU General Public License
 * along with OPUS; if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * @category Application
 * @author Thoralf Klein <*****@*****.**>
 * @copyright Copyright (c) 2008-2012, OPUS 4 development team
 * @license http://www.gnu.org/licenses/gpl.html General Public License
 * @version $Id: find_urns_for_docs_without_visible_files.php 11775 2013-06-25 14:28:41Z tklein $
 */

/**
 * Lists documents that already carry a URN but have no file flagged as
 * visible in OAI.
 */
$updateRequired = 0;

$docfinder = new Opus_DocumentFinder();
$docfinder->setIdentifierTypeExists('urn');

echo "checking documents...\n";

foreach ($docfinder->ids() as $docId) {
    $doc = new Opus_Document($docId);

    // count the files of this document that are flagged visible in OAI
    $numVisibleFiles = 0;
    foreach ($doc->getFile() as $file) {
        $numVisibleFiles += ($file->getVisibleInOai() == 1) ? 1 : 0;
    }

    // only documents without any such file are reported
    if ($numVisibleFiles === 0) {
        echo "-- document {$docId} has an URN " . $doc->getIdentifierUrn(0)->getValue() . ", but no visible files\n";
    }
}
* @category Application * @author Sascha Szott <*****@*****.**> * @copyright Copyright (c) 2008-2012, OPUS 4 development team * @license http://www.gnu.org/licenses/gpl.html General Public License * @version $Id: find_missing_published_docs_in_searchindex.php 12011 2013-07-05 19:39:58Z sszott $ */ /** * * Dieses Skript gibt alle IDs der Dokumente zurück, die im Server State * published sind, aber aufgrund eines Fehlers nicht im Index repräsentiert sind. * * Siehe dazu auch die Story OPUSVIER-2368. * */ $numOfErrors = 0; $finder = new Opus_DocumentFinder(); $finder->setServerState('published'); foreach ($finder->ids() as $docId) { // check if document with id $docId is already persisted in search index $query = new Opus_SolrSearch_Query(Opus_SolrSearch_Query::DOC_ID); $query->setField('id', $docId); $query->setReturnIdsOnly(true); $query->setRows(Opus_SolrSearch_Query::MAX_ROWS); $searcher = new Opus_SolrSearch_Searcher(); if ($searcher->search($query)->getNumberOfHits() != 1) { echo "document # {$docId} is not stored in search index\n"; $numOfErrors++; } } if ($numOfErrors > 0) { echo "{$numOfErrors} missing documents were found\n";
/**
 * Retrieve a document id by an oai identifier.
 *
 * Two identifier forms are handled, selected by the prefix before the first
 * colon:
 *  - 'urn': the complete identifier is looked up as URN among documents
 *    whose server state is in $this->_deliveringDocumentStates;
 *  - 'oai': the third colon-separated part is taken as document id.
 *
 * @param string $oaiIdentifier
 * @return int|string Document id consisting of digits only.
 * @throws Oai_Model_Exception BADARGUMENT for an unknown prefix,
 *         IDDOESNOTEXIST if no numeric document id can be determined.
 */
private function getDocumentIdByIdentifier($oaiIdentifier)
{
    $identifierParts = explode(":", $oaiIdentifier);
    $docId = null;

    switch ($identifierParts[0]) {
        case 'urn':
            $finder = new Opus_DocumentFinder();
            $finder->setIdentifierTypeValue('urn', $oaiIdentifier);
            $finder->setServerStateInList($this->_deliveringDocumentStates);
            $docIds = $finder->ids();
            // Unknown URN: keep null so that IDDOESNOTEXIST is raised below
            // without triggering an undefined-offset notice.
            if (isset($docIds[0])) {
                $docId = $docIds[0];
            }
            break;

        case 'oai':
            if (isset($identifierParts[2])) {
                $docId = $identifierParts[2];
            }
            break;

        default:
            throw new Oai_Model_Exception('The prefix of the identifier argument is unknown.', Oai_Model_Error::BADARGUMENT);
    }

    // The D modifier makes '$' match only at the very end, so an id with a
    // trailing newline is rejected as well.
    if (empty($docId) or !preg_match('/^\\d+$/D', $docId)) {
        throw new Oai_Model_Exception('The value of the identifier argument is unknown or illegal in this repository.', Oai_Model_Error::IDDOESNOTEXIST);
    }

    return $docId;
}
* @author Sascha Szott <*****@*****.**> * @copyright Copyright (c) 2008-2012, OPUS 4 development team * @license http://www.gnu.org/licenses/gpl.html General Public License * @version $Id$ */ /** * * Dieses Skript gibt alle IDs der Dokumente zurück, die mehr als einen Titel * und/oder Abstract in der Sprache des Dokuments besitzen. * * Diese Dokumente müssen aktuell manuell behandelt werden, da das Dokument * sonst nicht fehlerfrei indexiert werden kann (siehe OPUSVIER-2240). * */ $updateRequired = 0; $docfinder = new Opus_DocumentFinder(); foreach ($docfinder->ids() as $docId) { $doc = new Opus_Document($docId); $numOfTitles = 0; foreach ($doc->getTitleMain() as $title) { if ($title->getLanguage() === $doc->getLanguage()) { $numOfTitles++; } } $numOfAbstracts = 0; foreach ($doc->getTitleAbstract() as $abstract) { if ($abstract->getLanguage() === $doc->getLanguage()) { $numOfAbstracts++; } } if ($numOfTitles > 1 || $numOfAbstracts > 1) {
echo "type of title must be provided (e. g. parent)" . PHP_EOL; exit; } $enrichmentField = ''; if (is_null($options['enrichment'])) { echo "parameter --enrichment not specified; function will now exit" . PHP_EOL; exit; } else { $enrichmentField = $options['enrichment']; } $getType = 'getTitle' . ucfirst(strtolower($options['type'])); $addType = 'addTitle' . ucfirst(strtolower($options['type'])); if ($dryrun) { _log("TEST RUN: NO DATA WILL BE MODIFIED"); } $docFinder = new Opus_DocumentFinder(); $docIds = $docFinder->setEnrichmentKeyExists($enrichmentField)->ids(); _log(count($docIds) . " documents found"); foreach ($docIds as $docId) { $doc = new Opus_Document($docId); if ($doc->getType() == $doctype || $doctype == '') { $enrichments = $doc->getEnrichment(); foreach ($enrichments as $enrichment) { $enrichmentArray = $enrichment->toArray(); if ($enrichmentArray['KeyName'] == $enrichmentField) { $titles = $doc->{$getType}(); if (count($titles) > 0) { _log('Title ' . ucfirst(strtolower($options['type'])) . ' already exists for Document #' . $docId . '. Skipping.. '); } else { $title = $doc->{$addType}(); $title->setValue($enrichmentArray['Value']);
exit; } $documentType = @$options['doctype'] ? $options['doctype'] : false; $thesisPublisherId = @$options['publisherid'] ?: null; $thesisGrantorId = @$options['grantorid'] ?: null; $dryrun = isset($options['dryrun']); try { $dnbInstitute = new Opus_DnbInstitute($thesisPublisherId); } catch (Opus_Model_NotFoundException $omnfe) { _log("Opus_DnbInstitute with ID <{$thesisPublisherId}> does not exist.\nExiting..."); exit; } if ($dryrun) { _log("TEST RUN: NO DATA WILL BE MODIFIED"); } $docFinder = new Opus_DocumentFinder(); $docIds = $docFinder->setServerState('published'); if ($documentType != false) { $docFinder->setType($documentType); } $docIds = $docFinder->ids(); _log(count($docIds) . " documents " . ($documentType != false ? "of type '{$documentType}' " : '') . "found"); foreach ($docIds as $docId) { try { $doc = new Opus_Document($docId); if (count($doc->getFile()) == 0) { _log("Document <{$docId}> has no files, skipping.."); continue; } if (!is_null($thesisPublisherId)) { $thesisPublisher = $doc->getThesisPublisher();
/**
 * Collects the IDs of published documents, optionally limited to an ID
 * range. A bound that is null is simply not applied.
 *
 * @param mixed $start Start of ID range, or null for no lower bound
 * @param mixed $end End of ID range, or null for no upper bound
 * @return array Array of document IDs
 */
private function getDocumentIds($start, $end)
{
    $publishedDocs = new Opus_DocumentFinder();
    $publishedDocs->setServerState('published');

    if ($start !== null) {
        $publishedDocs->setIdRangeStart($start);
    }

    if ($end !== null) {
        $publishedDocs->setIdRangeEnd($end);
    }

    return $publishedDocs->ids();
}
/**
 * Regression test for OPUSVIER-2144
 *
 * Browses all documents of type 'article' and checks the pagination links
 * of the result page.
 *
 * IMPORTANT: this unit test stops working once the number of documents
 * exceeds 20.
 */
public function testLastPageUrlEqualsNextPageUrlDocTypeArticle()
{
    $docFinder = new Opus_DocumentFinder();
    $docFinder->setType('article')->setServerState('published');
    $this->assertEquals(20, $docFinder->count(), "Test data changed!");

    $this->doStandardControllerTest('/solrsearch/index/search/searchtype/simple/query/*%3A*/browsing/true/doctypefq/article', null, null);
    $body = $this->getResponse()->getBody();

    // assertEquals reports the actual count on failure, unlike assertTrue(4 == ...)
    $this->assertEquals(4, substr_count($body, '/solrsearch/index/search/searchtype/simple/query/%2A%3A%2A/browsing/true/doctypefq/article/start/10/rows/10">'));

    // The negative check has to use the doctype filter under test
    // ('article'); with another doctype it could never fail.
    $this->assertNotContains('solrsearch/index/search/searchtype/simple/query/%2A%3A%2A/browsing/true/doctypefq/article/start/19/rows/10">', $body);

    $this->assertEquals(20, $this->getNumOfHits());
}
* LICENCE
 * OPUS is free software; you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2 of the Licence, or any later version.
 * OPUS is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details. You should have received a copy of the GNU General Public License
 * along with OPUS; if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * @category Application
 * @author Thoralf Klein <*****@*****.**>
 * @copyright Copyright (c) 2013, OPUS 4 development team
 * @license http://www.gnu.org/licenses/gpl.html General Public License
 * @version $Id$
 */

/**
 * Prints, per publication year of the published documents, the cumulative
 * document count followed by that year's own count in parentheses.
 */
$yearFinder = new Opus_DocumentFinder();
$publishedYears = $yearFinder->setServerState('published')->groupedServerYearPublished();
sort($publishedYears);

$cumSum = 0;
foreach ($publishedYears as $year) {
    // a fresh finder per year, so constraints never accumulate
    $perYearFinder = new Opus_DocumentFinder();
    $count = $perYearFinder
        ->setServerState('published')
        ->setServerDatePublishedRange($year, $year + 1)
        ->count();

    $cumSum += $count;
    echo "year {$year}: {$cumSum} ({$count})\n";
}

exit;
/**
 * Invalidates the cached XML of all documents that depend on the given
 * model and sets their ServerDateModified to the current time.
 *
 * @param Opus_Model_AbstractDb $model Model whose dependent documents are affected.
 */
protected function invalidateDocumentCacheFor(Opus_Model_AbstractDb $model)
{
    $documentFinder = new Opus_DocumentFinder();
    $documentFinder->setDependentModel($model);

    // Both the sub-select and the ID list are obtained before the cache
    // entries are removed.
    $select = $documentFinder->getSelectIds();
    $ids = $documentFinder->ids();

    $xmlCache = new Opus_Model_Xml_Cache();
    $xmlCache->removeAllEntriesWhereSubSelect($select);

    $date = new Opus_Date();
    $date->setNow();
    Opus_Document::setServerDateModifiedByIds($date, $ids);
}
/**
 * Creates the document finder for the review module, restricted to the
 * review server state, and enforces access control.
 *
 * Users with access to module 'admin' or 'review' get the finder back;
 * everybody else is rejected. Restricting reviewers to documents carrying
 * their own user ID is implemented but switched off by a local flag.
 *
 * @return Opus_DocumentFinder
 * @throws Application_Exception if the user may access neither module.
 */
protected function _prepareDocumentFinder()
{
    $finder = new Opus_DocumentFinder();
    $finder->setServerState(self::$_reviewServerState);

    $logger = $this->getLogger();
    $userId = $this->_loggedUser->getUserId();
    $userSuffix = " (user_id: {$userId})";

    // Switch for limiting reviewers to their own documents; currently off.
    $onlyReviewerByUserId = false;

    $realm = Opus_Security_Realm::getInstance();
    $isAdmin = $realm->checkModule('admin');
    $isReviewer = !$isAdmin && $realm->checkModule('review');

    if (!$isAdmin && !$isReviewer) {
        $message = 'Review: Access to unpublished documents denied.';
        $logger->err($message . $userSuffix);
        throw new Application_Exception($message);
    }

    if ($isAdmin) {
        $message = "Review: Showing all unpublished documents to admin";
    } elseif ($onlyReviewerByUserId) {
        $message = "Review: Showing only documents belonging to reviewer";
        $finder->setEnrichmentKeyValue('reviewer.user_id', $userId);
    } else {
        $message = "Review: Showing all unpublished documents to reviewer";
    }

    $logger->debug($message . $userSuffix);

    return $finder;
}
* terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2 of the Licence, or any later version.
 * OPUS is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details. You should have received a copy of the GNU General Public License
 * along with OPUS; if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * @category Application
 * @author Sascha Szott <*****@*****.**>
 * @copyright Copyright (c) 2008-2012, OPUS 4 development team
 * @license http://www.gnu.org/licenses/gpl.html General Public License
 * @version $Id: delete_non-demo_docs.php 10249 2012-02-15 17:00:09Z sszott $
 */

/**
 * Creates the demo instance: permanently deletes every document except the
 * test documents with the IDs 91 through 110.
 */
$allDocs = new Opus_DocumentFinder();

foreach ($allDocs->ids() as $id) {
    $numericId = intval($id);

    // documents 91..110 form the demo data set and are kept
    if ($numericId >= 91 && $numericId <= 110) {
        continue;
    }

    $document = new Opus_Document($id);
    $document->deletePermanent();
    echo "document " . $id . " was deleted.\n";
}

// count again with a fresh finder to report what is left
$remainingDocs = new Opus_DocumentFinder();
echo "done -- num of remaining docs: " . $remainingDocs->count() . "\n";

exit;
*
 * LICENCE
 * OPUS is free software; you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2 of the Licence, or any later version.
 * OPUS is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details. You should have received a copy of the GNU General Public License
 * along with OPUS; if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * @category Application
 * @author Sascha Szott <*****@*****.**>
 * @copyright Copyright (c) 2008-2012, OPUS 4 development team
 * @license http://www.gnu.org/licenses/gpl.html General Public License
 * @version $Id: delete_all_docs.php 10244 2012-02-14 14:57:36Z sszott $
 */

/**
 * Permanently deletes every document and reports the number of documents
 * left afterwards.
 */
$allDocs = new Opus_DocumentFinder();
$idsToDelete = $allDocs->ids();

foreach ($idsToDelete as $id) {
    $document = new Opus_Document($id);
    $document->deletePermanent();
    echo "document " . $id . " was deleted.\n";
}

// count again with a fresh finder to confirm the result
$remainingDocs = new Opus_DocumentFinder();
echo "done -- num of docs: " . $remainingDocs->count() . "\n";

exit;
* details. You should have received a copy of the GNU General Public License
 * along with OPUS; if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * @category Application
 * @author Edouard Simon <*****@*****.**>
 * @copyright Copyright (c) 2011-2013, OPUS 4 development team
 * @license http://www.gnu.org/licenses/gpl.html General Public License
 * @version $Id: cron-update-document-cache.php 11732 2013-06-24 12:26:11Z esimon $
 */

/*
 * Cron script: builds the XML representation of every document selected by
 * a "sub-select not exists" constraint on the document XML cache table,
 * with an XML cache attached to the serializer.
 */
define('APPLICATION_ENV', 'development');

// Bootstrapping
require_once dirname(__FILE__) . '/../common/bootstrap.php';

$opusDocCacheTable = new Opus_Db_DocumentXmlCache();
$db = Zend_Registry::get('db_adapter');

// Select the document_id column of the XML cache table; it is handed to the
// finder as the sub-select for its "not exists" constraint.
$select = $db->select();
$select->from($opusDocCacheTable->info('name'), 'document_id');

$docFinder = new Opus_DocumentFinder();
$docFinder->setSubSelectNotExists($select);
$docIds = $docFinder->ids();

echo "processing " . count($docIds) . " documents\n";

foreach ($docIds as $docId) {
    $model = new Opus_Document($docId);
    $cache = new Opus_Model_Xml_Cache();

    // xml version 1
    $omx = new Opus_Model_Xml();
    $omx->setStrategy(new Opus_Model_Xml_Version1())->excludeEmptyFields()->setModel($model)->setXmlCache($cache);

    // The returned DOM is not used afterwards; presumably the call is made
    // for its effect on the attached cache -- TODO confirm.
    $dom = $omx->getDomDocument();
    echo "Cache refreshed for document#{$docId}\n";
}
/**
 * Home page: marks 'home' as the active main menu entry and hands the
 * total number of published documents to the view.
 */
public function indexAction()
{
    $this->_helper->mainMenu('home');

    $publishedDocs = new Opus_DocumentFinder();
    $publishedDocs->setServerState('published');
    $totalPublished = $publishedDocs->count();

    $this->view->totalNumOfDocs = $totalPublished;
}
/**
 * Prevents URN collisions: verifies that the submitted URN is not yet used
 * by any document in the database.
 *
 * A missing or blank 'IdentifierUrn' entry is treated as valid. On a
 * collision the form element (if present) gets a single translated error
 * message.
 *
 * @return boolean true if there is no collision, false otherwise
 */
private function _validateURN()
{
    // no URN submitted at all counts as "nothing to check"
    $submittedUrn = array_key_exists('IdentifierUrn', $this->extendedData)
        ? $this->extendedData['IdentifierUrn']['value']
        : '';

    if (trim($submittedUrn) == '') {
        return true;
    }

    // look for documents that already carry this URN
    $urnOwners = new Opus_DocumentFinder();
    $urnOwners->setIdentifierTypeValue('urn', $submittedUrn);
    if ($urnOwners->count() == 0) {
        return true;
    }

    $formElement = $this->form->getElement('IdentifierUrn');
    if ($formElement !== null) {
        $formElement->clearErrorMessages();
        $formElement->addError($this->translate('publish_error_urn_collision'));
    }

    return false;
}
* terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2 of the Licence, or any later version.
 * OPUS is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details. You should have received a copy of the GNU General Public License
 * along with OPUS; if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * @category Application
 * @author Sascha Szott <*****@*****.**>
 * @copyright Copyright (c) 2008-2014, OPUS 4 development team
 * @license http://www.gnu.org/licenses/gpl.html General Public License
 * @version $Id$
 */

/**
 * This script prints the IDs of all published documents for which Jane Doe
 * is the name of the author OR the name of another involved person
 * (advisor, contributor, editor, other, translator).
 */
$firstName = "Jane";
$lastName = "Doe";

$docfinder = new Opus_DocumentFinder();

// important: this filter has to be set, because the index only contains
// documents in state published
$docfinder->setServerState('published');

// The finder's select object is extended in place: join the person links
// restricted to the listed roles, join the persons, filter by first and
// last name (bound as parameters) and group by document id.
$select = $docfinder->getSelect();
$select->joinLeft(array('pd' => 'link_persons_documents'), 'd.id = pd.document_id AND (pd.role = "author"' . ' OR pd.role = "advisor" OR pd.role = "contributor" OR pd.role = "editor" OR pd.role = "other"' . ' OR pd.role = "translator")', array())->joinLeft(array('p' => 'persons'), 'pd.person_id = p.id', array())->where('p.first_name = ?', $firstName)->where('p.last_name = ?', $lastName)->group('d.id');

foreach ($docfinder->ids() as $docId) {
    echo "DocID {$docId}\n";
}

exit;
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more * details. You should have received a copy of the GNU General Public License * along with OPUS; if not, write to the Free Software Foundation, Inc., 51 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * @category Application * @author Sascha Szott <*****@*****.**> * @copyright Copyright (c) 2008-2011, OPUS 4 development team * @license http://www.gnu.org/licenses/gpl.html General Public License * @version $Id: export_import_all_docs.php 9044 2011-10-07 16:11:16Z sszott $ */ /** * Tries to export and import all documents. */ $docFinder = new Opus_DocumentFinder(); foreach ($docFinder->ids() as $id) { $doc = null; try { $doc = new Opus_Document($id); } catch (Opus_Model_NotFoundException $e) { // document with id $id does not exist continue; } echo "try to export document {$id} ... "; $xmlModelOutput = new Opus_Model_Xml(); $xmlModelOutput->setModel($doc); $xmlModelOutput->setStrategy(new Opus_Model_Xml_Version1()); $xmlModelOutput->excludeEmptyFields(); $domDocument = $xmlModelOutput->getDomDocument(); echo "export of document {$id} was successful.\n";
* OPUS is free software; you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2 of the Licence, or any later version.
 * OPUS is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details. You should have received a copy of the GNU General Public License
 * along with OPUS; if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * @category Application
 * @author Thoralf Klein <*****@*****.**>
 * @copyright Copyright (c) 2011, OPUS 4 development team
 * @license http://www.gnu.org/licenses/gpl.html General Public License
 * @version $Id$
 */

/*
 * Permanently deletes documents in server state 'temporary' selected by the
 * finder's "modified before" constraint with the date of two days ago.
 */

// Bootstrapping
require_once dirname(__FILE__) . '/../common/bootstrap.php';

// cut-off date: two days before now, formatted as Y-m-d
$date = new DateTime();
$dateString = $date->sub(new DateInterval('P2D'))->format('Y-m-d');

$f = new Opus_DocumentFinder();
$f->setServerState('temporary')->setServerDateModifiedBefore($dateString);

foreach ($f->ids() as $id) {
    $d = new Opus_Document($id);
    $serverState = $d->getServerState();

    // Re-check the state on the loaded document before deleting it.
    if ($serverState == 'temporary') {
        echo "deleting document: {$id}\n";
        $d->deletePermanent();
    } else {
        // terminated with a newline so that consecutive messages do not
        // run together on one output line
        echo "NOT deleting document: {$id} because it has server state " . $serverState . "\n";
    }
}
/**
 * Expects the document finder to report zero documents.
 */
public function testNumberOfDocuments()
{
    $finder = new Opus_DocumentFinder();
    $documentCount = $finder->count();

    $this->assertEquals('0', $documentCount);
}
/**
 * Counts the published documents matched by the finder's "published
 * before" constraint for the year following $thresholdYear.
 *
 * @param int $thresholdYear Threshold year.
 * @return int Number of matching documents.
 */
public function getNumDocsUntil($thresholdYear)
{
    $upperBound = $thresholdYear + 1;

    $publishedDocs = new Opus_DocumentFinder();
    $publishedDocs->setServerState('published');
    $publishedDocs->setServerDatePublishedBefore($upperBound);

    return $publishedDocs->count();
}
/**
 * Migrates documents that are assigned to collections of the collection
 * role "series" to series objects (Opus_Series). Several conflicts have to
 * be handled along the way.
 *
 * In the following only documents are considered that are assigned to at
 * least one collection of the collection role series (short:
 * series collection).
 *
 * Case 1 (documents without IdentifierSerial):
 * Since the volume number of a series (Opus_Series) is mandatory, documents
 * without IdentifierSerial cannot be migrated. They remain untouched. The
 * assignment(s) to series collection(s) is (are) not changed.
 *
 * Case 2 (documents with more than one IdentifierSerial):
 * Since a document can have only one volume number per series, documents
 * with more than one value for the field IdentifierSerial cannot be
 * migrated. They remain untouched. The assignment(s) to series
 * collection(s) is (are) not changed.
 *
 * Case 3 (documents with exactly one IdentifierSerial):
 * Since no two documents with the same volume number can exist within one
 * series, assigning documents must make sure that a volume number is not
 * handed out more than once.
 * If a document is to be assigned to a series with a volume number that is
 * already in use, the assignment is not carried out. The assignment of the
 * document to the series collection is kept unchanged in that case.
 *
 * If the document is assigned to a series successfully, the link to the
 * corresponding series collection is removed. In addition, the field
 * IdentifierSerial is removed.
 *
 * Note: volume numbers "in use" are tracked only in memory for the duration
 * of this call ($serialIdsInUse). Assignments to the root collection of the
 * series role are never migrated and are counted as conflicts.
 *
 * @return array an array that contains both the number of conflicts found and
 * the number of documents that were successfully migrated
 */
private function migrateDocuments() {
    $numOfConflicts = 0;
    $numOfDocsMigrated = 0;

    $finder = new Opus_DocumentFinder();
    $finder->setCollectionRoleId($this->seriesRole->getId());

    // collection id => list of volume numbers already assigned in this run
    $serialIdsInUse = array();

    foreach ($finder->ids() as $docId) {
        $doc = new Opus_Document($docId);
        $serialIds = $doc->getIdentifierSerial();
        $numOfSerialIds = count($serialIds);

        // case 1: no volume number available
        if ($numOfSerialIds == 0) {
            $this->logger->warn("doc #{$docId} : does not have a field IdentifierSerial -- leave it untouched");
            $numOfConflicts++;
            continue;
        }

        // case 2: ambiguous volume number
        if ($numOfSerialIds > 1) {
            $this->logger->warn("doc #{$docId} : has {$numOfSerialIds} values for field IdentifierSerial -- leave it untouched");
            $numOfConflicts++;
            continue;
        }

        // case 3: exactly one volume number
        $serialId = $serialIds[0]->getValue();

        // collections the document keeps after migration
        $remainingCollections = array();

        foreach ($doc->getCollection() as $collection) {
            // only consider collection in collection role series
            if ($collection->getRoleId() != $this->seriesRole->getId()) {
                array_push($remainingCollections, $collection);
            } else {
                $collectionId = $collection->getId();
                if (!$collection->isRoot()) {
                    // check for conflict
                    if (array_key_exists($collectionId, $serialIdsInUse) && in_array($serialId, $serialIdsInUse[$collectionId])) {
                        // conflict was found: serialId for series $collectionId already in use
                        $this->logger->warn("doc #{$docId} : could not assign to series #{$collectionId}: value {$serialId} already in use");
                        $this->logger->warn("doc #{$docId} : leave assignment to collection #{$collectionId} untouched");
                        array_push($remainingCollections, $collection);
                        $numOfConflicts++;
                    } else {
                        // no conflict: the series is instantiated with the
                        // id of the collection it replaces
                        $series = new Opus_Series($collectionId);
                        $doc->addSeries($series)->setNumber($serialId);
                        $doc->setIdentifierSerial(array());

                        // mark usage of serialId for collection $collectionId
                        if (array_key_exists($collectionId, $serialIdsInUse)) {
                            array_push($serialIdsInUse[$collectionId], $serialId);
                        } else {
                            $serialIdsInUse[$collectionId] = array($serialId);
                        }

                        $this->logger->info("doc #{$docId} : assign document to series #{$collectionId} with value {$serialId}");
                        $this->logger->info("doc #{$docId} : removed assignment from collection #{$collectionId}");
                        $this->logger->info("doc #{$docId} : removed field IdentifierSerial with value " . $serialId);
                        // NOTE(review): incremented once per migrated
                        // collection, so a document in several series
                        // collections is counted more than once -- confirm
                        // this is intended.
                        $numOfDocsMigrated++;
                    }
                } else {
                    // series root collection assignment will not be migrated
                    $this->logger->warn("doc #{$docId} : is assigned to root collection #{$collectionId} of collection role series: leave assignment untouched");
                    array_push($remainingCollections, $collection);
                    $numOfConflicts++;
                }
            }
        }

        $doc->setCollection($remainingCollections);
        // presumably avoids re-indexing each document during the migration
        // -- TODO confirm
        $doc->unregisterPlugin('Opus_Document_Plugin_Index');
        $doc->store();
    }

    return array('numOfConflicts' => $numOfConflicts, 'numOfDocsMigrated' => $numOfDocsMigrated);
}
/**
 * Regression test for OPUSVIER-849
 *
 * The hit count of the catch-all search comes from the search index, while
 * the total shown on the start page comes from the DocumentFinder. Both
 * numbers are expected to be equal.
 */
public function testStartPageContainsTotalNumOfDocs()
{
    // hit count displayed by the catch-all search
    $this->dispatch('/solrsearch/index/search/searchtype/all');
    $searchPage = new DOMDocument();
    $searchPage->loadHTML($this->getResponse()->getBody());
    $numOfHits = $searchPage->getElementById('search-result-numofhits')->firstChild->textContent;

    $docsInIndex = $this->getDocsInSearchIndex();
    $numOfIndexDocs = $docsInIndex->getNumberOfHits();
    $this->assertEquals($numOfIndexDocs, $numOfHits);

    // total number of documents displayed on the start page
    $this->getResponse()->clearBody();
    $this->dispatch('/home');
    $homePage = new DOMDocument();
    $homePage->loadHTML($this->getResponse()->getBody());
    $numOfDocs = $homePage->getElementById('solrsearch-totalnumofdocs')->firstChild->textContent;

    $publishedDocs = new Opus_DocumentFinder();
    $publishedDocs->setServerState('published');
    $numOfDbDocs = $publishedDocs->count();
    $this->assertEquals($numOfDbDocs, $numOfDocs);

    // If the two numbers differ, database and search index are inconsistent
    // (which should not really happen within the tests). In that case find
    // out which document IDs exist on one side only, to get a helpful
    // failure message.
    if ($numOfDbDocs != $numOfIndexDocs) {
        $idsIndex = array();
        foreach ($docsInIndex->getResults() as $hit) {
            $idsIndex[] = $hit->getId();
        }
        $idsDb = $publishedDocs->ids();

        $idsIndexOnly = array_diff($idsIndex, $idsDb);
        $this->assertEquals(0, count($idsIndexOnly), 'Document IDs in search index, but not in database: ' . var_export($idsIndexOnly, true));

        $idsDbOnly = array_diff($idsDb, $idsIndex);
        $this->assertEquals(0, count($idsDbOnly), 'Document IDs in database, but not in search index: ' . var_export($idsDbOnly, true));

        $this->assertEquals($numOfDbDocs, $numOfIndexDocs, "number of docs in database ({$numOfDbDocs}) and search index ({$numOfIndexDocs}) differ from each other");
    }

    $this->assertEquals($numOfDocs, $numOfHits);
}