/**
 * Query the repository for updates, and run a callback for each item.
 * Keeps issuing ListRecords requests, following resumption tokens, until
 * the repository stops handing one out.
 *
 * Each record node in a response is wrapped with
 * OAIUpdateRecord::newFromNode() and passed to $callback as its only
 * argument; the callback's return value is ignored.
 *
 * @param string $from starting point for the first request; it is passed
 *        through oaiDatestamp() and sent as the 'from' argument
 *        (presumably a timestamp -- exact accepted format depends on
 *        oaiDatestamp(), which is defined elsewhere)
 * @param callable $callback invoked once per harvested record
 * @return mixed true when every page was processed; otherwise the truthy
 *         value returned by checkResponseErrors() for the failing response
 * @throws OAIError if a response body cannot be parsed as XML
 */
function listUpdates($from, $callback) {
    // null means "no resumption token". A strict sentinel is used instead of
    // truthiness because the token is an opaque string and PHP treats the
    // string "0" as false, which would silently end the harvest early.
    $token = null;

    do {
        $params = array(
            'verb' => 'ListRecords',
            'metadataPrefix' => 'mediawiki',
        );
        if ($token !== null) {
            echo "-> resuming at {$token}\n";
            // NOTE(review): OAI-PMH describes resumptionToken as an exclusive
            // argument; metadataPrefix is still sent alongside it here, as the
            // original code did -- confirm the target repository accepts that.
            $params['resumptionToken'] = $token;
        } else {
            $params['from'] = oaiDatestamp($from);
        }

        $xml = $this->callRepo($params);

        $doc = new DOMDocument('1.0', 'utf-8');
        if (!$doc->loadXML($xml)) {
            throw new OAIError("Invalid XML returned from OAI repository.");
        }

        $xp = new DOMXPath($doc);
        $xp->registerNamespace('oai', 'http://www.openarchives.org/OAI/2.0/');

        // Protocol-level errors are handed back to the caller, not thrown.
        $errors = $this->checkResponseErrors($xp);
        if ($errors) {
            return $errors;
        }

        $resultSet = $xp->query('/oai:OAI-PMH/oai:ListRecords/oai:record');
        foreach ($resultSet as $node) {
            $record = OAIUpdateRecord::newFromNode($node);
            call_user_func($callback, $record);
            unset($record);
        }

        // A missing or empty <resumptionToken> element marks the last page.
        $token = null;
        $tokenSet = $xp->query('/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken');
        if ($tokenSet->length > 0) {
            $tokenText = $tokenSet->item(0)->textContent;
            if ($tokenText !== '') {
                $token = $tokenText;
            }
        }

        // Drop the per-page DOM objects before fetching the next page.
        unset($tokenSet, $resultSet, $xp, $doc, $xml);
    } while ($token !== null);

    return true;
}
/**
 * Render this record's page metadata as an oai_dc:dc XML fragment.
 *
 * The fragment carries title, language, type, format, identifier,
 * contributor and date elements, each followed by a newline, and ends
 * with the closing oai_dc:dc tag.
 *
 * Historical note: versions that ran on MW 1.4 put the page text into
 * dc:description, and that was then broken for a long time. The text is
 * deliberately left out now, since it does not really belong in a
 * description field. This lets clients use the oai_dc format to pick up
 * page metadata without fetching page content, which suits consumers
 * that only want page update notifications.
 *
 * @return string the serialized oai_dc:dc element
 */
function renderDublinCore() {
    global $wgMimeType, $wgContLanguageCode;

    $row = $this->_row;
    $title = Title::makeTitle($row->page_namespace, $row->page_title);

    $rootAttribs = array(
        'xmlns:oai_dc' => 'http://www.openarchives.org/OAI/2.0/oai_dc/',
        'xmlns:dc' => 'http://purl.org/dc/elements/1.1/',
        'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
        'xsi:schemaLocation' => 'http://www.openarchives.org/OAI/2.0/oai_dc/ ' .
            'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
    );

    // One entry per output line, in document order; joined with newlines below.
    $lines = array();
    $lines[] = oaiTag('oai_dc:dc', $rootAttribs);
    $lines[] = oaiTag('dc:title', array(), $title->getPrefixedText());
    $lines[] = oaiTag('dc:language', array(), $wgContLanguageCode);
    $lines[] = oaiTag('dc:type', array(), 'Text');
    $lines[] = oaiTag('dc:format', array(), $wgMimeType);
    $lines[] = oaiTag('dc:identifier', array(), $title->getCanonicalUrl());
    $lines[] = oaiTag('dc:contributor', array(), $row->rev_user_text);
    $lines[] = oaiTag('dc:date', array(), oaiDatestamp($this->getDatestamp()));
    $lines[] = "</oai_dc:dc>";

    return implode("\n", $lines) . "\n";
}