示例#1
0
 /**
  * Query the repository for updates, and run a callback for each item.
  * Will continue across resumption tokens until there's nothing left.
  *
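  * @param string $from timestamp to start at; it is passed through oaiDatestamp() and sent as the 'from' argument of the first request
  * @param callable $callback invoked once per record with the result of OAIUpdateRecord::newFromNode()
  * @return mixed true on success, or the error value from checkResponseErrors() if the repository response reports errors
  * @throws OAIError if the repository returns XML that cannot be parsed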
  */
 function listUpdates($from, $callback)
 {
     // Arguments shared by every request of the harvest.
     $baseParams = array('verb' => 'ListRecords', 'metadataPrefix' => 'mediawiki');

     // false means "no resumption token": either this is the first request
     // or the previous response marked the end of the list.
     $token = false;
     do {
         if ($token !== false) {
             echo "-> resuming at {$token}\n";
             $params = $baseParams + array('resumptionToken' => $token);
         } else {
             $params = $baseParams + array('from' => oaiDatestamp($from));
         }

         $xml = $this->callRepo($params);

         // An empty or missing body can never be valid XML. Reject it up front
         // so the caller always sees OAIError: DOMDocument::loadXML() raises a
         // ValueError on an empty string as of PHP 8.0 instead of returning false.
         if ($xml === null || $xml === false || $xml === '') {
             throw new OAIError("Invalid XML returned from OAI repository.");
         }

         $doc = new DOMDocument('1.0', 'utf-8');
         if (!$doc->loadXML($xml)) {
             throw new OAIError("Invalid XML returned from OAI repository.");
         }

         $xp = new DOMXPath($doc);
         $xp->registerNamespace('oai', 'http://www.openarchives.org/OAI/2.0/');

         // Errors reported inside the response are returned, not thrown.
         $errors = $this->checkResponseErrors($xp);
         if ($errors) {
             return $errors;
         }

         // Hand every record of this batch to the callback, one at a time.
         $resultSet = $xp->query('/oai:OAI-PMH/oai:ListRecords/oai:record');
         foreach ($resultSet as $node) {
             $record = OAIUpdateRecord::newFromNode($node);
             call_user_func($callback, $record);
             unset($record);
         }

         // Compare strictly: a missing or empty resumptionToken element ends the
         // list, but a token whose text is "0" is still a token and must not be
         // mistaken for "no token" by a truthiness check.
         $tokenSet = $xp->query('/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken');
         $token = false;
         if ($tokenSet->length) {
             $tokenText = $tokenSet->item(0)->textContent;
             if ($tokenText !== '') {
                 $token = $tokenText;
             }
         }

         // Drop the per-batch DOM objects before fetching the next batch.
         unset($tokenSet);
         unset($resultSet);
         unset($xp);
         unset($doc);
         unset($xml);
     } while ($token !== false);

     return true;
 }
示例#2
0
 /**
  * Note: old versions that worked on MW 1.4 included the page text as
  * the dc:description field. Then it was broken for a long time. :)
  * I'm now stripping out the text, as it's not really appropriate
  * for the description field.
  *
  * This allows the use of oai_dc format to grab metadata about the pages
  * without fetching the actual page content, which should be more useful
  * for those simply wanting a set of page update notifications.
  */
 function renderDublinCore()
 {
     global $wgMimeType, $wgContLanguageCode;

     $row = $this->_row;
     $pageTitle = Title::makeTitle($row->page_namespace, $row->page_title);

     // Namespace and schema declarations carried by the oai_dc:dc wrapper.
     $rootAttribs = array(
         'xmlns:oai_dc' => 'http://www.openarchives.org/OAI/2.0/oai_dc/',
         'xmlns:dc' => 'http://purl.org/dc/elements/1.1/',
         'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
         'xsi:schemaLocation' => 'http://www.openarchives.org/OAI/2.0/oai_dc/ ' . 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
     );

     // One entry per output line, in document order. The wrapper is emitted
     // without content here and closed by hand as the last entry.
     $lines = array();
     $lines[] = oaiTag('oai_dc:dc', $rootAttribs);
     $lines[] = oaiTag('dc:title', array(), $pageTitle->getPrefixedText());
     $lines[] = oaiTag('dc:language', array(), $wgContLanguageCode);
     $lines[] = oaiTag('dc:type', array(), 'Text');
     $lines[] = oaiTag('dc:format', array(), $wgMimeType);
     $lines[] = oaiTag('dc:identifier', array(), $pageTitle->getCanonicalUrl());
     $lines[] = oaiTag('dc:contributor', array(), $row->rev_user_text);
     $lines[] = oaiTag('dc:date', array(), oaiDatestamp($this->getDatestamp()));
     $lines[] = "</oai_dc:dc>";

     // Every line, including the closing tag, is newline-terminated.
     return implode("\n", $lines) . "\n";
 }