/**
 * Retrieves items from an OAI-enabled url.
 *
 * Sends ListRecords requests to the configured discovery url, following
 * resumption tokens until the response no longer carries one, and passes
 * every <record> element found to $this->cache(). The responseDate of the
 * first (non-resumed) response is stored on $this->harvested.
 *
 * Harvests whose 'discovery.type' parameter is not 'oai' are ignored.
 *
 * @param JTable $harvest The harvesting details.
 *
 * @return void
 *
 * @throws Exception When the response contains an <error> element whose
 *                   code is anything other than "noRecordsMatch".
 */
public function onJHarvestRetrieve($harvest)
{
    $params = new \Joomla\Registry\Registry();
    $params->loadString($harvest->params);

    if ($params->get('discovery.type') != 'oai') {
        return;
    }

    $resumptionToken = null;

    $http = JHttpFactory::getHttp();
    $metadataPrefix = $params->get('discovery.plugin.metadata');

    // Only these elements are acted upon. Everything else (including the
    // document root) is skipped without being expanded into a DOM tree.
    // NOTE(review): as before, the reader descends into each record, so
    // elements with these names nested inside a record are matched too.
    $handled = array('record', 'responseDate', 'error', 'resumptionToken');

    do {
        // Remember whether this request resumes an earlier page. The token
        // itself is cleared below before the response is parsed, so it
        // cannot be used for this check while reading the response.
        $isFollowOn = (bool) $resumptionToken;

        $queries = array();

        if ($isFollowOn) {
            $queries['resumptionToken'] = $resumptionToken;

            // take a break to avoid any timeout issues.
            if (($sleep = $params->get('follow_on', self::FOLLOW_ON)) != 0) {
                sleep($sleep);
            }
        } else {
            $queries['metadataPrefix'] = $metadataPrefix;

            // Only restrict the lower bound when a previous harvest exists.
            if ($harvest->harvested != JFactory::getDbo()->getNullDate()) {
                $queries['from'] = JFactory::getDate($harvest->harvested)->format('Y-m-d\\TH:i:s\\Z');
            }

            if ($set = $params->get('set')) {
                $queries['set'] = $set;
            }

            $queries['until'] = $harvest->now->format('Y-m-d\\TH:i:s\\Z');
        }

        $url = new JUri($params->get('discovery.url'));
        $url->setQuery($queries);
        $url->setVar('verb', 'ListRecords');

        JHarvestHelper::log('Retrieving ' . (string) $url . ' for harvest...', JLog::DEBUG);

        $response = $http->get($url);

        $reader = new XMLReader();
        $reader->xml($response->body);

        // empty the resumptionToken to force a reload per page.
        $resumptionToken = null;

        while ($reader->read()) {
            if ($reader->nodeType != XMLReader::ELEMENT || !in_array($reader->name, $handled, true)) {
                continue;
            }

            // Copy the current element (and its subtree) into a standalone
            // document so it can be handled as a SimpleXMLElement.
            $doc = new DOMDocument();
            $doc->appendChild($doc->importNode($reader->expand(), true));
            $node = simplexml_load_string($doc->saveXML());

            switch ($reader->name) {
                case 'record':
                    // A record that fails to cache is logged and skipped so
                    // the remaining records are still processed.
                    try {
                        $this->cache($harvest, $node);
                    } catch (Exception $e) {
                        JHarvestHelper::log($e->getMessage(), JLog::ERROR);
                    }
                    break;

                case 'responseDate':
                    // only get the response date if fresh harvest.
                    if (!$isFollowOn) {
                        $this->harvested = JFactory::getDate((string) $node);
                    }
                    break;

                case 'error':
                    // "noRecordsMatch" is tolerated; any other code aborts.
                    if ((string) $node['code'] !== "noRecordsMatch") {
                        throw new Exception((string) $node, 500);
                    }
                    break;

                case 'resumptionToken':
                    // An empty token ends the do/while loop below.
                    $resumptionToken = (string) $node;
                    break;
            }
        }

        $reader->close();
    } while ($resumptionToken);
}
/**
 * Runs every harvest returned by the Harvests model.
 *
 * For each harvest the 'harvest' and 'ingest' plugin groups are triggered
 * (onJHarvestRetrieve, then onJHarvestIngest), the run counter is
 * incremented and the record is saved. A failure in one harvest is logged
 * and does not stop the remaining harvests; the cache is cleared after
 * every harvest regardless of outcome. Start time, end time and elapsed
 * time are written to the log.
 *
 * @return void
 */
private function harvest()
{
    // Expose this application instance through the global scope.
    $GLOBALS['application'] = $this;

    JTable::addIncludePath(JPATH_COMPONENT_ADMINISTRATOR . '/tables');
    JModelLegacy::addIncludePath(JPATH_COMPONENT_ADMINISTRATOR . '/models', 'JHarvestModel');

    $harvests = JModelLegacy::getInstance('Harvests', 'JHarvestModel');

    $start = new JDate('now');
    JHarvestHelper::log("started " . (string) $start);

    $dispatcher = JEventDispatcher::getInstance();
    JPluginHelper::importPlugin('harvest');
    JPluginHelper::importPlugin('ingest');

    foreach ($harvests->getItems() as $harvest) {
        try {
            $now = new JDate('now');

            $table = JTable::getInstance('Harvest', 'JHarvestTable');

            // Do not continue with an unloaded table: the plugins would
            // receive an empty record and the store() below would save it.
            if (!$table->load($harvest->id)) {
                throw new Exception('Unable to load harvest ' . $harvest->id . '.');
            }

            // Made available to the plugins as the upper bound of this run.
            $table->now = $now;

            $dispatcher->trigger('onJHarvestRetrieve', array($table));
            $dispatcher->trigger('onJHarvestIngest', array($table));

            $db = JFactory::getDbo();
            $query = $db->getQuery(true);
            $query->select('count(id)')->from('#__jharvest_cache');

            $total = (int) $db->setQuery($query)->loadResult();

            // only record last successful harvest which had records.
            if ($total > 0) {
                $table->harvested = $now->toSql();
            }

            $table->runs++;

            // NOTE(review): state 2 presumably marks a run-once harvest as
            // finished/archived - confirm against the component's states.
            if ((bool) $table->run_once) {
                $table->state = 2;
            }

            if (!$table->store()) {
                throw new Exception('Unable to save harvest ' . $harvest->id . ': ' . $table->getError());
            }
        } catch (Exception $e) {
            JHarvestHelper::log($e->getMessage() . "\n", JLog::ERROR);
            JHarvestHelper::log($e->getTraceAsString() . "\n", JLog::ERROR);
        }

        // clear the cache, even if there is an error.
        JHarvestHelper::clearCache();
    }

    $end = new JDate('now');

    JHarvestHelper::log('ended ' . (string) $end);
    JHarvestHelper::log($start->diff($end)->format("%H:%I:%S"));
}