/**
 * Default action: builds the OAI-PMH responder for the current request
 * and hands it to the view.
 *
 * Loads the module-local OaiPmh model, stamps the view with the response
 * date, constructs the responder from the request parameters, and sets the
 * responder's base URL to the absolute URL of the current route.
 *
 * @return void
 */
public function indexAction()
{
    require_once APPLICATION_PATH . '/' . $this->getRequest()->getModuleName() . '/models/OaiPmh.php';

    $this->view->responseDate = date(OaiPmh::XS_DATETIME_FORMAT);

    $oai = new OaiPmh($this->getRequest()->getParams(), $this->view);

    // The previous concatenation ('http:' . 's' . '//') produced the
    // malformed scheme "http:s//" for secure requests. Build the scheme
    // first, then append "://".
    // $_SERVER['HTTPS'] is non-empty (and not "off") for TLS requests, which
    // also covers HTTPS served on a port other than 443.
    $httpsFlag = isset($_SERVER['HTTPS']) ? strtolower((string) $_SERVER['HTTPS']) : '';
    $isSecure = ($httpsFlag !== '' && $httpsFlag !== 'off')
        || (isset($_SERVER['SERVER_PORT']) && (int) $_SERVER['SERVER_PORT'] === 443);
    $scheme = $isSecure ? 'https' : 'http';

    // NOTE(review): HTTP_HOST is taken from the request's Host header;
    // confirm the web server restricts it before trusting it in emitted URLs.
    $oai->setBaseUrl(
        $scheme . '://' . $_SERVER['HTTP_HOST']
        . $this->getFrontController()->getRouter()->assemble(array())
    );

    $this->view->oai = $oai;
}
/**
 * myOai_harvest - reworked oaiharvester _harvest function.
 *
 * Harvests the records: issues a ListRecords request, passes every returned
 * record to the 'oaiharvester_process_record' hook, and keeps following the
 * resumption token until the provider stops returning one or an error
 * response is received. Fires 'oaiharvester_request_processed' when done.
 *
 * @param $schedule_id    The ID of the schedule (not used by this function)
 * @param $schedule_name  The name of the schedule (not used by this function)
 * @param $provider_id    The ID of data provider (not used by this function)
 * @param $url            The base URL to harvest
 * @param $set            The set parameter; omitted from the request when empty
 * @param $metadataPrefix The metadataPrefix parameter
 * @return void
 */
function myOai_harvest($schedule_id, $schedule_name, $provider_id, $url, $set, $metadataPrefix)
{
    set_time_limit(0);

    // Parameters of the initial request; the set is only sent when one is given.
    $initialParams = array('metadataPrefix' => $metadataPrefix);
    if (strlen((string) $set) > 0) {
        $initialParams = array('set' => $set, 'metadataPrefix' => $metadataPrefix);
    }

    $is_first = true;
    // Text of the resumption token returned by the previous page ('' = none).
    $nextToken = '';

    // Continue only while there is a non-empty token *text*. Testing the
    // truthiness of the whole resumptionToken element (as before) keeps the
    // loop alive when the final page carries an empty token element, and
    // reads undefined keys on error responses.
    while ($is_first || strlen($nextToken) > 0) {
        // do first request (once), or the resumption request (repeat)
        if ($is_first) {
            $params = $initialParams;
            $is_first = false;
        } else {
            $params = array('resumptionToken' => $nextToken);
            myOaiLogMsg('token:[' . $nextToken . ']');
        }

        $request = $url . '?' . http_build_query(array_merge(array('verb' => 'ListRecords'), $params));
        myOaiLogMsg('Request: [' . $request . ']');

        $oai = OaiPmh::factory('ListRecords', $url, $params);

        // Reset before inspecting the response so an error ends the loop.
        $nextToken = '';

        // error? or records processed?
        if (isset($oai['error'])) {
            myOaiLogMsg(print_r($oai, true));
            continue;
        }

        $records = isset($oai['ListRecords']['record']) ? $oai['ListRecords']['record'] : array();
        myOaiLogMsg('Number of records: ' . count($records));

        // this is the core of what we are trying to get at, this will get
        // into our bridge from here: biblio_oaiharvester_process_record,
        // and do the real work we need done
        foreach ($records as $record) {
            module_invoke_all('oaiharvester_process_record', $record);
        }

        if (isset($oai['ListRecords']['resumptionToken']['text'])) {
            $nextToken = (string) $oai['ListRecords']['resumptionToken']['text'];
        }
        myOaiLogMsg('oaitoken:[' . $nextToken . ']');
    } // end while

    // done
    module_invoke_all('oaiharvester_request_processed');
}
/**
 * myOai_harvest - reworked oaiharvester _harvest function.
 *
 * Harvests the records with ListRecords requests, passing every returned
 * record to the 'oaiharvester_process_record' hook. Supports two ways of
 * continuing a harvest across calls:
 *  - a resumption token stored for $this->harvestId (takes precedence), and
 *  - a from/until date window, which is advanced and stored once a window
 *    has been fully harvested or has been reported as acceptable-to-skip by
 *    $this->checkOaiError().
 * Fires 'oaiharvester_request_processed' when done.
 *
 * @param $schedule_id    The ID of the schedule; schedule 17 resumes with the
 *                        bare resumptionToken only
 * @param $schedule_name  The name of the schedule (not used by this method)
 * @param $provider_id    The ID of data provider (not used by this method)
 * @param $url            The base URL to harvest
 * @param $set            The set parameter; omitted from requests when empty
 * @param $metadataPrefix The metadataPrefix parameter
 * @param $from_date      Start of the date window
 * @param $until_date     End of the date window
 * @return void
 */
function myOai_harvest($schedule_id, $schedule_name, $provider_id, $url, $set, $metadataPrefix, $from_date, $until_date)
{
    set_time_limit(0);

    // True once a from/until request was issued and the window still has to
    // be advanced after it completes.
    $flagNextFromUntil = false;
    // True while the current request is (or leads to) a resumption request.
    $flagResumption = false;

    $flagFromUntil = $this->checkFromUntil($from_date, $until_date);
    $lastResumption = $this->getLastResumption($this->harvestId);

    // fast fix for stuck AMNH: schedule 17 resumes with the token alone.
    // Loose comparison kept on purpose; the id may arrive as a numeric string.
    $flagUseResOpts = true;
    if ($schedule_id == 17) {
        $flagUseResOpts = false;
    }

    // Options shared by every non-bare request. The set is only sent when
    // one is given; previously the from/until and resumption requests sent
    // an empty "set=" although the plain first request omitted it.
    $baseParams = array('metadataPrefix' => $metadataPrefix);
    if (strlen((string) $set) > 0) {
        $baseParams = array('set' => $set, 'metadataPrefix' => $metadataPrefix);
    }

    $is_first = true;
    // Text of the resumption token returned by the previous page ('' = none).
    $nextToken = '';

    // Continue only while there is a non-empty token *text*. Testing the
    // truthiness of the whole resumptionToken element (as before) could keep
    // looping on a final page with an empty token element; the follow-up
    // request then carried no token and restarted the harvest.
    // NOTE(review): how OaiPmh::factory represents an empty token element is
    // not visible here - confirm against the parser.
    while ($is_first || strlen($nextToken) > 0) {
        // do first request (once), or the resumption request (repeat)
        if ($is_first) {
            $is_first = false;
            $params = $baseParams;

            // rolling over a date window
            if ($flagFromUntil) {
                $params = array_merge($baseParams, array('from' => $from_date, 'until' => $until_date));
                $msg = '(' . $this->harvestId . ') From date: [' . $from_date . ']' . 'Until date: [' . $until_date . ']';
                $this->myOaiLogMsg($msg);
                $flagFromUntil = false;
                // bump date flag here
                $flagNextFromUntil = true;
            }

            // a resumption token will take precedence over a date
            if ($lastResumption) {
                $flagResumption = true;
                $params = array('resumptionToken' => $lastResumption);
                if ($flagUseResOpts) {
                    $params = array_merge($baseParams, $params);
                }
            }
        } else {
            $params = array('resumptionToken' => $nextToken);
            if ($flagUseResOpts) {
                $params = array_merge($baseParams, $params);
            }
            $flagResumption = true;
            $this->myOaiLogMsg('token:[' . $nextToken . ']');
        }

        $request = $url . '?' . http_build_query(array_merge(array('verb' => 'ListRecords'), $params));
        $this->myOaiLogMsg('Request: [' . $request . ']');

        $oai = OaiPmh::factory('ListRecords', $url, $params);

        // Reset before inspecting the response so an error ends the loop.
        $nextToken = '';
        $advanceWindow = false;

        // error? or records processed?
        if (isset($oai['error'])) {
            // If checkOaiError() accepts the error (e.g. something like
            // <error code="noRecordsMatch"> during a daily from/until run),
            // treat the window as done: there was no data for it, move on.
            if ($this->checkOaiError($oai)) {
                if ($flagNextFromUntil && !$flagResumption) {
                    $this->myOaiLogMsg('No data. Range: [' . $from_date . ']' . ' - [' . $until_date . ']');
                    $advanceWindow = true;
                }
            } else {
                $this->myOaiLogMsg(print_r($oai, true));
            }
        } else {
            // records processed
            $records = isset($oai['ListRecords']['record']) ? $oai['ListRecords']['record'] : array();
            $this->myOaiLogMsg('Number of records: ' . count($records));

            // this is the core of what we are trying to get at, this will
            // get into our bridge from here:
            // biblio_oaiharvester_process_record, and do the real work
            foreach ($records as $record) {
                module_invoke_all('oaiharvester_process_record', $record);
            }

            // find a resumption token
            if (isset($oai['ListRecords']['resumptionToken']['text'])) {
                $nextToken = (string) $oai['ListRecords']['resumptionToken']['text'];
                $this->myOaiLogMsg('oaitoken:[' . $nextToken . ']');
            }

            // Persist the token so a later call can pick up where this one
            // stopped; forget it once the list is complete.
            if (strlen($nextToken) > 0) {
                $this->setLastResumption($this->harvestId, $nextToken);
                $flagResumption = true;
            } else {
                $this->clearLastResumption($this->harvestId);
                $flagResumption = false;
            }

            $advanceWindow = $flagNextFromUntil && !$flagResumption;
        }

        // if using from until dates, bump the dates to the next window
        if ($advanceWindow) {
            $from_date = $until_date;
            $until_date = $this->makeNextDate($until_date);
            $this->setFromUntil($this->harvestId, $from_date, $until_date);
            $msg = '(' . $this->harvestId . ') NEXT From date: [' . $from_date . ']' . 'Until date: [' . $until_date . ']';
            $this->myOaiLogMsg($msg);
            $flagNextFromUntil = false;
        }
    } // end while

    // done
    module_invoke_all('oaiharvester_request_processed');
}