Exemplo n.º 1
0
 /**
  * Converts one OAI-PMH <record> element into a plain array.
  *
  * @param SimpleXMLElement $record
  *   The <record> element. Its header (status attribute, identifier,
  *   datestamp, setSpec) and its optional <metadata> child are read.
  *
  * @return array
  *   - 'header': array with '@status' (string, or FALSE when the attribute
  *     is absent), 'identifier', 'datestamp' and 'setSpec' (list of strings).
  *   - 'metadata': NULL, or an array with 'namespaceURI' (one trailing slash
  *     removed) and 'childNode' (first element child of <metadata> as a DOM
  *     node).
  *   - 'about': $record->abouts when set, otherwise FALSE.
  */
 function parseRecord($record)
 {
     $status = isset($record->header->attributes()->status)
         ? (string) $record->header->attributes()->status
         : FALSE;
     $identifier = (string) $record->header->identifier[0];

     $specs = array();
     foreach ($record->header->setSpec as $spec) {
         $specs[] = (string) $spec;
     }

     $metadata = null;
     if ($record->metadata) {
         $dom = dom_import_simplexml($record->metadata);
         if (!$dom) {
             // dom_import_simplexml() reports failure with NULL (never FALSE)
             // on PHP < 8, so test for any falsy result. asXML() is used
             // because a string cast would only yield the element's text.
             $errorMsq = sprintf('[id: %s, status: %s] Error while parsing XML "%s"', $identifier, $status, htmlentities((string) $record->metadata->asXML()));
             $this->parserErrors[] = $errorMsq;
             xc_log_error('DOMParser', $errorMsq);
         } elseif (!$dom->hasChildNodes()) {
             $errorMsq = sprintf('No children in metadata: [id: %s, status: %s] xml: "%s"', $identifier, $status, htmlentities((string) $record->metadata->asXML()));
             $this->parserErrors[] = $errorMsq;
             xc_log_error('DOMParser', $errorMsq);
         } else {
             // Only the first element child is kept; text and comment nodes
             // (e.g. indentation whitespace) are skipped.
             foreach ($dom->childNodes as $childNode) {
                 if ($childNode->nodeType == XML_ELEMENT_NODE) {
                     $metadata = array(
                         'namespaceURI' => preg_replace('/\\/$/', '', $childNode->namespaceURI),
                         'childNode' => $childNode,
                     );
                     break;
                 }
             }
         }
     }

     // NOTE(review): the OAI-PMH element is named <about>; 'abouts' looks like
     // a typo that makes this value always FALSE. Left unchanged because
     // callers may rely on it; confirm before fixing.
     $about = isset($record->abouts) ? $record->abouts : FALSE;

     return array(
         'header' => array(
             '@status' => $status,
             'identifier' => $identifier,
             'datestamp' => (string) $record->header->datestamp[0],
             'setSpec' => $specs,
         ),
         'metadata' => $metadata,
         'about' => $about,
     );
 }
Exemplo n.º 2
0
 /**
  * Peels the OAI-PMH envelope off $this->rawContent with anchored regexes.
  *
  * The response is consumed from the front, one element at a time: XML
  * declaration, <OAI-PMH>, <responseDate>, <request>, then either
  * <ListRecords> (records plus optional resumptionToken) or <error>
  * elements, and finally the closing tags. Every structural problem is
  * appended to $this->parserErrors and most are also logged through
  * xc_log_error(). A missing <OAI-PMH>, <responseDate> or <request> aborts
  * processing.
  *
  * Sets $this->hasListRecords and $this->hasErrors, and, when records are
  * found, $this->hasRecords, $this->records and $this->recordCount.
  * $this->rawContent is emptied unless $this->need_debug is set or the
  * response carries errors.
  */
 private function _processContent()
 {
     // Work on a local copy; the raw buffer is released early unless this is
     // an error-only response or debugging was requested.
     $text = $this->rawContent;
     $errorOnly = strstr($text, '<error') && !strstr($text, '<ListRecords>');
     if (!$errorOnly && !$this->need_debug) {
         $this->rawContent = '';
     }

     // XML declaration: reported when absent, but parsing continues.
     $hits = 0;
     $text = preg_replace("/^<\\?xml[^<>]+\\?>/", "", $text, -1, $hits);
     if ($hits != 1) {
         xc_log_error('regex', 'Malformed XML: no sheabang &mdash; ' . htmlentities($text));
         $this->parserErrors[] = 'no sheabang';
     }

     // Root element opener.
     $hits = 0;
     $text = preg_replace("/^\\s*<OAI-PMH\\b[^<>]+>/s", "", $text, -1, $hits);
     if ($hits != 1) {
         xc_log_error('regex', 'Malformed XML: no &lt;OAI-PMH&gt; &mdash; ' . htmlentities($text));
         $this->parserErrors[] = 'no OAI-PMH';
         return;
     }

     // <responseDate>…</responseDate>
     $hits = 0;
     $text = preg_replace("/^\\s*<responseDate>(.*?)<\\/responseDate>/s", "", $text, -1, $hits);
     if ($hits != 1) {
         xc_log_error('regex', 'Malformed XML: no &lt;responseDate&gt; &mdash; ' . htmlentities($text));
         $this->parserErrors[] = 'no responseDate';
         return;
     }

     // <request …>…</request>
     $hits = 0;
     $text = preg_replace("/^\\s*<request(?: [^<>]+)?>(.*?)<\\/request>/s", "", $text, -1, $hits);
     if ($hits != 1) {
         xc_log_error('regex', 'Malformed XML: no &lt;request&gt; element in XML response: ' . htmlentities(substr($text, 0, 50)) . '&hellip;');
         $this->parserErrors[] = 'no request element';
         return;
     }

     // Payload: either a <ListRecords> container or <error> elements.
     $this->hasListRecords = FALSE;
     $this->hasErrors = FALSE;
     $hits = 0;
     $text = preg_replace("/^\\s*<ListRecords>/", "", $text, -1, $hits);
     if ($hits == 1) {
         $this->hasListRecords = TRUE;
     } else {
         // A change in length after the callback is what signals that an
         // <error> element was consumed by setErrors.
         $lengthBefore = drupal_strlen($text);
         $text = preg_replace_callback("/^\\s*<error(?: code=([\\'\"])(.*)([\\'\"]))?>(.*)<\\/error>/", array($this, 'setErrors'), $text);
         if (drupal_strlen($text) == $lengthBefore) {
             xc_log_error('regex', 'Malformed XML: no ListRecords and no error elements &mdash; ' . htmlentities($text));
             $this->parserErrors[] = 'no ListRecords and error elements';
         } else {
             $this->hasErrors = TRUE;
         }
     }

     if (!($this->hasErrors || $this->need_debug)) {
         $this->rawContent = '';
     }

     if (!$this->hasListRecords) {
         $this->parserErrors[] = 'no ListRecords element';
     } else {
         // Consume records (and a trailing resumptionToken) until the closing
         // tag is next or an iteration fails to make progress.
         $progressed = TRUE;
         while (!preg_match("/^\\s*<\\/ListRecords>/", $text) && $progressed) {
             $hits = 0;
             $text = preg_replace_callback("/^\\s*(<record>.*<\\/record>)/s", array($this, 'setRecords'), $text, -1, $hits);
             if ($hits < 1) {
                 $progressed = FALSE;
                 $this->parserErrors[] = 'no record elements';
                 continue;
             }

             $this->hasRecords = TRUE;
             // raw_records is initialized by setRecords; a marker is injected
             // before every <record> so the blob can be split per record.
             $marked = str_replace('<record>', '<#######><record>', $this->raw_records);
             $this->records = preg_split("/<#######>/", $marked, -1, PREG_SPLIT_NO_EMPTY);
             $this->recordCount = count($this->records);
             unset($marked);

             // The resumption token is optional; its absence ends the loop
             // without recording an error. Token data comes from setResumption.
             $hits = 0;
             $text = preg_replace_callback("/^\\s*(<resumptionToken([^<>]*)>(.*?)<\\/resumptionToken>|<resumptionToken([^<>]*)\\s*\\/\\s*>)/", array($this, 'setResumption'), $text, -1, $hits);
             $progressed = $hits > 0;
         }

         // Closing tag of the record container.
         $hits = 0;
         $text = preg_replace('/^\\s*<\\/ListRecords>/', '', $text, -1, $hits);
         if ($hits != 1) {
             xc_log_error('regex', 'Malformed XML: no /ListRecords &mdash; ' . htmlentities($text));
             $this->parserErrors[] = 'no ListRecords closer element';
         }
     }

     // The root closer is only checked when a payload was recognised.
     if ($this->hasListRecords || $this->hasErrors) {
         $hits = 0;
         $text = preg_replace("/^\\s*<\\/OAI-PMH>/", '', $text, -1, $hits);
         if ($hits != 1) {
             xc_log_error('regex', 'Malformed XML: no /OAI-PMH &mdash; ' . htmlentities($text));
             $this->parserErrors[] = 'no /OAI-PMH closer element';
         }
     }
 }
Exemplo n.º 3
0
 /**
  * Loads the response for the configured OAI-PMH request.
  *
  * Builds $this->requestUrl from $this->baseUrl, $this->requestVerb and
  * $this->requestArguments. When getCacheFile() names an existing file its
  * contents are used and the HTTP code is reported as 200; otherwise the URL
  * is fetched with cURL.
  *
  * Results are stored on the object: content, httpCode, and for live
  * requests httpHeader, requestHeader, downloadSize, curlInfo and, on a cURL
  * error, fetchError ('code' and 'text'). A cache read failure is logged and
  * stored in $this->error. The setup time is recorded in
  * $this->statistics['fetch']['curl_init'] and $this->hasFetched is set.
  */
 public function fetchHttpContent()
 {
     $t0 = microtime(TRUE);
     $this->requestUrl = $this->baseUrl . '?verb=' . $this->requestVerb;
     // NOTE(review): keys and values are appended without URL-encoding.
     // Presumably callers pass already-encoded values (e.g. resumption
     // tokens); confirm before adding rawurlencode() here.
     if (!empty($this->requestArguments)) {
         foreach ($this->requestArguments as $key => $value) {
             $this->requestUrl .= '&' . $key . '=' . $value;
         }
     }

     $cache_file = $this->getCacheFile();
     if (isset($cache_file) && file_exists($cache_file)) {
         try {
             $this->content = file_get_contents($cache_file);
             if ($this->content === FALSE) {
                 // file_get_contents() reports failure by returning FALSE
                 // rather than throwing, so record the error explicitly.
                 $lastError = error_get_last();
                 $message = 'could not read ' . $cache_file;
                 if (is_array($lastError) && isset($lastError['message'])) {
                     $message .= ': ' . $lastError['message'];
                 }
                 xc_log_error('harvester', 'file get contents error: ' . $message);
                 $this->error = $message;
             }
         } catch (Exception $e) {
             // Only reached when an error handler turns warnings into exceptions.
             xc_log_error('harvester', 'file get contents error: ' . $e->getMessage());
             $this->error = $e->getMessage();
         }
         $this->statistics['fetch']['curl_init'] = microtime(TRUE) - $t0;
         $this->httpCode = 200;
     } else {
         $ch = curl_init("");
         // Get request header
         curl_setopt($ch, CURLINFO_HEADER_OUT, 1);
         // Get response header
         curl_setopt($ch, CURLOPT_HEADER, 1);
         curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
         curl_setopt($ch, CURLOPT_NOPROGRESS, 1);
         curl_setopt($ch, CURLOPT_USERAGENT, 'Omeka-XC OAI harvester');
         curl_setopt($ch, CURLOPT_ENCODING, "gzip,deflate");
         curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
         if ($this->useAuthentication) {
             curl_setopt($ch, CURLOPT_USERPWD, $this->username . ":" . $this->password);
         }
         if (substr($this->requestUrl, 0, 5) == 'https') {
             // NOTE(review): peer certificate verification is switched off
             // for https URLs, which permits man-in-the-middle attacks. Kept
             // as-is for backward compatibility; consider making it optional.
             curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
         }
         curl_setopt($ch, CURLOPT_URL, $this->requestUrl);
         $this->statistics['fetch']['curl_init'] = microtime(TRUE) - $t0;

         // get the content
         $response = curl_exec($ch);
         if ($response === FALSE) {
             // Transfer failed: the details are captured from curl_errno()
             // below. Use an empty response so header and content stay strings.
             $response = '';
         }
         $headerSize = (int) curl_getinfo($ch, CURLINFO_HEADER_SIZE);
         // extract HTTP response header
         $this->httpHeader = (string) substr($response, 0, $headerSize);
         // extract content
         $this->content = (string) substr($response, $headerSize);
         $this->requestHeader = curl_getinfo($ch, CURLINFO_HEADER_OUT);
         $this->httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
         $this->downloadSize = curl_getinfo($ch, CURLINFO_SIZE_DOWNLOAD);
         $this->curlInfo = curl_getinfo($ch);
         if (curl_errno($ch)) {
             $this->fetchError = array('code' => curl_errno($ch), 'text' => curl_error($ch));
         }
         curl_close($ch);
     }
     $this->hasFetched = TRUE;
 }