/**
 * Validate an XML document against the XSD it declares, or only check that it parses.
 *
 * The schema location is obtained from self::get_schema_location(). Known
 * services.eol.org content schemas are replaced by the local copies under
 * WEB_ROOT so validation does not depend on that service being reachable.
 *
 * @param string $uri                  Location of the XML document to check.
 * @param bool   $only_well_formedness When true, no schema is attached and the document is only parsed.
 * @return bool|array false when $uri is empty, true when no errors were collected,
 *                    otherwise an array of error messages.
 */
public static function validate($uri, $only_well_formedness = false)
 {
     if (!$uri) {
         return false;
     }
     // try to find the XSD and fail if it cannot
     $schema_location = self::get_schema_location($uri);
     if (!$schema_location) {
         return array("There was no XSD defined in this XML file");
     }
     // we have had problems in the past with Services being unavailable, so
     // instead of checking there just use the local schemas which are the same
     $remote_schema_prefix = 'http://services.eol.org/schema/';
     $local_schema_files = array(
         'content_0_1.xsd',
         'content_0_2.xsd',
         'content_0_3.xsd',
         'content_0_3_18.xsd',
         'content_0_4.xsd',
         'content_1_0.xsd',
     );
     foreach ($local_schema_files as $schema_file) {
         if ($schema_location == $remote_schema_prefix . $schema_file) {
             $schema_location = WEB_ROOT . 'applications/schema/' . $schema_file;
             break;
         }
     }
     libxml_use_internal_errors(true);
     libxml_clear_errors();
     $reader = new \XMLReader();
     // A reader that failed to open has nothing to parse; without this check the
     // read loop below would collect no errors and the document would be reported as valid.
     if (!$reader->open($uri, 'utf8')) {
         $errors = self::get_errors();
         if (!$errors) {
             $errors = array("The XML file could not be opened: {$uri}");
         }
         write_to_resource_harvesting_log(implode(",", $errors));
         return $errors;
     }
     if (!$only_well_formedness) {
         if (@(!$reader->setSchema($schema_location))) {
             $message = "The specified schema could not be loaded or contained errors: {$schema_location}";
             write_to_resource_harvesting_log($message);
             $reader->close();
             return array($message);
         }
     }
     // discard anything reported while opening / loading the schema so that only
     // errors raised while reading the document are returned
     libxml_clear_errors();
     while (@$reader->read()) {
         // empty loop to load errors into libxml error cache
     }
     // collect the errors before releasing the reader
     $errors = self::get_errors();
     $reader->close();
     if ($errors) {
         write_to_resource_harvesting_log(implode(",", $errors));
         return $errors;
     }
     return true;
 }
 /**
  * Build $this->dataset_metadata from the archive's dataset/* files and attach
  * citations scraped from the Catalogue of Life "cite" page.
  *
  * Every parsed file produces an entry keyed by its abbreviated name. Files named
  * <digits>.xml are additionally aliased (by reference) under that numeric ID.
  * Nothing is collected unless the archive has a dataset directory containing col.xml.
  * The script exits when no citation was found for "Catalogue of Life" or "FishBase".
  */
 private function collect_dataset_attribution()
 {
     $this->dataset_metadata = array();
     $dataset_dir = $this->harvest_event->resource->archive_path() . "dataset";
     if (!is_dir($dataset_dir) || !file_exists($dataset_dir . "/col.xml")) {
         return;
     }
     // glob() returns false on error; treat that as "no files"
     $filenames = glob($dataset_dir . "/*");
     if (!$filenames) {
         $filenames = array();
     }
     foreach ($filenames as $filename) {
         // reset for every file so a non-numeric filename (e.g. col.xml) cannot
         // inherit the ID of the file processed before it
         $dataset_id = null;
         if (preg_match("/\\/([0-9]+)\\.xml\$/", $filename, $arr)) {
             $dataset_id = $arr[1];
         }
         $xml = simplexml_load_file($filename);
         if ($xml === false) {
             // unparsable file: there is no metadata to read from it
             write_to_resource_harvesting_log("Could not parse dataset metadata file: {$filename}");
             continue;
         }
         $title = trim($xml->dataset->title);
         if (preg_match("/^(.*) in the Catalogue of Life/", $title, $arr)) {
             $title = trim($arr[1]);
         }
         $title = str_replace("  ", " ", $title);
         $editors = trim($xml->additionalMetadata->metadata->sourceDatabase->authorsAndEditors);
         if (preg_match("/^(.*)\\. For a full list/", $editors, $arr)) {
             $editors = trim($arr[1]);
         }
         if (preg_match("/^(.*); for detailed information/", $editors, $arr)) {
             $editors = trim($arr[1]);
         }
         $editors = str_replace("  ", " ", $editors);
         $abbreviatedName = trim($xml->additionalMetadata->metadata->sourceDatabase->abbreviatedName);
         $this->dataset_metadata[$abbreviatedName]['title'] = $title;
         $this->dataset_metadata[$abbreviatedName]['editors'] = $editors;
         $this->dataset_metadata[$abbreviatedName]['abbreviatedName'] = $abbreviatedName;
         $this->dataset_metadata[$abbreviatedName]['datasetID'] = $dataset_id;
         // only files that carry a numeric ID get the by-ID alias
         if ($dataset_id !== null) {
             $this->dataset_metadata[$dataset_id] =& $this->dataset_metadata[$abbreviatedName];
         }
     }
     // now go grab the citation information from the COL website
     $url = "http://www.catalogueoflife.org/col/info/cite";
     $options_for_log_harvest = array('resource_id' => $this->harvest_event->resource->id);
     $html = Functions::get_remote_file($url, $options_for_log_harvest);
     preg_match_all("/<p><strong>(.*?)<\\/strong><br\\/>(.*?)<\\/p>/ims", $html, $matches, PREG_SET_ORDER);
     foreach ($matches as $match) {
         $dataset_name = $match[1];
         if (preg_match("/^(.*) via ITIS/", $dataset_name, $arr)) {
             $dataset_name = trim($arr[1]);
         }
         $citation = $match[2];
         if (isset($this->dataset_metadata[$dataset_name])) {
             $this->dataset_metadata[$dataset_name]['citation'] = $citation;
         } elseif ($dataset_name == "Species 2000 Common Names" && isset($this->dataset_metadata["Catalogue of Life"])) {
             // this heading on the cite page supplies the citation for the Catalogue of Life entry
             $this->dataset_metadata["Catalogue of Life"]['citation'] = $citation;
         }
     }
     // these two citations are used as the indicator that the scrape worked
     if (!isset($this->dataset_metadata["Catalogue of Life"]['citation']) || !isset($this->dataset_metadata["FishBase"]['citation'])) {
         echo "Tried getting attribution for Catalogue of Life datasets, but there was a problem\n";
         write_to_resource_harvesting_log("Tried getting attribution for Catalogue of Life datasets, but there was a problem");
         exit;
     }
 }
 /**
  * Custom crop of an image data object, called as a result of a user interaction.
  * If $h_pct is not given, a square crop is assumed.
  *
  * Locates the best available copy of the original image and passes it, with
  * the crop percentages, to $this->grab_file().
  *
  * @param mixed $data_object_id ID looked up with DataObject::find().
  * @param mixed $x_pct          Passed through as the first 'crop_pct' value.
  * @param mixed $y_pct          Passed through as the second 'crop_pct' value.
  * @param mixed $w_pct          Passed through as the third 'crop_pct' value.
  * @param mixed $h_pct          Passed through as the fourth 'crop_pct' value (may be NULL).
  * @return mixed Whatever grab_file() returns, or null when the data object is
  *               missing, is not an image, or has no object_cache_url.
  */
 public function crop_image_pct($data_object_id, $x_pct, $y_pct, $w_pct, $h_pct = NULL)
 {
     $data_object = DataObject::find($data_object_id);
     if (!$data_object) {
         write_to_resource_harvesting_log("ContentManager: Cropping invalid data object ID {$data_object_id}");
         trigger_error("ContentManager: Cropping invalid data object ID {$data_object_id}", E_USER_NOTICE);
         return;
     }
     if (!$data_object->is_image() || !$data_object->object_cache_url) {
         return;
     }

     /* The filename extension of the original file is not stored, so until it
        can be read from the database each known extension is probed in turn */
     $cache_path = self::cache_num2path($data_object->object_cache_url);
     $local_base = CONTENT_LOCAL_PATH . $cache_path;
     foreach (self::$valid_image_extensions as $extension) {
         $image_url = $local_base . "." . $extension;
         if (is_file($image_url)) {
             break;
         }
     }

     // No original download found: fall back to the local "_orig.jpg", and
     // failing that to the copy on the content server (yuck)
     if (!is_file($image_url)) {
         $image_url = $local_base . "_orig.jpg";
         if (!is_file($image_url)) {
             $image_url = "http://content.eol.org/content/" . $cache_path . "_orig.jpg";
         }
     }

     $grab_options = array(
         'crop_pct' => array($x_pct, $y_pct, $w_pct, $h_pct),
         'data_object_id' => $data_object->id,
         'data_object_guid' => $data_object->guid,
     );
     return $this->grab_file($image_url, "image", $grab_options);
 }
 /**
  * Send a SPARQL "INSERT DATA" request to $this->upload_uri using HTTP basic auth.
  *
  * @param array $options 'data'       => array of statements, joined with " .\n" to form the insert body;
  *                       'graph_name' => graph name placed between <> in the INTO clause.
  * @return mixed The curl_exec() result on success, false when cURL reports an error
  *               (details are echoed and logged), or null when there is no data to insert.
  */
 public function insert_data($options = array())
 {
     // nothing to do when 'data' is absent or empty
     if (empty($options['data'])) {
         return null;
     }
     $query = self::append_namespaces_to_query();
     // separator first: the (array, separator) argument order is no longer accepted by PHP 8
     $query .= " INSERT DATA INTO <" . $options['graph_name'] . "> { " . implode(" .\n", $options['data']) . " }";
     $ch = curl_init();
     curl_setopt($ch, CURLOPT_URL, $this->upload_uri);
     curl_setopt($ch, CURLOPT_HTTPHEADER, array('Content-type: application/sparql-query'));
     curl_setopt($ch, CURLOPT_HEADER, false);
     curl_setopt($ch, CURLOPT_FAILONERROR, 1);
     curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
     curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 20);
     curl_setopt($ch, CURLOPT_TIMEOUT, 60);
     curl_setopt($ch, CURLOPT_HTTPAUTH, CURLAUTH_BASIC);
     curl_setopt($ch, CURLOPT_USERPWD, $this->username . ":" . $this->password);
     curl_setopt($ch, CURLOPT_POSTFIELDS, $query);
     $result = curl_exec($ch);
     // read the error state before closing so the handle is released on both paths
     $succeeded = (curl_errno($ch) == 0);
     $curl_error = curl_error($ch);
     curl_close($ch);
     if ($succeeded) {
         return $result;
     }
     echo "\n\n=========================================\n";
     echo 'Curl error: ' . $curl_error . "\n\n";
     echo "{$query}\n\n";
     print_r($options);
     print_r(serialize($options['data']));
     echo "===========================================\n\n";
     write_to_resource_harvesting_log("ERROR: Inserting data in virtuoso");
     write_to_resource_harvesting_log('Curl error: ' . $curl_error);
     write_to_resource_harvesting_log($query);
     return false;
 }
 /**
  * Validate one archive row with the eol_schema class matching its row type,
  * update the statistics in $this->stats, and file the resulting exceptions
  * under $this->errors_by_line / $this->warnings_by_line.
  *
  * @param array $row        Row values keyed by term URI.
  * @param array $parameters Reads 'row_type', 'archive_table_definition' (its ->location)
  *                          and 'archive_line_number'.
  * @return void
  */
 public function validate_row($row, $parameters)
 {
     // counts calls across the whole run; only used for the periodic debug output
     static $rows_seen = 0;
     $rows_seen++;
     if ($rows_seen % 10000 == 0 && $GLOBALS['ENV_DEBUG']) {
         echo "{$rows_seen}: " . time_elapsed() . " :: " . memory_get_usage() . "\n";
         write_to_resource_harvesting_log($rows_seen . ": " . time_elapsed() . "::" . memory_get_usage());
     }

     $row_type = $parameters['row_type'];
     $file_location = $parameters['archive_table_definition']->location;
     $new_exceptions = array();

     switch ($row_type) {
         case 'http://eol.org/schema/media/document':
             $license_term = 'http://purl.org/dc/terms/license';
             $usage_terms_term = 'http://ns.adobe.com/xap/1.0/rights/UsageTerms';
             // a media row without any license falls back to the resource's license, when it has one
             if (@(!$row[$license_term]) && @(!$row[$usage_terms_term])) {
                 $resource = $this->archive_resource;
                 if ($resource && $resource->license && $resource->license->source_url) {
                     $row[$usage_terms_term] = $resource->license->source_url;
                     unset($row[$license_term]);
                 }
             }
             $new_exceptions = \eol_schema\MediaResource::validate_by_hash($row, $this->skip_warnings);
             $this->append_identifier_error($row, 'http://purl.org/dc/terms/identifier', $parameters, $new_exceptions);
             if (!self::any_exceptions_of_type_error($new_exceptions)) {
                 // stat name => row term whose value is recorded when present
                 $stat_terms = array(
                     'type' => 'http://purl.org/dc/terms/type',
                     'subtype' => 'http://rs.tdwg.org/audubon_core/subtype',
                     'license' => $usage_terms_term,
                     'subject' => 'http://iptc.org/std/Iptc4xmpExt/1.0/xmlns/CVterm',
                     'language' => 'http://purl.org/dc/terms/language',
                     'format' => 'http://purl.org/dc/terms/format',
                 );
                 foreach ($stat_terms as $stat_name => $term) {
                     if (@($value = $row[$term])) {
                         $this->add_stat($stat_name, $row_type, $file_location, $value);
                     }
                 }
             }
             break;

         case 'http://rs.tdwg.org/dwc/terms/taxon':
             $new_exceptions = \eol_schema\Taxon::validate_by_hash($row, $this->skip_warnings);
             $this->append_identifier_error($row, 'http://rs.tdwg.org/dwc/terms/taxonID', $parameters, $new_exceptions);
             break;

         case 'http://rs.gbif.org/terms/1.0/vernacularname':
             $new_exceptions = \eol_schema\VernacularName::validate_by_hash($row, $this->skip_warnings);
             if (!self::any_exceptions_of_type_error($new_exceptions)) {
                 if (@($value = $row['http://purl.org/dc/terms/language'])) {
                     $this->add_stat('language', $row_type, $file_location, $value);
                 }
             }
             break;

         case 'http://eol.org/schema/reference/reference':
             $new_exceptions = \eol_schema\Reference::validate_by_hash($row, $this->skip_warnings);
             $this->append_identifier_error($row, 'http://purl.org/dc/terms/identifier', $parameters, $new_exceptions);
             break;

         case 'http://eol.org/schema/agent/agent':
             $new_exceptions = \eol_schema\Agent::validate_by_hash($row, $this->skip_warnings);
             $this->append_identifier_error($row, 'http://purl.org/dc/terms/identifier', $parameters, $new_exceptions);
             break;

         case 'http://rs.tdwg.org/dwc/terms/measurementorfact':
             $new_exceptions = \eol_schema\MeasurementOrFact::validate_by_hash($row, $this->skip_warnings);
             $this->append_identifier_error($row, 'http://rs.tdwg.org/dwc/terms/measurementID', $parameters, $new_exceptions);
             break;

         case 'http://eol.org/schema/association':
             $new_exceptions = \eol_schema\Association::validate_by_hash($row, $this->skip_warnings);
             $this->append_identifier_error($row, 'http://eol.org/schema/associationID', $parameters, $new_exceptions);
             break;
     }

     // rows without error-level exceptions count towards the row type's total
     if (!self::any_exceptions_of_type_error($new_exceptions)) {
         if (!isset($this->stats[$row_type])) {
             $this->stats[$row_type] = array();
         }
         if (!isset($this->stats[$row_type]['Total'])) {
             $this->stats[$row_type]['Total'] = 0;
         }
         $this->stats[$row_type]['Total']++;
     }

     if (!$new_exceptions) {
         return;
     }
     foreach ($new_exceptions as $exception) {
         // stamp each exception with where it came from
         $exception->file = $file_location;
         $exception->line = $parameters['archive_line_number'];

         if (get_class($exception) == 'eol_schema\\ContentArchiveError') {
             if (!isset($this->errors_by_line[$row_type][$file_location][$exception->line])) {
                 $this->errors_by_line[$row_type][$file_location][$exception->line] = array();
             }
             $this->errors_by_line[$row_type][$file_location][$exception->line][] = $exception;
             continue;
         }

         // anything that is not a ContentArchiveError is kept as a warning, unless warnings are skipped
         if ($this->skip_warnings) {
             continue;
         }
         if (!isset($this->warnings_by_line[$row_type][$file_location][$exception->line])) {
             $this->warnings_by_line[$row_type][$file_location][$exception->line] = array();
         }
         $this->warnings_by_line[$row_type][$file_location][$exception->line][] = $exception;
     }
 }