/**
 * Collects the taxa for every entry returned by prepare_data() and writes
 * them as one XML document to CONTENT_RESOURCE_LOCAL_PATH/<resource_id>.xml.
 *
 * The list of used collection ids is threaded through every call to
 * get_BoldsImages_taxa(), which hands back an updated copy.
 *
 * @param mixed $resource_id Base name (without extension) of the output file.
 * @return void
 */
public static function get_all_taxa($resource_id)
{
    $records = self::prepare_data();
    $names = array_keys($records);
    $total = count($names);

    $collected = array();
    $seen_collection_ids = array();

    foreach ($names as $index => $name) {
        // array_keys() yields a 0-based list, so the 1-based position is index + 1.
        $position = $index + 1;
        print "\n{$position} of {$total} [{$name}]\n";

        $record = $records[$name];
        $record["name"] = $name;

        $result = self::get_BoldsImages_taxa($record, $seen_collection_ids);
        $taxa_for_name = $result[0];
        $seen_collection_ids = $result[1];

        if ($taxa_for_name) {
            $collected = array_merge($collected, $taxa_for_name);
        }
    }

    $document = \SchemaDocument::get_taxon_xml($collected);
    $resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";

    $handle = fopen($resource_path, "w");
    if (!$handle) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
        return;
    }
    fwrite($handle, $document);
    fclose($handle);
}
/**
 * Builds the resource XML from the records held in $GLOBALS['taxon'].
 *
 * Loads observers and activities onto the instance, runs the association,
 * description and common-name preparation steps, then converts every record
 * through get_visitors_taxa() and writes the combined XML to
 * CONTENT_RESOURCE_LOCAL_PATH/<resource_id>.xml.
 *
 * @param mixed $resource_id Base name (without extension) of the output file.
 * @return array|null The collected taxa (used for testing); nothing when the
 *                    output file cannot be opened.
 */
function get_all_taxa($resource_id)
{
    $this->observers = self::get_observers();
    $this->activities = self::get_activities();
    self::get_associations();
    self::get_general_descriptions();
    self::prepare_common_names();

    $total = count($GLOBALS['taxon']);
    echo "\n total: " . $total . "\n";

    $collected = array();
    $position = 0;
    foreach ($GLOBALS['taxon'] as $taxon_name => $record) {
        $position++;
        // Progress is reported once every hundred records.
        if ($position % 100 == 0) {
            echo "\n{$position} of {$total} " . $taxon_name;
        }

        $record["taxon_name"] = $taxon_name;
        $result = self::get_visitors_taxa($record);
        $taxa_for_record = $result[0];
        if ($taxa_for_record) {
            $collected = array_merge($collected, $taxa_for_record);
        }
    }

    $document = \SchemaDocument::get_taxon_xml($collected);
    $resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";

    $handle = fopen($resource_path, "w");
    if (!$handle) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
        return;
    }
    fwrite($handle, $document);
    fclose($handle);

    return $collected; //used for testing
}
/**
 * Builds the resource XML from the records held in $GLOBALS['taxon'].
 *
 * Runs get_associations(), converts every record through
 * get_plant_feeding_taxa() and writes the combined XML to
 * CONTENT_RESOURCE_LOCAL_PATH/<resource_id>.xml. Progress output is only
 * produced when $this->debug_info is truthy.
 *
 * @param mixed $resource_id Base name (without extension) of the output file.
 * @return array|null The collected taxa (used for testing); nothing when the
 *                    output file cannot be opened.
 */
function get_all_taxa($resource_id)
{
    self::get_associations();

    $total = count($GLOBALS['taxon']);
    if ($this->debug_info) {
        echo "\n\n total: " . $total . "\n";
    }

    $collected = array();
    $position = 0;
    foreach ($GLOBALS['taxon'] as $taxon_name => $record) {
        $position++;
        if ($this->debug_info) {
            echo "\n{$position} of {$total} " . $taxon_name;
        }

        $record["taxon_name"] = $taxon_name;
        $result = self::get_plant_feeding_taxa($record);
        $taxa_for_record = $result[0];
        if ($taxa_for_record) {
            $collected = array_merge($collected, $taxa_for_record);
        }
    }

    $document = \SchemaDocument::get_taxon_xml($collected);
    $resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";

    $handle = fopen($resource_path, "w");
    if (!$handle) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
        return;
    }
    fwrite($handle, $document);
    fclose($handle);

    return $collected; //used for testing
}
/**
 * Fetches the taxa of one Vimeo user through VimeoAPI and stores them as
 * CONTENT_RESOURCE_LOCAL_PATH/temp_vimeo_to_tamborine2.xml.
 *
 * @return void
 */
function get_videos_from_vimeo()
{
    echo "\n -- start access to vimeo ";

    require_library('connectors/VimeoAPI');
    $vimeo_users = array("user1632860"); // Peter Kuttner's id
    $document = \SchemaDocument::get_taxon_xml(VimeoAPI::get_all_taxa($vimeo_users));

    $resource_id = "temp_vimeo_to_tamborine2";
    $resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";

    $handle = fopen($resource_path, "w");
    if (!$handle) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
        return;
    }
    fwrite($handle, $document);
    fclose($handle);

    echo " -- end.\n";
}
/**
 * Builds the resource XML for the names listed in the keys spreadsheet.
 *
 * Names for which DiscoverLifeAPIv2::with_eol_page() returns nothing are
 * counted, appended to the report text file and skipped. All other names are
 * converted through get_discoverlife_taxa() and the combined XML is written
 * to CONTENT_RESOURCE_LOCAL_PATH/<resource_id>.xml, followed by a summary.
 *
 * @param mixed $resource_id Base name (without extension) of the output file.
 * @return void
 */
public function get_all_taxa_keys($resource_id)
{
    require_library('connectors/DiscoverLifeAPIv2');
    $api = new DiscoverLifeAPIv2();
    $taxa_objects = self::process_keys_spreadsheet();

    //initialize text file for DiscoverLife: save names without a page in EOL
    self::initialize_text_file(DOC_ROOT . self::TEXT_FILE_FOR_DL . "_id_keys.txt");

    $collected = array();
    $seen_collection_ids = array();
    $processed = 0;
    $save_count = 0;
    $missing = 0;

    foreach ($taxa_objects as $name => $fields) {
        $processed++;

        // Only names that already have an EOL page are processed; the rest
        // are reported back to DiscoverLife through the text file.
        $taxon = $api->with_eol_page($name);
        if (!$taxon) {
            print "\n {$processed} - no EOL page ({$name})";
            $missing++;
            self::store_name_to_text_file($name, "ID_Keys");
            continue;
        }

        $taxon["keys"] = array();
        foreach ($fields as $field) {
            $taxon["keys"][] = $field;
        }
        print "\n {$processed} -- " . $taxon['orig_sciname'];

        $result = self::get_discoverlife_taxa($taxon, $seen_collection_ids);
        $taxa_for_name = $result[0];
        $seen_collection_ids = $result[1];
        if ($taxa_for_name) {
            $collected = array_merge($collected, $taxa_for_name);
        }
    }

    $document = \SchemaDocument::get_taxon_xml($collected);
    $resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";

    $handle = fopen($resource_path, "w");
    if (!$handle) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
        return;
    }
    fwrite($handle, $document);
    fclose($handle);

    $found = $processed - $missing;
    print "\n\n total = {$processed} \n With EOL page = {$found} \n No EOL page = {$missing} \n\n";
}
/**
 * Builds a map resource from the taxa listed in resource 35's XML file.
 *
 * For every <taxon> the map URL is derived from $this->map_url by replacing
 * 'xxx' with the identifier (minus its 'STRI-fish-' prefix). The first line
 * of that URL is read; taxa whose first line contains "no_website_found"
 * are skipped, all others are converted through get_stri_taxa(). The result
 * is written to CONTENT_RESOURCE_LOCAL_PATH/<resource_id>.xml.
 *
 * @param mixed $resource_id Base name (without extension) of the output file.
 * @return void
 */
function get_all_taxa($resource_id)
{
    $source_path = CONTENT_RESOURCE_LOCAL_PATH . "35.xml";
    $source_xml = Functions::get_hashed_response($source_path);
    if (!$source_xml) {
        // Without the source document there is nothing to iterate over.
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $source_path);
        return;
    }

    $all_taxa = array();
    $i = 0;
    $total = count($source_xml->taxon);

    foreach ($source_xml->taxon as $t) {
        $i++;
        $t_dwc = $t->children("http://rs.tdwg.org/dwc/dwcore/");
        $t_dc = $t->children("http://purl.org/dc/elements/1.1/");

        // The bare id is needed for the map URL, the record id and the source link.
        $taxon_id = str_replace('STRI-fish-', '', (string) $t_dc->identifier);

        print "\n {$i} of {$total} " . $t_dc->identifier;
        $url = str_replace('xxx', $taxon_id, $this->map_url);
        print " [{$url}] ";

        $file = fopen($url, "r");
        if (!$file) {
            debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $url);
            continue;
        }
        $first_line = fgets($file);
        // Close right away: only the first line is needed, and one handle per
        // taxon would otherwise stay open for the rest of the run.
        fclose($file);

        // stripos() returns 0 for a match at the very start of the line, so the
        // result must be compared against false instead of tested for truthiness.
        if ($first_line !== false && stripos($first_line, "no_website_found") !== false) {
            print " - no map";
            continue;
        }
        print " - with map";

        $taxon_record = array();
        $taxon_record["taxon"] = array(
            "sciname"  => $t_dwc->ScientificName,
            "family"   => $t_dwc->Family,
            "kingdom"  => '',
            "phylum"   => '',
            "class"    => '',
            "order"    => '',
            "id"       => $taxon_id,
            "mediaURL" => $url,
            "source"   => $this->taxon_page . $taxon_id,
        );
        $taxon_record["dataobjects"] = array();

        $arr = self::get_stri_taxa($taxon_record);
        $page_taxa = $arr[0];
        if ($page_taxa) {
            $all_taxa = array_merge($all_taxa, $page_taxa);
        }
    }

    $xml = \SchemaDocument::get_taxon_xml($all_taxa);
    $resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
    if (!($OUT = fopen($resource_path, "w"))) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
        return;
    }
    fwrite($OUT, $xml);
    fclose($OUT);
}
/**
 * Converts the big OBIS CSV file into the resource XML, one chunk at a time.
 *
 * Steps: remove leftover temp files, split the CSV into chunks, convert each
 * chunk through get_obis_taxa() into its own temp XML file, combine the temp
 * XML files into the resource file, flag the resource for force-harvest when
 * the combined file is non-empty, and remove the temp files again.
 *
 * @param mixed $resource_id Resource id; names the combined file and selects
 *                           the row updated in the resources table.
 * @return false|null false when the CSV could not be divided; otherwise nothing.
 */
public function get_all_taxa($resource_id)
{
    // Leftovers from an interrupted run would otherwise be combined into this one.
    Functions::delete_temp_files($this->OBIS_DATA_PATH . "temp_obis_", "xml");
    Functions::delete_temp_files($this->OBIS_DATA_PATH . "temp_", "csv");

    // Split the big file into more consumable chunks.
    $file_count = self::divide_big_csv_file(40000); //debug orig is 40000
    if ($file_count === false) {
        return false;
    }

    $all_taxa = array();
    $seen_collection_ids = array();

    $chunk = 1;
    while ($chunk <= $file_count) {
        echo "\nprocessing {$chunk} => \n";

        $csv_path = $this->OBIS_DATA_PATH . "temp_" . $chunk . ".csv";
        $result = self::get_obis_taxa($csv_path, $seen_collection_ids);
        $chunk_taxa = $result[0];
        $seen_collection_ids = $result[1];

        $document = \SchemaDocument::get_taxon_xml($chunk_taxa);
        $chunk_xml_path = $this->OBIS_DATA_PATH . "temp_obis_" . $chunk . ".xml";
        $handle = Functions::file_open($chunk_xml_path, "w");
        if (!$handle) {
            return;
        }
        fwrite($handle, $document);
        fclose($handle);

        $chunk++;
    }

    // Combine all XML files.
    Functions::combine_all_eol_resource_xmls($resource_id, $this->OBIS_DATA_PATH . "temp_obis_*.xml");

    // Set to force harvest
    if (filesize(CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml")) {
        $status_id = ResourceStatus::force_harvest()->id;
        $GLOBALS['db_connection']->update("UPDATE resources SET resource_status_id=" . $status_id . " WHERE id=" . $resource_id);
    }

    // Delete temp files
    Functions::delete_temp_files($this->OBIS_DATA_PATH . "temp_obis_", "xml");
    Functions::delete_temp_files($this->OBIS_DATA_PATH . "temp_", "csv");
}
$dc_source = $url_for_nematocysts; $subject = "http://rs.tdwg.org/ontology/voc/SPMInfoItems#Biology"; $data_object_parameters = get_data_object("text", $taxon, "nematocyst", $dc_source, $agent_name, $agent_role, $html_nematocysts, $copyright, $image_url, $title, $subject); $taxon_parameters["dataObjects"][] = new \SchemaDataObject($data_object_parameters); } //end $used_taxa[$taxon] = $taxon_parameters; } //with photos //end main loop } foreach ($used_taxa as $taxon_parameters) { $schema_taxa[] = new \SchemaTaxon($taxon_parameters); } ////////////////////// --- $new_resource_xml = SchemaDocument::get_taxon_xml($schema_taxa); $old_resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource->id . ".xml"; if (!($OUT = fopen($old_resource_path, "w+"))) { debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $old_resource_path); return; } fwrite($OUT, $new_resource_xml); fclose($OUT); ////////////////////// --- $elapsed_time_sec = microtime(1) - $timestart; echo "{$wrap}"; echo "elapsed time = {$elapsed_time_sec} sec \n"; echo "elapsed time = " . $elapsed_time_sec / 60 . " min \n"; echo "elapsed time = " . $elapsed_time_sec / 60 / 60 . " hr \n"; echo "{$wrap}{$wrap} Done processing."; //######################################################################################################################
/**
 * Builds the taxon XML document for a specialist-project spreadsheet.
 *
 * $taxon_info holds parallel columns (e.g. "Scientific Name", "Kingdom",
 * "Reference Code") indexed by row position. One taxon is emitted per
 * distinct formatted scientific name; rows with a blank name are skipped and
 * later rows repeating an already-built name are ignored.
 *
 * @param array      $taxon_info   Column name => list of row values.
 * @param array|null $text_desc    Text descriptions keyed by taxon identifier;
 *                                 the '' key is passed on as the title lookup.
 * @param array|null $multimedia   Media records keyed by taxon identifier.
 * @param array|null $common_names Common-name records keyed by taxon identifier.
 * @param array|null $synonyms     Synonym records keyed by taxon identifier.
 * @param array|null $do_details   Data-object details; its 'references' entry
 *                                 is looked up by reference code.
 * @return mixed Whatever \SchemaDocument::get_taxon_xml() returns for the taxa.
 */
public function create_specialist_project_xml($taxon_info, $text_desc = NULL, $multimedia = NULL, $common_names = NULL, $synonyms = NULL, $do_details = NULL)
{
    $used_taxa = array();

    // Both lookups are loop-invariant; guard them because the arguments default to NULL.
    $references = isset($do_details['references']) ? $do_details['references'] : NULL;
    /* to get the title e.g. 'Associations': $text_desc_title[0]['Associations'] */
    $text_desc_title = isset($text_desc['']) ? $text_desc[''] : NULL;

    // Spreadsheet column for each classification field, in output order.
    $name_columns = array(
        "kingdom"        => "Kingdom",
        "phylum"         => "Phylum",
        "class"          => "Class",
        "order"          => "Order",
        "family"         => "Family",
        "genus"          => "Genus",
        "scientificName" => "Scientific Name", //formerly Scientific Name
    );

    $i = 0;
    foreach ($taxon_info["Scientific Name"] as $sciname) {
        // $row is this row's position in every parallel column.
        $row = $i;
        $i++;

        if (!trim($sciname)) {
            continue;
        }
        $taxon_identifier = self::format($sciname);
        if (isset($used_taxa[$taxon_identifier])) {
            // The first row for a name wins; the original re-stored the same value here.
            continue;
        }

        $taxon_parameters = array();
        $taxon_parameters["identifier"] = self::format(@$taxon_info["ID"][$row]);
        foreach ($name_columns as $field => $column) {
            $taxon_parameters[$field] = ucfirst(self::format(@$taxon_info[$column][$row]));
        }
        $taxon_parameters["source"] = trim(self::format(@$taxon_info["Source URL"][$row]));

        //start taxon reference
        $reference_code = str_ireplace(" ", "", self::format(@$taxon_info["Reference Code"][$row]));
        $refs = array();
        foreach (explode(",", $reference_code) as $ref_code) {
            $referenceParameters = array();
            $referenceParameters["fullReference"] = self::format(@$references[$ref_code]['Bibliographic Citation']);
            // When either identifier is present, both are emitted (one may be empty).
            if (@$references[$ref_code]['URL'] || @$references[$ref_code]['ISBN']) {
                $referenceParameters["referenceIdentifiers"][] = new \SchemaReferenceIdentifier(array("label" => "url", "value" => self::format(@$references[$ref_code]['URL'])));
                $referenceParameters["referenceIdentifiers"][] = new \SchemaReferenceIdentifier(array("label" => "isbn", "value" => self::format(@$references[$ref_code]['ISBN'])));
            }
            $refs[] = new \SchemaReference($referenceParameters);
        }
        $taxon_parameters["references"] = $refs;
        //end taxon reference

        //start preferred common names
        $taxon_parameters["commonNames"] = array();
        if (@$taxon_info["Preferred Common Name"][$row]) {
            $taxon_parameters["commonNames"][] = new \SchemaCommonName(array(
                "name"     => self::format(@$taxon_info["Preferred Common Name"][$row]),
                "language" => self::format(@$taxon_info["Language of Common Name"][$row]),
            ));
        }
        //end preferred common names

        //start common names
        if (@$common_names[$taxon_identifier]) {
            foreach ($common_names[$taxon_identifier] as $rec) {
                if ($rec) {
                    $taxon_parameters["commonNames"][] = new \SchemaCommonName(array(
                        "name"     => self::format($rec['Common Name']),
                        "language" => self::format($rec['Language']),
                    ));
                }
            }
        }
        //end common names

        //start synonyms
        // Fix: the original initialised $taxon_params (a typo), leaving this key unset.
        $taxon_parameters["synonyms"] = array();
        if (@$synonyms[$taxon_identifier]) {
            foreach ($synonyms[$taxon_identifier] as $rec) {
                $taxon_parameters["synonyms"][] = new \SchemaSynonym(array(
                    "synonym"      => self::format($rec['Synonym']),
                    "relationship" => self::format($rec['Relationship']),
                ));
            }
        }
        //end synonyms

        //start data objects
        $text_objects = self::prepare_text_dataObject(@$text_desc[$taxon_identifier], $do_details, $text_desc_title);
        $media_objects = self::prepare_media_dataObject(@$multimedia[$taxon_identifier], $do_details);
        foreach (array_merge($text_objects, $media_objects) as $object) {
            $taxon_parameters["dataObjects"][] = new \SchemaDataObject($object);
        }
        //end data objects

        $used_taxa[$taxon_identifier] = $taxon_parameters;
    }

    $schema_taxa = array();
    foreach ($used_taxa as $taxon_parameters) {
        $schema_taxa[] = new \SchemaTaxon($taxon_parameters);
    }
    return \SchemaDocument::get_taxon_xml($schema_taxa);
}
} } if ($description) { $dataObjects[] = get_data_object($amphibID . "_description", "Description", $description, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#GeneralDescription", $refs, $agents, $pageURL); } /* we didn't get <comments> if($comments) $dataObjects[] = get_data_object("Comments", $comments, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#GeneralDescription", $refs, $agents, $pageURL); */ foreach ($dataObjects as $k => $v) { $taxonParameters["dataObjects"][] = new \SchemaDataObject($v); unset($v); } $taxa[] = new \SchemaTaxon($taxonParameters); //if($i >= 5) break; //debug } $new_resource_xml = \SchemaDocument::get_taxon_xml($taxa); $old_resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml"; if (!($OUT = Functions::file_open($old_resource_path, "w+"))) { return; } fwrite($OUT, $new_resource_xml); fclose($OUT); Functions::set_resource_status_to_force_harvest($resource_id); shell_exec("rm " . $new_resource_path); //-------- // 0x73 0x20 0x68 0x61 $xml_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml"; if ($xml = Functions::lookup_with_cache($xml_path, array('timeout' => 1200, 'download_attempts' => 5, 'expire_seconds' => true))) { // $xml = str_replace(chr(0x73)." ".chr(0x20)." ".chr(0x73)." ".chr(0x6B), " ", $xml); // $xml = str_replace(array(chr(0x73), chr(0x20), chr(0x68), chr(0x61)), " ", $xml); $xml = str_replace(array(0x73, 0x20, 0x73, 0x6b), " ", $xml);
/**
 * Processes one Tropicos task file and writes its taxa to a temp XML file.
 *
 * The task file <temp_file_path><task>.txt is read line by line; the first
 * tab-separated field of each non-empty line is the taxon id handed to
 * get_tropicos_taxa(). check_server_downtime() runs before every line. The
 * XML goes to <temp_file_path>temp_tropicos_<task>.xml.
 *
 * @param string $task           Task name; part of the input and output file names.
 * @param string $temp_file_path Directory prefix of the task and output files.
 * @return void
 */
function get_all_taxa($task, $temp_file_path)
{
    $collected = array();
    $seen_collection_ids = array();

    $filename = $temp_file_path . $task . ".txt";
    echo "\nfilename: [{$filename}]";

    $processed = 0;
    foreach (new FileIterator($filename) as $line) {
        self::check_server_downtime();

        if (!$line) {
            echo "\n Task list: end-of-file";
            continue;
        }

        $processed++;
        echo "\n{$processed} ";

        $fields = explode("\t", trim($line));
        $taxon_id = trim($fields[0]);

        $result = self::get_tropicos_taxa($taxon_id, $seen_collection_ids);
        $taxa_for_id = $result[0];
        $seen_collection_ids = $result[1];
        if ($taxa_for_id) {
            $collected = array_merge($collected, $taxa_for_id);
        }
    }

    $document = \SchemaDocument::get_taxon_xml($collected);
    // $document = self::add_rating_to_image_object($document, '1.0');
    $resource_path = $temp_file_path . "temp_tropicos_" . $task . ".xml";

    $handle = fopen($resource_path, "w");
    if (!$handle) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
        return;
    }
    fwrite($handle, $document);
    fclose($handle);
}
// .3 seconds wait time
include_once dirname(__FILE__) . "/../../config/environment.php";

// Flickr connector script for resource 15: writes a fresh resource file,
// rotates the previous one and flags the resource for force-harvest.
$resource_id = 15;
if (!Functions::can_this_connector_run($resource_id)) {
    return;
}

require_library('FlickrAPI');
$GLOBALS['ENV_DEBUG'] = false;

// The configured token is only used when FlickrAPI accepts it.
$auth_token = FlickrAPI::valid_auth_token(FLICKR_AUTH_TOKEN) ? FLICKR_AUTH_TOKEN : NULL;

$current_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
$temp_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . "_temp.xml";
$previous_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . "_previous.xml";

// create new _temp file
$resource_file = Functions::file_open($temp_path, "w+");
if (!$resource_file) {
    return;
}

// XML header, then the photos queried from Flickr, then the footer
fwrite($resource_file, \SchemaDocument::xml_header());
FlickrAPI::get_all_eol_photos($auth_token, $resource_file);
fwrite($resource_file, \SchemaDocument::xml_footer());
fclose($resource_file);

// cache the previous version and make this new version the current version
@unlink($previous_path);
Functions::file_rename($current_path, $previous_path);
Functions::file_rename($temp_path, $current_path);

// set Flickr to force harvest, but only when the new file exceeds 600 bytes
if (filesize($current_path) > 600) {
    Functions::set_resource_status_to_force_harvest($resource_id);
}
/**
 * Converts the prepared FishBase data into the resource XML in batches.
 *
 * Every 1000 taxa the accumulated taxa are written to
 * <TEMP_FILE_PATH>FB_<n>.xml and the accumulator is reset; whatever remains
 * after the loop is written as the last batch. The batch files are then
 * combined into the resource file and removed, and the temp directory
 * (TEMP_FILE_PATH with "/fishbase" stripped) is deleted.
 *
 * @param mixed $resource_id Resource id handed to combine_all_eol_resource_xmls().
 * @return array|false|null false when prepare_data() fails; the taxa of the
 *                          last batch when $this->test_run is truthy;
 *                          otherwise nothing.
 */
function get_all_taxa($resource_id)
{
    $data = self::prepare_data();
    if ($data === false) {
        return false;
    }

    $taxa = $data["taxon"];
    $comnames_by_taxon = $data["taxon_comnames"];
    $references_by_taxon = $data["taxon_references"];
    $synonyms_by_taxon = $data["taxon_synonyms"];
    $dataobjects_by_taxon = $data["taxon_dataobject"];
    $GLOBALS['taxon_dataobject_agent'] = $data["taxon_dataobject_agent"];
    $GLOBALS['taxon_dataobject_reference'] = $data["taxon_dataobject_reference"];

    $all_taxa = array();
    $total = count($taxa);
    $batch = 1000; //debug orig 1000
    $batch_count = 0;
    $position = 0;

    foreach ($taxa as $taxon) {
        // if($taxon["dc_identifier"] != "FB-47873") continue; // debug
        $position++;
        debug("\n{$position} of {$total} " . $taxon["dwc_ScientificName"]);

        $taxon_id = $taxon["int_id"];
        $taxon_record = array(
            "taxon"        => $taxon,
            "common_names" => @$comnames_by_taxon[$taxon_id],
            "references"   => @$references_by_taxon[$taxon_id],
            "synonyms"     => @$synonyms_by_taxon[$taxon_id],
            "dataobjects"  => @$dataobjects_by_taxon[$taxon_id],
        );

        $result = self::get_FishBase_taxa($taxon_record);
        $taxa_for_record = $result[0];
        if ($taxa_for_record) {
            $all_taxa = array_merge($all_taxa, $taxa_for_record);
        }

        if ($position % $batch != 0) {
            continue;
        }

        // A full batch has accumulated: write it out and start a new one.
        $batch_count++;
        $document = \SchemaDocument::get_taxon_xml($all_taxa);
        $resource_path = $this->TEMP_FILE_PATH . "FB_" . $batch_count . ".xml";
        $handle = fopen($resource_path, "w");
        if (!$handle) {
            debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
            return;
        }
        fwrite($handle, $document);
        fclose($handle);
        $all_taxa = array();
    }

    //last batch
    $batch_count++;
    $document = \SchemaDocument::get_taxon_xml($all_taxa);
    $resource_path = $this->TEMP_FILE_PATH . "FB_" . $batch_count . ".xml";
    $handle = fopen($resource_path, "w");
    if (!$handle) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
        return;
    }
    fwrite($handle, $document);
    fclose($handle);

    Functions::combine_all_eol_resource_xmls($resource_id, $this->TEMP_FILE_PATH . "FB_*.xml");
    self::delete_temp_files($this->TEMP_FILE_PATH . "FB_*.xml");

    // remove tmp dir
    $this->TEMP_FILE_PATH = str_ireplace("/fishbase", "", $this->TEMP_FILE_PATH);
    if ($this->TEMP_FILE_PATH) {
        shell_exec("rm -fr {$this->TEMP_FILE_PATH}");
    }

    //used in testing
    if ($this->test_run) {
        return $all_taxa;
    }
}
/**
 * Processes one BOLDSYS task file and writes its taxa to <temp_file_path><task>.xml.
 *
 * Records are read from the JSON task file <temp_file_path><task>.txt and
 * converted through get_boldsys_taxa(). In the generated XML every
 * </mediaURL> closing tag is followed by an additionalInformation element
 * carrying the subtype "map".
 *
 * @param string $task           Task name; base name of input and output files.
 * @param string $temp_file_path Directory prefix of the task and output files.
 * @return void
 */
function get_all_taxa($task, $temp_file_path)
{
    $records = self::get_array_from_json_file($temp_file_path . $task . ".txt");
    $num_rows = sizeof($records);

    $collected = array();
    $seen_collection_ids = array();
    $position = 0;

    foreach ($records as $rec) {
        $position++;
        echo "\n [{$position} of {$num_rows}] ";
        echo $rec['taxonomy']['species']['taxon']['name'];
        // if(trim($rec['taxonomy']['species']['taxon']['name']) != "Lumbricus centralis") continue; //debug

        $result = $this->get_boldsys_taxa($rec, $seen_collection_ids);
        $taxa_for_record = $result[0];
        $seen_collection_ids = $result[1];
        if ($taxa_for_record) {
            $collected = array_merge($collected, $taxa_for_record);
        }
    }

    $map_marker = "</mediaURL><additionalInformation><subtype>map</subtype>\n</additionalInformation>\n";
    $document = str_replace("</mediaURL>", $map_marker, \SchemaDocument::get_taxon_xml($collected));

    $resource_path = $temp_file_path . $task . ".xml";
    $handle = fopen($resource_path, "w");
    if (!$handle) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
        return;
    }
    fwrite($handle, $document);
    fclose($handle);
}
/**
 * Builds the FishWisePro resource from the species, image, common-name and
 * synonym spreadsheets.
 *
 * Taxa are converted through get_fishwise_taxa() and flushed to numbered
 * batch files under update_resources/connectors/files/FishWisePro/ whenever
 * the running counter reaches a multiple of 10000; remaining taxa are flushed
 * after the loop. The batch files are then combined into the resource file
 * and deleted.
 *
 * @param mixed $resource_id Resource id handed to combine_all_eol_resource_xmls().
 * @return void
 */
public function get_all_taxa($resource_id)
{
    $all_taxa = array();
    $this->used_collection_ids = array();
    $urls = array(FWP_SPECIES_DOC_PATH); // you can have multiple sources of the species.xls
    $taxa_arr = self::compile_taxa($urls);

    require_library('XLSParser');
    $parser = new XLSParser();
    $images = self::prepare_table($parser->convert_sheet_to_array(FWP_IMAGES_DOC_PATH), "multiple", "SId", "SId", "PictureId", "dbo_Picture_PictureNote", "PictureType", "IsLegal", "Location", "PicComments", "IsAvailable", "LifeStage", "CollectionName", "CollectionAcronym", "PictureSource", "Surname", "Firstname", "DisplayName", "FileName");
    $comnames = self::prepare_table($parser->convert_sheet_to_array(FWP_COMNAMES_DOC_PATH), "multiple", "SId", "CommonName", "Language");
    $synonyms = self::prepare_table($parser->convert_sheet_to_array(FWP_SYNONYMS_DOC_PATH), "multiple", "SId", "SynGenusSpecies", "SynStatus");

    $batch_dir = DOC_ROOT . "/update_resources/connectors/files/FishWisePro/";
    $i = 1;
    $total = sizeof($taxa_arr);
    $j = 0;

    foreach ($taxa_arr as $taxon_arr) {
        echo "\n {$i} of {$total} -- " . $taxon_arr['SId'];
        $i++;

        $taxon_id = $taxon_arr['SId'];
        $page_taxa = self::get_fishwise_taxa($taxon_arr, @$images[$taxon_id], @$comnames[$taxon_id], @$synonyms[$taxon_id]);
        // Guard the merge: a NULL/false result would otherwise wipe the
        // accumulator (older PHP) or raise a TypeError (PHP 8).
        if ($page_taxa) {
            $all_taxa = array_merge($all_taxa, $page_taxa);
        }

        if ($i % 10000 == 0) {
            $j++;
            if (!$this->write_fishwise_batch_file($batch_dir, $j, $all_taxa)) {
                return;
            }
            $all_taxa = array();
        }
    }

    // Flush whatever is left over after the last full batch.
    if ($all_taxa) {
        $j++;
        if (!$this->write_fishwise_batch_file($batch_dir, $j, $all_taxa)) {
            return;
        }
    }

    Functions::combine_all_eol_resource_xmls($resource_id, $batch_dir . "*.xml");
    self::delete_files($batch_dir . "*.xml");
    return;
}

/**
 * Writes one batch of taxa as <batch_dir><zero-padded batch number>.xml.
 *
 * @param string $batch_dir    Directory prefix (with trailing slash) of the batch files.
 * @param int    $batch_number Sequence number, padded to three digits in the file name.
 * @param array  $taxa         Taxa to serialise.
 * @return bool true on success, false when the file could not be opened.
 */
private function write_fishwise_batch_file($batch_dir, $batch_number, $taxa)
{
    $xml = \SchemaDocument::get_taxon_xml($taxa);
    $resource_path = $batch_dir . Functions::format_number_with_leading_zeros($batch_number, 3) . ".xml";
    if (!($OUT = fopen($resource_path, "w+"))) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
        return false;
    }
    fwrite($OUT, $xml);
    fclose($OUT);
    return true;
}
/**
 * Processes one DiscoverLife task file of names and writes the resulting
 * taxa to CONTENT_RESOURCE_LOCAL_PATH/DiscoverLife/temp_DiscoverLife_<task>.xml.
 *
 * Each name is checked through CheckIfNameHasAnEOLPage; names without an EOL
 * page are counted, stored in the report text file and skipped. The function
 * sleeps one second before handling each line.
 *
 * @param string $task Task name; part of the input and output file names.
 * @return void
 */
private function get_all_taxa($task)
{
    require_library('CheckIfNameHasAnEOLPage');
    $checker = new CheckIfNameHasAnEOLPage();

    //initialize text file for DiscoverLife: save names without a page in EOL
    self::initialize_text_file(self::$TEXT_FILE_FOR_DL);

    $filename = self::$TEMP_FILE_PATH . $task . ".txt";
    $FILE = fopen($filename, "r");
    if (!$FILE) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $filename);
        return;
    }

    $collected = array();
    $seen_collection_ids = array();
    $processed = 0;
    $save_count = 0;
    $missing = 0;

    while (!feof($FILE)) {
        $line = fgets($FILE);
        if (!$line) {
            continue;
        }
        sleep(1);
        $name = trim($line);
        $processed++;

        // Only names that already have an EOL page are processed; the rest
        // are reported back to DiscoverLife through the text file.
        $lookup = $checker->check_if_name_has_EOL_page($name);
        $has_eol_page = $lookup[0];
        $xml_from_api = $lookup[1];
        if (!$has_eol_page) {
            print "\n - no EOL page ({$name})";
            $missing++;
            self::store_name_to_text_file($name, $task);
            continue;
        }

        $taxon = $checker->get_taxon_simple_stat($name, $xml_from_api);
        $taxon["map"] = 1;
        $canonical = trim(Functions::canonical_form(trim($taxon['sciname'])));
        $taxon["call_back"] = (trim($name) == $canonical) ? "taxon_concept_id" : "scientific_name";
        print "\n {$processed} -- " . $taxon['sciname'] . "\n";

        $result = self::get_discoverlife_taxa($taxon, $seen_collection_ids);
        $taxa_for_name = $result[0];
        $seen_collection_ids = $result[1];
        if ($taxa_for_name) {
            $collected = array_merge($collected, $taxa_for_name);
        }
    }
    fclose($FILE);

    $document = SchemaDocument::get_taxon_xml($collected);
    $resource_path = CONTENT_RESOURCE_LOCAL_PATH . "DiscoverLife/temp_DiscoverLife_" . $task . ".xml";
    $handle = fopen($resource_path, "w");
    if (!$handle) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
        return;
    }
    fwrite($handle, $document);
    fclose($handle);

    $found = $processed - $missing;
    print "\n\n total = {$processed} \n With EOL page = {$found} \n No EOL page = {$missing} \n\n ";
}
/**
 * Downloads the AmphibiaWeb species dump and converts it into the resource
 * XML at CONTENT_RESOURCE_LOCAL_PATH/<resource_id>.xml.
 *
 * The dump is cached to DOC_ROOT/temp/<resource_id>.xml, parsed with
 * SimpleXML and removed once the resource file has been written. Species
 * without a "submittedby" value are skipped. Each remaining species becomes
 * one taxon with up to five text data objects (distribution, life history,
 * trends and threats, relation to humans, description plus comments).
 *
 * @param mixed $resource_id Base name (without extension) of the output file.
 * @return void
 */
function start($resource_id)
{
    $new_resource_path = DOC_ROOT . "temp/" . $resource_id . ".xml";
    $file = 'http://amphibiaweb.org/amphib_dump.xml';

    $new_resource_xml = Functions::lookup_with_cache($file, array('timeout' => 1200, 'download_attempts' => 5, 'expire_seconds' => 86400));
    if (!$new_resource_xml) {
        echo "\n\n Content partner's server is down, connector will now terminate.\n";
        return;
    }

    // These may look like the same wrong characters - but they are several different wrong characters
    // NOTE(review): the three search strings appear empty in the reviewed copy;
    // they presumably hold non-printable mis-encoded quote/dash bytes. Confirm the
    // literal bytes survive this edit - with an empty needle str_replace() is a no-op.
    $new_resource_xml = str_replace("", "\"", $new_resource_xml);
    $new_resource_xml = str_replace("", "\"", $new_resource_xml);
    $new_resource_xml = str_replace("", "-", $new_resource_xml);

    if (!($OUT = Functions::file_open($new_resource_path, "w+"))) {
        return;
    }
    fwrite($OUT, $new_resource_xml);
    fclose($OUT);
    unset($new_resource_xml);

    $xml = simplexml_load_file($new_resource_path);
    if ($xml === false) {
        // Stop here: continuing would overwrite the resource file with an empty document.
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't parse file: " . $new_resource_path);
        return;
    }

    $taxa = array();
    $total = count($xml->species);
    $i = 0;
    foreach ($xml->species as $species) {
        $i++;
        if ($i % 1000 == 0) {
            echo "\n {$i} of {$total} ";
        }

        // Species without a submitter are not published.
        $submittedBy = format_utf8((string) trim($species->submittedby));
        if (!$submittedBy) {
            continue;
        }

        $amphibID = (int) trim($species->amphib_id);
        $genus = format_utf8((string) trim($species->genus));
        $speciesName = format_utf8((string) trim($species->species));
        $order = format_utf8((string) trim($species->ordr));
        $family = format_utf8((string) trim($species->family));
        $commonNames = explode(",", format_utf8((string) trim($species->common_name)));

        $description = fix_article(format_utf8((string) trim($species->description)));
        $distribution = fix_article(format_utf8((string) trim($species->distribution)));
        $life_history = fix_article(format_utf8((string) trim($species->life_history)));
        $trends_and_threats = fix_article(format_utf8((string) trim($species->trends_and_threats)));
        $relation_to_humans = fix_article(format_utf8((string) trim($species->relation_to_humans)));
        $comments = fix_article(format_utf8((string) trim($species->comments)));

        // The references arrive as one string with "<p>" between entries.
        $refs = array();
        foreach (explode("<p>", format_utf8((string) trim($species->refs))) as $r) {
            $refs[] = array("fullReference" => trim($r));
        }

        $pageURL = "http://amphibiaweb.org/cgi/amphib_query?where-genus=" . $genus . "&where-species=" . $speciesName . "&account=amphibiaweb";

        // One author agent per name; names are separated by "," or " and ".
        // foreach replaces the each() loop, which no longer exists in PHP 8.
        $agents = array();
        foreach (preg_split("/(,| and )/", $submittedBy) as $val) {
            $val = trim($val);
            if (!$val) {
                continue;
            }
            $agents[] = new \SchemaAgent(array("role" => "author", "fullName" => $val));
        }

        $taxonParameters = array();
        $taxonParameters["identifier"] = $amphibID;
        $taxonParameters["source"] = $pageURL;
        $taxonParameters["kingdom"] = "Animalia";
        $taxonParameters["phylum"] = "Chordata";
        $taxonParameters["class"] = "Amphibia";
        $taxonParameters["order"] = $order;
        $taxonParameters["family"] = $family;
        $taxonParameters["scientificName"] = trim($genus . " " . $speciesName);
        foreach ($commonNames as $common_name) {
            $taxonParameters['commonNames'][] = new \SchemaCommonName(array("name" => $common_name, "language" => "en"));
        }
        $taxonParameters["dataObjects"] = array();

        // Comments are published as part of the description. The original
        // attached its else-branch to the inner if, which made the
        // "comments only" case unreachable and silently dropped the comments.
        if ($description != "") {
            if ($comments != "") {
                $description .= $comments;
            }
        } elseif ($comments != "") {
            $description = $comments;
        }

        // identifier suffix, title, text, subject - in output order.
        // NOTE(review): "_trends_threats" reuses the life-history title; confirm that is intended.
        $sections = array(
            array("_distribution", "Distribution and Habitat", $distribution, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#Distribution"),
            array("_life_history", "Life History, Abundance, Activity, and Special Behaviors", $life_history, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#Trends"),
            array("_trends_threats", "Life History, Abundance, Activity, and Special Behaviors", $trends_and_threats, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#Threats"),
            array("_relation_to_humans", "Relation to Humans", $relation_to_humans, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#RiskStatement"),
            array("_description", "Description", $description, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#GeneralDescription"),
        );
        foreach ($sections as $section) {
            list($suffix, $title, $text, $subject) = $section;
            if (!$text) {
                continue;
            }
            $object = get_data_object($amphibID . $suffix, $title, $text, $subject, $refs, $agents, $pageURL);
            $taxonParameters["dataObjects"][] = new \SchemaDataObject($object);
        }

        $taxa[] = new \SchemaTaxon($taxonParameters);
        //if($i >= 5) break; //debug
    }

    $new_resource_xml = \SchemaDocument::get_taxon_xml($taxa);
    $old_resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
    if (!($OUT = Functions::file_open($old_resource_path, "w+"))) {
        return;
    }
    fwrite($OUT, $new_resource_xml);
    fclose($OUT);

    // Force-harvest and gzip of the resource were disabled in the original and stay disabled:
    // Functions::set_resource_status_to_force_harvest($resource_id);
    // Functions::gzip_resource_xml($resource_id);

    // Remove the cached dump; unlink() avoids passing an unescaped path to a shell.
    if (file_exists($new_resource_path)) {
        unlink($new_resource_path);
    }
}
/**
 * Writes the resource XML from $this->names_in_families and rotates it in.
 *
 * One taxon is written per distinct avibaseid (the first record seen for an
 * id wins); records without a taxon name are skipped. The document is built
 * in <resource_id>_temp.xml, then the current <resource_id>.xml becomes
 * <resource_id>_previous.xml and the temp file becomes the current one.
 *
 * @return \SchemaTaxon|null The last taxon written; NULL when no taxon was
 *                           written or the temp file could not be opened.
 */
function prepare_resource()
{
    $base_path = CONTENT_RESOURCE_LOCAL_PATH . $this->resource_id;
    $temp_path = $base_path . "_temp.xml";

    if (!($resource_file = fopen($temp_path, "w+"))) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $temp_path);
        return;
    }
    fwrite($resource_file, \SchemaDocument::xml_header());

    $language_iso_codes = self::language_iso_codes();
    $avibaseids_added = array();
    // Defined up front so the final return is valid even when nothing is written.
    $taxon = NULL;

    foreach ($this->names_in_families as $metadata) {
        $avibaseid = $metadata['avibaseid'];
        if (isset($avibaseids_added[$avibaseid])) {
            continue;
        }
        // The id is marked before the name check, exactly as before: a nameless
        // first record still blocks later records carrying the same id.
        $avibaseids_added[$avibaseid] = 1;
        if (!$metadata['taxon_name']) {
            continue;
        }

        $taxon_parameters = array();
        $taxon_parameters['identifier'] = $avibaseid;
        $taxon_parameters['kingdom'] = "Animalia";
        $taxon_parameters['phylum'] = "Chordata";
        $taxon_parameters['class'] = "Aves";
        $taxon_parameters['order'] = @$this->family_orders[$metadata['family']];
        $taxon_parameters['family'] = @$metadata['family'];
        $taxon_parameters['scientificName'] = $metadata['taxon_name'];
        $taxon_parameters['source'] = self::AVIBASE_SOURCE_URL . $avibaseid;
        // The genus is the first word of the name when it starts with a letter.
        if (preg_match("/^([a-z][^ ]+) /i", $metadata['taxon_name'], $arr)) {
            $taxon_parameters['genus'] = $arr[1];
        }

        // Fix: the original initialised 'common_names' but appended to
        // 'commonNames', so the list it filled was never initialised.
        $taxon_parameters['commonNames'] = array();
        if (isset($metadata['common_names'])) {
            foreach ($metadata['common_names'] as $language => $common_names) {
                $language_iso_code = @$language_iso_codes[$language];
                if (!$language_iso_code) {
                    debug("No iso code for: {$language} \n");
                    continue;
                }
                foreach ($common_names as $common_name => $value) {
                    $taxon_parameters['commonNames'][] = new \SchemaCommonName(array("name" => $common_name, "language" => $language_iso_code));
                }
            }
        }

        $taxon_parameters['synonyms'] = array();
        if (isset($metadata['synonyms'])) {
            foreach ($metadata['synonyms'] as $synonym => $value) {
                // A name is not listed as its own synonym.
                if ($synonym == $metadata['taxon_name']) {
                    continue;
                }
                $taxon_parameters['synonyms'][] = new \SchemaSynonym(array("synonym" => $synonym, "relationship" => 'synonym'));
            }
        }

        $taxon = new \SchemaTaxon($taxon_parameters);
        fwrite($resource_file, $taxon->__toXML());
    }

    fwrite($resource_file, \SchemaDocument::xml_footer());
    fclose($resource_file);

    // cache the previous version and make this new version the current version
    @unlink($base_path . "_previous.xml");
    @rename($base_path . ".xml", $base_path . "_previous.xml");
    rename($temp_path, $base_path . ".xml");

    // returning the last taxon
    return $taxon;
}
/**
 * Processes the names listed in "<TEMP_FILE_PATH><task>.txt" (one per line)
 * and writes the resulting taxa to "<TEMP_FILE_PATH>temp_DiscoverLife_<task>.xml".
 *
 * Only names for which self::with_eol_page() returns a value are processed;
 * the others are counted and passed to self::store_name_to_text_file().
 * A summary of the counts is printed at the end.
 *
 * @param mixed $task used to build the input and output file names
 * @return void
 */
function get_all_taxa($task)
{
    //initialize text file for DiscoverLife: save names without a page in EOL
    self::initialize_text_file($this->TEXT_FILE_FOR_DL);

    $names_file = $this->TEMP_FILE_PATH . $task . ".txt";
    $all_taxa = array();
    $used_collection_ids = array();
    $i = 0;
    $no_eol_page = 0;

    foreach (new FileIterator($names_file) as $line_number => $line) {
        if (!$line) {
            continue;
        }

        $name = trim($line);
        $i++;

        //Filter names. Process only those who already have a page in EOL. Report back to DiscoverLife names not found in EOL
        $taxon = self::with_eol_page($name);
        if (!$taxon) {
            print "\n {$i} -- no EOL page ({$name})";
            $no_eol_page++;
            self::store_name_to_text_file($name, $task);
            continue;
        }

        print "\n {$i} -- " . $taxon['orig_sciname'] . "\n";

        // result is a pair: [0] taxa for this name, [1] updated collection ids
        $result = self::get_discoverlife_taxa($taxon, $used_collection_ids);
        $used_collection_ids = $result[1];
        if ($result[0]) {
            $all_taxa = array_merge($all_taxa, $result[0]);
        }
    }

    // tag every data object in the generated XML with the "map" subtype
    $xml = str_replace(
        "</dataObject>",
        "<additionalInformation><subtype>map</subtype></additionalInformation></dataObject>",
        \SchemaDocument::get_taxon_xml($all_taxa)
    );

    $resource_path = $this->TEMP_FILE_PATH . "temp_DiscoverLife_" . $task . ".xml";
    $handle = Functions::file_open($resource_path, "w");
    if (!$handle) {
        return;
    }
    fwrite($handle, $xml);
    fclose($handle);

    $with_eol_page = $i - $no_eol_page;
    print "\n\n total = {$i} \n With EOL page = {$with_eol_page} \n No EOL page = {$no_eol_page} \n\n ";
}
/**
 * Builds the taxa list and writes it as XML to
 * CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml".
 *
 * For each name from self::prepare_taxa_list():
 *  - a name present in the synonymy map is replaced by its 'EOL NAME';
 *  - a name present in the names-to-be-added map is processed as is;
 *  - any other name is processed only if it is found in EOL, trying in order
 *    the name with dots removed, its canonical form, and the name without
 *    parenthesis. Names that fail the first lookup are written to the partner
 *    text file; names found through a fallback are written again with a note.
 *
 * Prints per-name progress and a final summary of counts.
 *
 * @param mixed $resource_id used to build the output file name
 * @return void
 */
public static function get_all_taxa_keys($resource_id)
{
    require_library('CheckIfNameHasAnEOLPage');
    $func = new CheckIfNameHasAnEOLPage();

    $GLOBALS['animal_plant_list'] = self::prepare_animal_plant_list();
    list($taxa_objects, $synonymy, $names_to_be_added) = self::prepare_taxa_list();

    //initialize text file for USFWS
    self::initialize_text_file(DOC_ROOT . self::TEXT_FILE_FOR_PARTNER);

    $all_taxa = array();
    $used_collection_ids = array();
    $i = 0;
    $no_eol_page = 0;

    foreach ($taxa_objects as $name => $taxon) {
        $i++;

        if (@$synonymy[$name]) {
            $name = trim($synonymy[$name]['EOL NAME']);
            $taxon['NAME'] = $name;
        } elseif (!@$names_to_be_added[$name]) {
            //filter names. Process only those who already have a page in EOL. Report back to USFWS names not found in EOL
            $name = str_replace(".", "", $name);
            if (!self::name_in_eol($name, $func)) {
                self::store_name_to_text_file($name);

                // try the canonical form
                $name_canonical = Functions::canonical_form($name);
                if ($name == $name_canonical) {
                    $no_eol_page++;
                    continue;
                }

                if (self::name_in_eol($name_canonical, $func)) {
                    echo "\n OK canonical name in EOL: [{$name_canonical}]\n";
                    $taxon['NAME'] = $name_canonical;
                    self::store_name_to_text_file($name . " - canonical form of the name found in EOL: {$name_canonical}");
                } else {
                    // try name without parenthesis; give up when stripping changes
                    // nothing or the stripped name is not in EOL either
                    $name_without_parenthesis = self::remove_parenthesis($name);
                    if ($name == $name_without_parenthesis || !self::name_in_eol($name_without_parenthesis, $func)) {
                        $no_eol_page++;
                        continue;
                    }
                    echo "\n OK name without parenthesis in EOL: [{$name_without_parenthesis}]\n";
                    $taxon['NAME'] = $name_without_parenthesis;
                    self::store_name_to_text_file($name . " - name without parenthesis found in EOL: {$name_without_parenthesis}");
                }
            }
        }

        echo "\n {$i} -- ";
        echo $taxon['NAME'] . " -- ";

        // result is a pair: [0] taxa for this record, [1] updated collection ids
        $result = self::get_usfws_taxa($taxon, $used_collection_ids);
        $used_collection_ids = $result[1];
        if ($result[0]) {
            $all_taxa = array_merge($all_taxa, $result[0]);
        }
    }

    $xml = \SchemaDocument::get_taxon_xml($all_taxa);
    $resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
    $handle = fopen($resource_path, "w");
    if (!$handle) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
        return;
    }
    fwrite($handle, $xml);
    fclose($handle);

    $with_eol_page = $i - $no_eol_page;
    echo "\n\n total = {$i} \n With EOL page = {$with_eol_page} \n No EOL page = {$no_eol_page} \n\n";
}
<?php
namespace php_active_record;
/* connector for Photosynth
estimated execution time: 29 secs.
This connector will use an un-official service to search the Photosynth server.
It also scrapes the Photosynth site to get the tags entered by owners, as the tags were not included in the service.

The taxa returned by PhotosynthAPI::get_all_taxa() are serialized with
\SchemaDocument::get_taxon_xml() and written to CONTENT_RESOURCE_LOCAL_PATH . "119.xml".
*/
$timestart = microtime(true);

include_once dirname(__FILE__) . "/../../config/environment.php";
require_library('connectors/PhotosynthAPI');
$GLOBALS['ENV_DEBUG'] = false;

$taxa = PhotosynthAPI::get_all_taxa();
$xml = \SchemaDocument::get_taxon_xml($taxa);

$resource_path = CONTENT_RESOURCE_LOCAL_PATH . "119.xml";
if (!($OUT = fopen($resource_path, "w+"))) {
    // __CLASS__ is an empty string outside of a class, so the script's file
    // name is used to identify where the failure happened.
    debug(basename(__FILE__) . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
    return;
}
fwrite($OUT, $xml);
fclose($OUT);

// report the elapsed time in seconds, minutes and hours
$elapsed_time_sec = microtime(true) - $timestart;
echo "\n";
echo "elapsed time = {$elapsed_time_sec} sec \n";
echo "elapsed time = " . $elapsed_time_sec / 60 . " min \n";
echo "elapsed time = " . $elapsed_time_sec / 60 / 60 . " hr \n";
echo "\n\n Done processing.";
/**
 * Processes the tab-separated rows of "<TEMP_FILE_PATH><task>.txt" and writes
 * the resulting taxa to "<TEMP_FILE_PATH>temp_Bolds_<task>.xml", then removes
 * $this->TEMP_DIR.
 *
 * Each row is read as: column 0 = id, column 1 = sciname, column 2 = rank
 * (optional). Rows whose first column is empty are skipped.
 *
 * @param mixed $task      used to build the input and output file names
 * @param mixed $temp_path not used by this method; kept for caller compatibility
 * @param array $info      optional; when given, $info[1] and $info[0] are printed
 *                         as "{$info[1]} of {$info[0]}" before each row's progress
 *                         line (presumably the caller's own task counter - confirm)
 * @return void
 */
function get_all_taxa($task, $temp_path = null, $info = array())
{
    $all_taxa = array();
    $used_collection_ids = array();
    $filename = $this->TEMP_FILE_PATH . $task . ".txt";
    $i = 0;
    $total = Functions::count_rows_from_text_file($filename);

    // $info defaults to an empty array; resolve both labels once so the progress
    // line does not read undefined indexes on every row.
    $info_current = isset($info['1']) ? $info['1'] : "";
    $info_total = isset($info['0']) ? $info['0'] : "";

    foreach (new FileIterator($filename) as $line_number => $line) {
        $split = explode("\t", trim($line));
        if (empty($split[0])) {
            continue;
        }

        $taxon = array(
            "sciname" => isset($split[1]) ? $split[1] : null,
            "id" => $split[0],
            "rank" => isset($split[2]) ? $split[2] : null
        );
        $i++;

        echo "\n {$info_current} of {$info_total}";
        echo "\n {$i} of {$total} -- " . $taxon['sciname'] . " {$taxon['id']} \n";

        // result is a pair: [0] taxa for this row, [1] updated collection ids
        $arr = self::get_Bolds_taxa($taxon, $used_collection_ids);
        $page_taxa = $arr[0];
        $used_collection_ids = $arr[1];
        if ($page_taxa) {
            $all_taxa = array_merge($all_taxa, $page_taxa);
        }
        unset($page_taxa);
        // if($i >= 2) break; //debug
    }

    $xml = \SchemaDocument::get_taxon_xml($all_taxa);
    // tag every media URL in the generated XML with the "map" subtype
    $xml = str_replace("</mediaURL>", "</mediaURL><additionalInformation><subtype>map</subtype>\n</additionalInformation>\n", $xml);

    $resource_path = $this->TEMP_FILE_PATH . "temp_Bolds_" . $task . ".xml";
    if (!($OUT = fopen($resource_path, "w"))) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
        return;
    }
    fwrite($OUT, $xml);
    fclose($OUT);

    echo "\n\n total = {$i} \n\n";

    // remove temp dir
    recursive_rmdir($this->TEMP_DIR); // debug - uncomment in real operation
    echo "\n temporary directory removed: " . $this->TEMP_DIR;
}