function get_all_taxa()
{
    // Harvest a DwC archive: expand it, read its taxon table, emit taxa,
    // media, references, agents and vernaculars, then finalize the archive.
    require_library('connectors/INBioAPI');
    $inbio = new INBioAPI();
    $paths = $inbio->extract_archive_file($this->dwca_file, "meta.xml");
    $archive_path = $paths['archive_path'];
    $temp_dir = $paths['temp_dir'];
    $harvester = new ContentArchiveReader(NULL, $archive_path);
    $tables = $harvester->tables;
    // bail out when the archive exposes no taxon table — nothing to harvest
    if (!($this->fields["taxa"] = $tables["http://rs.tdwg.org/dwc/terms/taxon"][0]->fields)) {
        debug("Invalid archive file. Program will terminate.");
        return false;
    }
    self::build_taxa_rank_array($harvester->process_row_type('http://rs.tdwg.org/dwc/terms/Taxon'));
    self::create_instances_from_taxon_object($harvester->process_row_type('http://rs.tdwg.org/dwc/terms/Taxon'));
    self::get_objects($harvester->process_row_type('http://eol.org/schema/media/Document'));
    self::get_references($harvester->process_row_type('http://rs.gbif.org/terms/1.0/Reference'));
    self::get_agents($harvester->process_row_type('http://eol.org/schema/agent/Agent'));
    self::get_vernaculars($harvester->process_row_type('http://rs.gbif.org/terms/1.0/VernacularName'));
    $this->archive_builder->finalize(TRUE);
    // remove temp dir
    recursive_rmdir($temp_dir);
    echo "\n temporary directory removed: " . $temp_dir;
    print_r($this->debug);
}
function export_gbif_to_eol($params)
{
    // Convert a GBIF occurrence DwC archive into an EOL archive:
    // occurrences become taxa, multimedia rows become media objects.
    require_library('connectors/INBioAPI');
    $connector = new INBioAPI();
    $paths = $connector->extract_archive_file($params["dwca_file"], "meta.xml");
    $archive_path = $paths['archive_path'];
    $temp_dir = $paths['temp_dir'];
    $harvester = new ContentArchiveReader(NULL, $archive_path);
    $tables = $harvester->tables;
    // no occurrence table means the archive is not a GBIF export we can use
    if (!($this->fields["occurrence"] = $tables["http://rs.tdwg.org/dwc/terms/occurrence"][0]->fields)) {
        debug("Invalid archive file. Program will terminate.");
        return false;
    }
    /* $harvester->process_row_type() - this will convert rows into array.
       debug helpers:
       $r = $harvester->process_row_type('http://rs.tdwg.org/dwc/terms/occurrence');
       $r = $harvester->process_row_type('http://rs.gbif.org/terms/1.0/Multimedia');
       print_r($r); exit;
    */
    self::create_instances_from_taxon_object($harvester->process_row_type('http://rs.tdwg.org/dwc/terms/occurrence'));
    self::get_media_objects($harvester->process_row_type('http://rs.gbif.org/terms/1.0/Multimedia'));
    // row types not used by this export:
    // self::get_objects($harvester->process_row_type('http://eol.org/schema/media/Document'));
    // self::get_references($harvester->process_row_type('http://rs.gbif.org/terms/1.0/Reference'));
    // self::get_agents($harvester->process_row_type('http://eol.org/schema/agent/Agent'));
    // self::get_vernaculars($harvester->process_row_type('http://rs.gbif.org/terms/1.0/VernacularName'));
    $this->archive_builder->finalize(TRUE);
    // remove temp dir
    recursive_rmdir($temp_dir);
    echo "\n temporary directory removed: " . $temp_dir;
}
function generate_EnvEOL_data()
{
    // Download the environment-annotations zip, convert the TSV it contains
    // into archive records, then finalize and clean up all temp directories.
    require_library('connectors/IUCNRedlistDataConnector');
    $connector = new IUCNRedlistDataConnector();
    $basenames = array("eol_env_annotations_noParentTerms"); // list of needed basenames
    $options = $this->download_options;
    $options['expire_seconds'] = 2592000 * 3; // 3 months before cache expires
    $text_path = $connector->load_zip_contents($this->species_list_export, $options, $basenames, ".tsv");
    print_r($text_path);
    self::csv_to_array($text_path[$basenames[0]]);
    $this->archive_builder->finalize(TRUE);
    // remove temp dir
    $parts = pathinfo($text_path[$basenames[0]]);
    recursive_rmdir($parts["dirname"]);
    debug("\n temporary directory removed: " . $parts["dirname"]);
    recursive_rmdir($this->TEMP_DIR); // comment this if u want to check "need_to_check_tc_id.txt"
    /* run problematic tc_ids with cache=0 --- a utility
    $tc_ids = self::get_dump();
    foreach($tc_ids as $tc_id) {
        $rec['taxon_id'] = $tc_id;
        self::prepare_taxon($rec);
    }
    exit("\n-exit-\n");
    */
}
private function get_uris($params)
{
    /* Build a value => URI lookup from a partner-supplied spreadsheet.
       For NMNH/NHM datasets the spreadsheet is expected to contain paired
       columns, e.g. "sex" alongside "sex_uri".
       $params["dataset"]  - partner code; controls which field pairs are read
       $params["uri_file"] - spreadsheet location; optional (empty map if absent)
       Returns an array mapping each raw cell value to its URI. */
    $fields = array();
    if (in_array($params["dataset"], array("NMNH", "NHM"))) {
        $fields["institutionCode"] = "institutionCode_uri";
        $fields["sex"]             = "sex_uri";
        $fields["typeStatus"]      = "typeStatus_uri";
        $fields["lifeStage"]       = "lifeStage_uri";
        $fields["collectionCode"]  = "collectionCode_uri";
    }
    require_library('connectors/LifeDeskToScratchpadAPI');
    $func = new LifeDeskToScratchpadAPI();
    $spreadsheet_options = array("cache" => 1, "timeout" => 3600, "file_extension" => "xlsx", 'download_attempts' => 2, 'delay_in_minutes' => 2);
    $spreadsheet_options["expire_seconds"] = 0; // false => won't expire; 0 => expires now
    $uris = array();
    if ($spreadsheet = @$params["uri_file"]) {
        if ($arr = $func->convert_spreadsheet($spreadsheet, 0, $spreadsheet_options)) {
            foreach ($fields as $key => $value) {
                // bug fix: guard against a missing column pair — the original
                // iterated an unset index, emitting warnings (fatal on PHP 8)
                if (!isset($arr[$key]) || !isset($arr[$value])) {
                    continue;
                }
                $i = 0;
                foreach ($arr[$key] as $item) {
                    $item = trim($item);
                    if ($item) {
                        // a URI column may be shorter than its key column; skip gaps
                        if (isset($arr[$value][$i])) {
                            $uris[$item] = $arr[$value][$i];
                        }
                    }
                    $i++;
                }
            }
        }
    }
    return $uris;
}
private function update_eol_xml($lifedesk_name)
{
    /* Post-process a LifeDesk EOL XML export: blank out dc:source on taxa and
       data objects, strip tags inside references, save the document, then
       gzip it next to the original.
       Observed counts from a sample export:
       taxon = 434, dwc:ScientificName = 434, reference = 614, synonym = 68,
       commonName = 2, dataObjects = 1705 (reference = 0), texts = 1146,
       images = 559, videos = 0, sounds = 0 */
    require_library('ResourceDataObjectElementsSetting');
    $resource_path = CONTENT_RESOURCE_LOCAL_PATH . $lifedesk_name . ".xml";
    $func = new ResourceDataObjectElementsSetting($lifedesk_name, $resource_path);
    $xml = file_get_contents($this->text_path["eol_xml"]);
    $xml = $func->replace_taxon_element_value("dc:source", "replace any existing value", "", $xml, false);
    $xml = $func->replace_data_object_element_value("dc:source", "replace any existing value", "", $xml, false);
    $xml = self::remove_tags_in_references($xml);
    $func->save_resource_document($xml);
    // zip the xml; escape the paths in case $lifedesk_name carries
    // shell metacharacters (spaces, quotes) — the original interpolated raw
    $command_line = "gzip -c " . escapeshellarg($resource_path) . " > " . escapeshellarg($resource_path . ".gz");
    shell_exec($command_line);
}
function remove_dataObject()
{
    // Strip data objects with mimeType "image/x-adobe-dng" from resource 346.
    require_library('ResourceDataObjectElementsSetting');
    $resource_id = 346;
    $resource_path = "http://localhost/eol_php_code/applications/content_server/resources/346.xml.gz";
    $setter = new ResourceDataObjectElementsSetting($resource_id, $resource_path);
    $xml = $setter->load_xml_string();
    $xml = $setter->remove_data_object_of_certain_element_value("mimeType", "image/x-adobe-dng", $xml);
    $setter->save_resource_document($xml);
}
function convert_to_dwca($params)
{
    // Walk a Flickr photoset page by page and turn each usable photo into
    // archive records via self::create_archive().
    // $params: 'photoset_id', 'flickr_user_id'.
    require_library('FlickrAPI');
    $auth_token = NULL;
    // if(FlickrAPI::valid_auth_token(FLICKR_AUTH_TOKEN)) $auth_token = FLICKR_AUTH_TOKEN;
    $page = 1;
    $per_page = 500; // Flickr API max page size
    $url = $this->service['photosets'] . '&photoset_id=' . $params['photoset_id'] . '&user_id=' . $params['flickr_user_id'] . '&per_page=' . $per_page;
    // first request only establishes the total page count
    if ($json = Functions::lookup_with_cache($url . '&page=' . $page, $this->download_options)) {
        $json = str_replace("\\'", "'", $json);
        $obj = json_decode($json);
        $total_pages = ceil($obj->photoset->total / $per_page);
        echo "\ntotal_pages = {$total_pages}\n";
        for ($i = 1; $i <= $total_pages; $i++) {
            // NOTE(review): fetches by $page (incremented at loop bottom), not $i;
            // page 1 is fetched twice, which the cache absorbs
            if ($json = Functions::lookup_with_cache($url . '&page=' . $page, $this->download_options)) {
                $json = str_replace("\\'", "'", $json);
                $obj = json_decode($json);
                $k = 0;
                $total_photos = count($obj->photoset->photo);
                foreach ($obj->photoset->photo as $rec) {
                    $k++;
                    echo "\n{$i} of {$total_pages} - {$k} of {$total_photos}";
                    // skip photos whose title yields no scientific name
                    if (!($sciname = self::get_sciname_from_title($rec->title))) {
                        continue;
                    }
                    // if($sciname == "SONY DSC") //debug
                    // {
                    //     print_r($rec);
                    // }
                    $photo_response = FlickrAPI::photos_get_info($rec->id, $rec->secret, $auth_token, $this->download_options);
                    $photo = @$photo_response->photo;
                    if (!$photo) {
                        continue;
                    }
                    // accept only public, downloadable photos under a recognized license
                    if ($photo->visibility->ispublic != 1) {
                        continue;
                    }
                    if ($photo->usage->candownload != 1) {
                        continue;
                    }
                    if (@(!$GLOBALS["flickr_licenses"][$photo->license])) {
                        continue;
                    }
                    $data_objects = FlickrAPI::get_data_objects($photo, $params['flickr_user_id']);
                    foreach ($data_objects as $do) {
                        self::create_archive($sciname, $do);
                    }
                }
            }
            $page++;
            // break; //debug
        }
    }
    $this->archive_builder->finalize(TRUE);
}
public static function top_images()
{
    // Rebuild the top-images rankings, then queue table denormalization on Resque.
    require_library('TopImages');
    $log = HarvestProcessLog::create(array('process_name' => 'Top Images'));
    $task = new TopImages();
    $task->begin_process();
    $task->top_concept_images(true);
    $task->top_concept_images(false);
    \Resque::enqueue('harvesting', 'CodeBridge', array('cmd' => 'denormalize_tables'));
    $log->finished();
}
public function __construct()
{
    // Bind the shared DB handle by reference; on production, point reads at
    // the slave connection when one is configured, otherwise alias the master.
    $this->mysqli =& $GLOBALS['db_connection'];
    if ($GLOBALS['ENV_NAME'] == 'production' && environment_defined('slave')) {
        $this->mysqli_slave = load_mysql_environment('slave');
    } else {
        $this->mysqli_slave =& $this->mysqli;
    }
    require_library('RubyNameParserClient');
    $this->name_parser = new RubyNameParserClient();
}
private function start($spreadsheet, $temp_path = false)
{
    // Download a partner spreadsheet and process each worksheet: either dump
    // it to temp files ($temp_path given) or convert rows to archive records.
    require_library('XLSParser');
    $parser = new XLSParser();
    $doc = self::download_file_accordingly($spreadsheet);
    $download_options = $this->download_options;
    $download_options['file_extension'] = self::get_extension($doc);
    if ($path = Functions::save_remote_file_to_local($doc, $download_options)) {
        $worksheets = self::get_worksheets($path, $parser, $temp_path);
        print_r($worksheets);
        foreach ($worksheets as $index => $worksheet_title) {
            echo "\nProcessing worksheet: [{$worksheet_title}]";
            if ($temp_path) {
                $params = array("worksheet_title" => $worksheet_title, "path" => $temp_path);
                $parser->convert_sheet_to_array($path, $index, NULL, $params);
            } else {
                $arr = $parser->convert_sheet_to_array($path, $index);
                if (!self::sheet_is_valid($arr, $worksheet_title)) {
                    echo " - invalid worksheet\n";
                    continue;
                }
                $fields = array();
                $uris = array();
                $fields = array_keys($arr);
                // row 0 of every column holds that column's URI
                foreach ($fields as $field) {
                    $uris[] = $arr[$field][0];
                }
                // print_r($fields); print_r($uris); continue;
                $i = -1;
                foreach ($arr[$fields[0]] as $row) {
                    $i++;
                    // data rows start after index 7 (header block above)
                    if ($i > 7) {
                        $rec = array();
                        foreach ($fields as $field) {
                            $rec[$field] = $arr[$field][$i];
                        }
                        if ($rec) {
                            self::create_archive($worksheet_title, $fields, $uris, $rec);
                        }
                    }
                }
            }
        }
        unlink($path);
        if (file_exists($doc)) {
            unlink($doc);
        }
    } else {
        echo "\n [{$doc}] unavailable! \n";
    }
}
private function get_institution_uris()
{
    // Read the institutions spreadsheet into a Code => URI lookup map.
    require_library('connectors/LifeDeskToScratchpadAPI');
    $api = new LifeDeskToScratchpadAPI();
    $sheet = $api->convert_spreadsheet($this->institutions_xls, 0);
    $institutions = array();
    $row = 0;
    // "Code" and "URI" are parallel columns; walk them in lock-step
    foreach ($sheet["Code"] as $code) {
        $institutions[$code] = $sheet["URI"][$row];
        $row++;
    }
    echo "\nInstitutions from spreadsheet [{$this->institutions_xls}]:" . count($institutions) . "\n";
    return $institutions;
}
private function parse_xls()
{
    // Fetch the remote XLS dump, parse it, and return normalized records
    // (missing names and parent ids filled in). Returns nothing on download failure.
    $options = array('download_wait_time' => 1000000, 'timeout' => 600, 'download_attempts' => 5, 'file_extension' => 'xls');
    if ($this->data_dump_url = Functions::save_remote_file_to_local($this->data_dump_url, $options)) {
        require_library('XLSParser');
        $parser = new XLSParser();
        debug("\n reading: " . $this->data_dump_url . "\n");
        $sheet = $parser->convert_sheet_to_array($this->data_dump_url);
        $records = $parser->prepare_data($sheet, "single", "SCIENTIFIC NAME", "SCIENTIFIC NAME", "CATEGORY", "ENGLISH NAME", "RANGE", "ORDER", "FAMILY", "EXTINCT", "EXTINCT_YEAR");
        $records = self::fill_in_missing_names($records);
        $records = self::fill_in_parent_id($records);
        debug("\n" . count($records));
        return $records;
    }
}
function process_xml($params)
{
    // Collect the target taxa ids, expand the partner XML archive, update the
    // XML in place, then discard the extraction directory.
    $this->taxa_id_list = array_merge(self::get_taxa_list_from_biogeodb(), self::get_taxa_list_from_myspecies());
    require_library('connectors/INBioAPI');
    $inbio = new INBioAPI();
    $paths = $inbio->extract_archive_file($params["eol_xml_file"], $params["filename"], array("timeout" => 7200, "expire_seconds" => false));
    print_r($paths);
    $params["path"] = $paths["temp_dir"];
    $xml = self::update_xml($params);
    recursive_rmdir($paths["temp_dir"]); // remove temp dir
    return $xml;
}
private function parse_xls()
{
    // Fetch the remote XLSX dump (cached), parse sheet 0, and return
    // normalized records with uppercase fields, missing names and parent ids added.
    $options = array('cache' => 1, 'download_wait_time' => 1000000, 'timeout' => 600, 'download_attempts' => 5, 'file_extension' => 'xlsx');
    if ($this->data_dump_url = Functions::save_remote_file_to_local($this->data_dump_url, $options)) {
        require_library('XLSParser');
        $parser = new XLSParser();
        debug("\n reading: " . $this->data_dump_url . "\n");
        $sheet = $parser->convert_sheet_to_array($this->data_dump_url, 0);
        $records = $parser->prepare_data($sheet, "single", "Scientific name", "Scientific name", "Category", "English name", "Range", "Order", "Family", "Extinct", "Extinction Year");
        $records = self::add_uppercase_fields($records);
        $records = self::fill_in_missing_names($records);
        $records = self::fill_in_parent_id($records);
        debug("\n" . count($records));
        return $records;
    }
}
function model_boostrap()
{
    // Wire up Doctrine: connection, collation/charset, accessor attributes,
    // a memcache-backed query cache, then load the model classes.
    // (Name kept as-is — "boostrap" — since callers reference it.)
    require_library("/vendor/doctrine/Doctrine.php");
    spl_autoload_register(array('doctrine', 'autoload'));
    $conn = Doctrine_Manager::connection(dsn);
    $conn->setCollate('utf8_general_ci');
    $conn->setCharset('utf8');
    $conn->setAttribute(Doctrine::ATTR_AUTO_ACCESSOR_OVERRIDE, true);
    $conn->setAttribute(Doctrine::ATTR_AUTOLOAD_TABLE_CLASSES, true);
    // single local memcache node backs the query cache
    $memcacheServers = array('host' => 'localhost', 'port' => 11211, 'persistent' => true);
    $cacheDriver = new Doctrine_Cache_Memcache(array('servers' => $memcacheServers, 'compression' => false));
    $conn->setAttribute(Doctrine::ATTR_QUERY_CACHE, $cacheDriver);
    Doctrine::loadModels(library_root . '/models/dal');
    Doctrine::loadModels(library_root . '/models');
}
function get_all_taxa()
{
    // Harvest every configured spreadsheet: each "Species" row becomes a
    // taxon instance plus its image and data records in the archive.
    require_library('XLSParser');
    $docs = count($this->spreadsheets);
    $doc_count = 0;
    foreach ($this->spreadsheets as $doc) {
        $doc_count++;
        echo "\n processing [{$doc}]...\n";
        if ($path = Functions::save_remote_file_to_local($this->url_path . $doc, array("cache" => 1, "timeout" => 3600, "file_extension" => "xls", 'download_attempts' => 2, 'delay_in_minutes' => 2))) {
            $parser = new XLSParser();
            $arr = $parser->convert_sheet_to_array($path);
            $fields = array_keys($arr);
            $i = -1;
            $rows = count($arr["Species"]);
            echo "\n total {$path}: {$rows} \n";
            foreach ($arr["Species"] as $Species) {
                $i++;
                // assemble one record from the parallel column arrays
                $rec = array();
                foreach ($fields as $field) {
                    $rec[$field] = $arr[$field][$i];
                }
                $rec = array_map('trim', $rec);
                /* breakdown when caching
                $cont = false;
                // if($i >= 1 && $i < 6000)      $cont = true;
                // if($i >= 3000 && $i < 6000)   $cont = true;
                // if($i >= 6000 && $i < 9000)   $cont = true;
                // if($i >= 9000 && $i < 12000)  $cont = true;
                // if($i >= 11800 && $i < 15000) $cont = true;
                if(!$cont) continue;
                */
                print "\n [{$doc_count} of {$docs}][" . ($i + 1) . " of {$rows}] " . $rec["Species"] . "\n";
                $rec = self::clean_taxon_name($rec);
                // taxon_id = md5 of the name minus any parenthetical, spaces -> underscores
                $taxon_id = trim(preg_replace('/\\s*\\([^)]*\\)/', '', $rec["sciname"])); // remove parenthesis
                $taxon_id = str_replace(" ", "_", $taxon_id);
                $rec["taxon_id"] = md5($taxon_id);
                self::create_instances_from_taxon_object($rec);
                self::prepare_images($rec);
                self::prepare_data($rec);
            }
            unlink($path);
        } else {
            echo "\n [{$doc}] unavailable! \n";
        }
    }
    $this->archive_builder->finalize(TRUE);
}
function get_videos_from_vimeo()
{
    // Pull all Vimeo videos for the configured user and write them
    // out as an EOL taxon XML resource file.
    echo "\n -- start access to vimeo ";
    $resource_id = "temp_vimeo_to_tamborine2";
    require_library('connectors/VimeoAPI');
    $taxa = VimeoAPI::get_all_taxa(array("user1632860")); // Peter Kuttner's id
    $xml = \SchemaDocument::get_taxon_xml($taxa);
    $resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
    $OUT = fopen($resource_path, "w");
    if (!$OUT) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
        return;
    }
    fwrite($OUT, $xml);
    fclose($OUT);
    echo " -- end.\n";
}
public function get_all_taxa_keys($resource_id)
{
    // Build the DiscoverLife ID-keys resource XML. Only names that already
    // have an EOL page are harvested; names without one are appended to a
    // report file handed back to DiscoverLife.
    require_library('connectors/DiscoverLifeAPIv2');
    $func = new DiscoverLifeAPIv2();
    $taxa_objects = self::process_keys_spreadsheet();
    $all_taxa = array();
    $used_collection_ids = array();
    //initialize text file for DiscoverLife: save names without a page in EOL
    self::initialize_text_file(DOC_ROOT . self::TEXT_FILE_FOR_DL . "_" . "id_keys" . ".txt");
    $i = 0;
    $save_count = 0;
    $no_eol_page = 0;
    foreach ($taxa_objects as $name => $fields) {
        $i++;
        //filter names. Process only those who already have a page in EOL. Report back to DiscoverLife names not found in EOL
        if (!($taxon = $func->with_eol_page($name))) {
            print "\n {$i} - no EOL page ({$name})";
            $no_eol_page++;
            self::store_name_to_text_file($name, "ID_Keys");
            continue;
        }
        // attach every spreadsheet key field to the taxon
        $taxon["keys"] = array();
        foreach ($fields as $field) {
            $taxon["keys"][] = $field;
        }
        print "\n {$i} -- " . $taxon['orig_sciname'];
        //================================
        $arr = self::get_discoverlife_taxa($taxon, $used_collection_ids);
        $page_taxa = $arr[0];
        $used_collection_ids = $arr[1];
        if ($page_taxa) {
            $all_taxa = array_merge($all_taxa, $page_taxa);
        }
        unset($page_taxa); // free memory between names
    }
    $xml = \SchemaDocument::get_taxon_xml($all_taxa);
    $resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
    if (!($OUT = fopen($resource_path, "w"))) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
        return;
    }
    fwrite($OUT, $xml);
    fclose($OUT);
    // summary counts
    $with_eol_page = $i - $no_eol_page;
    print "\n\n total = {$i} \n With EOL page = {$with_eol_page} \n No EOL page = {$no_eol_page} \n\n";
}
function clean_media_extension($resource_id, $dwca_file)
{
    // Repair malformed <a title=""/href="" markup inside media.txt of a DwC
    // archive, re-pack everything as {resource_id}.tar.gz, copy the files into
    // the resource directory, and drop the extraction directory.
    require_library('connectors/INBioAPI');
    $func = new INBioAPI();
    if ($paths = $func->extract_archive_file($dwca_file, "meta.xml")) {
        print_r($paths);
        if ($contents = Functions::get_remote_file($paths['archive_path'] . "media.txt", array('timeout' => 172800))) {
            // collapse the doubled quotes left by the provider's export
            $contents = str_ireplace('<a title=""', '<a title="', $contents);
            $contents = str_ireplace('"" href=""', '" href="', $contents);
            $contents = str_ireplace('"">', '">', $contents);
            //saving new media.txt
            if (!($WRITE = fopen($paths['archive_path'] . "media.txt", "w"))) {
                debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $paths['archive_path'] . "media.txt");
                return;
            }
            fwrite($WRITE, $contents);
            fclose($WRITE);
            // remove the archive file e.g. plazi.zip
            $info = pathinfo($dwca_file);
            unlink($paths['archive_path'] . $info["basename"]);
            // creating the archive file
            $command_line = "tar -czf " . CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".tar.gz --directory=" . $paths['archive_path'] . " .";
            $output = shell_exec($command_line);
            // moving files to /resources/
            recursive_rmdir(CONTENT_RESOURCE_LOCAL_PATH . $resource_id);
            if (!file_exists(CONTENT_RESOURCE_LOCAL_PATH . $resource_id)) {
                mkdir(CONTENT_RESOURCE_LOCAL_PATH . $resource_id);
            }
            $src = $paths['archive_path'];
            $dst = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . "/";
            $files = glob($paths['archive_path'] . "*.*");
            foreach ($files as $file) {
                $file_to_go = str_replace($src, $dst, $file);
                copy($file, $file_to_go);
            }
        }
        // remove temp dir
        recursive_rmdir($paths['archive_path']);
        echo "\n temporary directory removed: " . $paths['archive_path'];
    }
}
private function get_uris()
{
    // Load the Wikipedia-triple mapping spreadsheet into:
    //   triple => array('mtype' => measurement type, 'v1' => value1, 'v2' => value2)
    // Rows whose measurement type is "EXCLUDE" are dropped.
    require_library('connectors/LifeDeskToScratchpadAPI');
    $api = new LifeDeskToScratchpadAPI();
    $spreadsheet_options = array("cache" => 1, "timeout" => 3600, "file_extension" => "xlsx", 'download_attempts' => 2, 'delay_in_minutes' => 2);
    // $spreadsheet_options["expire_seconds"] = 0; // false => won't expire; 0 => expires now
    $sheet = $api->convert_spreadsheet($this->triple_uris_spreadsheet, 0, $spreadsheet_options);
    /* spreadsheet headers:
       Wikipedia triple - Measurement Type - Measurement Value1 - Measurement Value2 */
    $uris = array();
    $row = -1;
    foreach ($sheet["Wikipedia triple"] as $triple) {
        $row++;
        if ($sheet["Measurement Type"][$row] == "EXCLUDE") {
            continue;
        }
        $uris[$triple]["mtype"] = $sheet["Measurement Type"][$row];
        $uris[$triple]["v1"] = @$sheet["Measurement Value1"][$row];
        $uris[$triple]["v2"] = @$sheet["Measurement Value2"][$row];
    }
    return $uris;
}
function export_xml_to_archive($params, $xml_file_YN = false)
{
    // Convert a partner EOL XML into a DwC archive. When $xml_file_YN is true
    // the input is a bare XML file; otherwise it is an archive to extract first.
    if (!$xml_file_YN) {
        require_library('connectors/INBioAPI');
        $inbio = new INBioAPI();
        // "expire_seconds" -- false => won't expire; 0 => expires now
        $paths = $inbio->extract_archive_file($params["eol_xml_file"], $params["filename"], array("timeout" => 7200, "expire_seconds" => 0));
        print_r($paths); //debug
        $params["path"] = $paths["temp_dir"];
        self::convert_xml($params);
        $this->archive_builder->finalize(TRUE);
        recursive_rmdir($paths["temp_dir"]); // remove temp dir
    } else {
        $params['path'] = DOC_ROOT . "tmp/";
        //debug - cache should be 0 zero in normal operation
        $local_xml_file = Functions::save_remote_file_to_local($params['eol_xml_file'], array('file_extension' => "xml", 'cache' => 0, "timeout" => 7200, "download_attempts" => 2, "delay_in_minutes" => 2));
        $params['filename'] = pathinfo($local_xml_file, PATHINFO_BASENAME);
        self::convert_xml($params);
        $this->archive_builder->finalize(TRUE);
        unlink($local_xml_file);
    }
}
function get_all_taxa()
{
    /* Generic DwC-A re-exporter. Reads the source archive's meta.xml to
       discover its row types, then re-processes each row type against a
       whitelist of allowed fields. Returns false for an invalid archive. */
    require_library('connectors/INBioAPI');
    $func = new INBioAPI();
    $paths = $func->extract_archive_file($this->dwca_file, "meta.xml");
    $archive_path = $paths['archive_path'];
    $harvester = new ContentArchiveReader(NULL, $archive_path);
    $tables = $harvester->tables;
    if (!($this->fields["taxa"] = $tables["http://rs.tdwg.org/dwc/terms/taxon"][0]->fields)) {
        debug("Invalid archive file. Program will terminate.");
        return false;
    }
    // rowType attributes are full URIs, e.g. http://rs.tdwg.org/dwc/terms/Taxon
    $row_types = self::get_XML_fields($paths["temp_dir"] . "meta.xml", "rowType");
    /* manual assignment, use this if some referenceID in Measurements don't exist in References.tab
    $row_types = array("http://eol.org/schema/reference/Reference", "http://rs.tdwg.org/dwc/terms/Taxon", "http://rs.tdwg.org/dwc/terms/MeasurementOrFact", "http://rs.tdwg.org/dwc/terms/Occurrence");
    */
    print_r($row_types);
    foreach ($row_types as $row_type) {
        $basename = pathinfo($row_type, PATHINFO_BASENAME);
        if ($basename == "Taxon") {
            // Taxon uses a fixed whitelist; other row types read theirs from
            // the extension definition file
            $allowed_fields = array("taxonID", "scientificName", "parentNameUsageID", "kingdom", "phylum", "class", "order", "family", "genus", "taxonRank", "furtherInformationURL", "taxonomicStatus", "taxonRemarks", "namePublishedIn", "referenceID");
        } else {
            $allowed_fields = self::get_XML_fields($this->extensions[$basename], "property name");
        }
        // manual adjustment: vernacular rows also need taxonID.
        // bug fix: the original compared the full URI ($row_type) against
        // "VernacularName", which could never match; compare the basename.
        if ($basename == "VernacularName") {
            $allowed_fields[] = "taxonID";
        }
        self::process_fields($harvester->process_row_type($row_type), $basename, $allowed_fields);
        // e.g. self::process_fields($harvester->process_row_type('http://rs.tdwg.org/dwc/terms/Taxon'), "Taxon");
    }
    $this->archive_builder->finalize(TRUE);
    recursive_rmdir($paths['temp_dir']); // remove temp dir
    echo "\n temporary directory removed: " . $paths['temp_dir'];
    print_r($this->debug);
}
function start_process($resource_id, $call_multiple_instance, $connectors_to_run = 1)
{
    // Coordinate a multi-instance BOLDS harvest through shared task lists:
    // the first instance builds the master id list, every instance consumes
    // the work list, and the last one standing finalizes the archive.
    require_library('connectors/BOLDSysAPI');
    $this->func = new BOLDSysAPI();
    $this->resource_id = $resource_id;
    $this->call_multiple_instance = $call_multiple_instance;
    $this->connectors_to_run = $connectors_to_run;
    if (!trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST))) {
        if (!trim(Functions::get_a_task($this->INITIAL_PROCESS_STATUS))) {
            // Divide the big list of ids into small files
            Functions::add_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
            $this->func->create_master_list();
            Functions::delete_a_task("Initial process start", $this->INITIAL_PROCESS_STATUS);
        }
    }
    Functions::process_work_list($this);
    if (!($task = trim(Functions::get_a_task($this->WORK_IN_PROGRESS_LIST)))) {
        $this->archive_builder->finalize(true);
        // Set to force harvest
        Functions::set_resource_status_to_force_harvest($resource_id);
        // Delete temp files
        Functions::delete_temp_files($this->TEMP_FILE_PATH . "sl_batch_", "txt");
    }
}
<?php
namespace php_active_record;
/* connector for Photosynth
   estimated execution time: 29 secs.
   This connector will use an un-official service to search the Photosynth
   server. It also scrapes the Photosynth site to get the tags entered by
   owners as the tags were not included in the service. */
$timestart = microtime(1);
include_once dirname(__FILE__) . "/../../config/environment.php";
require_library('connectors/PhotosynthAPI');
$GLOBALS['ENV_DEBUG'] = false;
$taxa = PhotosynthAPI::get_all_taxa();
$xml = \SchemaDocument::get_taxon_xml($taxa);
// write out resource 119
$resource_path = CONTENT_RESOURCE_LOCAL_PATH . "119.xml";
if (!($OUT = fopen($resource_path, "w+"))) {
    debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $resource_path);
    return;
}
fwrite($OUT, $xml);
fclose($OUT);
// report elapsed time
$elapsed_time_sec = microtime(1) - $timestart;
echo "\n";
echo "elapsed time = {$elapsed_time_sec} sec \n";
echo "elapsed time = " . $elapsed_time_sec / 60 . " min \n";
echo "elapsed time = " . $elapsed_time_sec / 60 / 60 . " hr \n";
echo "\n\n Done processing.";
<?php
namespace php_active_record;
/* NCBI Bio Projects
   partner provides an XML dump
   estimated execution time: */
include_once dirname(__FILE__) . "/../../config/environment.php";
require_library('connectors/NCBIProjectsAPI');
$timestart = time_elapsed();
$resource_id = 173;
$func = new NCBIProjectsAPI($resource_id);
$func->get_all_taxa();
// Publish only when the harvest looks substantial (taxon.tab > 1000 bytes):
// rotate the live resource dir to "_previous", promote "_working", flag for harvest.
if (filesize(CONTENT_RESOURCE_LOCAL_PATH . $resource_id . "_working/taxon.tab") > 1000) {
    if (is_dir(CONTENT_RESOURCE_LOCAL_PATH . $resource_id)) {
        recursive_rmdir(CONTENT_RESOURCE_LOCAL_PATH . $resource_id . "_previous");
        Functions::file_rename(CONTENT_RESOURCE_LOCAL_PATH . $resource_id, CONTENT_RESOURCE_LOCAL_PATH . $resource_id . "_previous");
    }
    Functions::file_rename(CONTENT_RESOURCE_LOCAL_PATH . $resource_id . "_working", CONTENT_RESOURCE_LOCAL_PATH . $resource_id);
    Functions::set_resource_status_to_force_harvest($resource_id);
}
$elapsed_time_sec = time_elapsed() - $timestart;
echo "\n\n elapsed time = " . $elapsed_time_sec / 60 . " minutes";
echo "\n elapsed time = " . $elapsed_time_sec / 60 / 60 . " hours";
echo "\n Done processing.\n";
<?php
namespace php_active_record;
/* SciELO connector — harvests all taxa into DwC-A resource 487.
   estimated execution time: */
include_once dirname(__FILE__) . "/../../config/environment.php";
require_library('connectors/SciELOAPI');
$timestart = time_elapsed();
$resource_id = "487";
$api = new SciELOAPI($resource_id);
$api->get_all_taxa();
Functions::finalize_dwca_resource($resource_id);
// report elapsed time
$elapsed_time_sec = time_elapsed() - $timestart;
echo "\n\n elapsed time = " . $elapsed_time_sec / 60 . " minutes";
echo "\n elapsed time = " . $elapsed_time_sec / 60 / 60 . " hours";
exit("\n Done processing.");
private function get_worms_taxa()
{
    /* Read the WoRMS taxon table and return a lookup:
       canonical scientific name => taxonRank.
       The first row (header) is discarded before mapping. */
    require_library('connectors/FishBaseAPI');
    $func = new FishBaseAPI();
    $fields = array();
    $excluded_fields = array("taxonID", "furtherInformationURL", "referenceID", "namePublishedIn", "taxonomicStatus", "taxonRemarks", "rightsHolder", "parentNameUsageID");
    $taxa = $func->make_array($this->text_path["worms"]["worms_taxon"], $fields, "", $excluded_fields);
    array_shift($taxa); // drop the header row
    // bug fix: $final was never initialized — returning it on an empty table
    // referenced an undefined variable
    $final = array();
    foreach ($taxa as $taxon) {
        $final[Functions::canonical_form($taxon["scientificName"])] = $taxon["taxonRank"];
    }
    unset($taxa); // free memory; the source table can be large
    return $final;
}
<?php
namespace php_active_record;
/* http://eol.org/content_partners/585/resources/831 - DATA-1622
   Generic script that converts an EOL XML export into an EOL DwC-A. */
include_once dirname(__FILE__) . "/../../config/environment.php";
require_library('connectors/ConvertEOLtoDWCaAPI');
$timestart = time_elapsed();
$resource_id = 831;
// prefer the registered accesspoint URL; fall back to the Pensoft export
$params["eol_xml_file"] = "";
$params["eol_xml_file"] = Functions::get_accesspoint_url_if_available($resource_id, "http://jhr.pensoft.net/lib/eol_exports/JHR.xml");
$params["filename"] = "no need to mention here.xml";
$params["dataset"] = "Pensoft XML files";
$params["resource_id"] = $resource_id;
$converter = new ConvertEOLtoDWCaAPI($resource_id);
$converter->export_xml_to_archive($params, true); // true => input is an XML file, not an archive nor a zip
Functions::finalize_dwca_resource($resource_id);
// report elapsed time
$elapsed_time_sec = time_elapsed() - $timestart;
echo "\n\n";
echo "elapsed time = " . $elapsed_time_sec / 60 . " minutes \n";
echo "elapsed time = " . $elapsed_time_sec / 60 / 60 . " hours \n";
echo "\nDone processing.\n";
private function process_text_files()
{
    // Parse the caret-delimited master taxa file: records of type Species,
    // Genus/Subgenus, Species Group and Super-Generic become taxon instances.
    // $link (built by create_instances_from_taxon_object) then drives the
    // auxiliary files: free text, geo data, ecology, pollen visitation, predation.
    if (!self::load_zip_contents()) {
        return false;
    }
    print_r($this->text_path);
    require_library('connectors/FishBaseAPI');
    $func = new FishBaseAPI();
    $fields = array("IDNUM", "TAXONID", "TAXID1", "SUBORDER", "TAXID2", "DIVISION", "TAXID5", "SUPERFAM", "TAXID9", "FAMILY", "TAXID13", "SUBFAM", "TAXID17", "TRIBE", "TAXID20", "SUBTRIBE", "GENUS", "SUBGENUS", "SPECIES", "SUBSPECS", "RECTYPCD", "RECTYPE", "UPDATE", "UPDUSER", "LOCGENUS", "LOCSUBGN", "SPCGROUP", "LOCSPECS", "LOCSUBSP", "INFRASUB", "CAUTHOR", "ORIGAUTH", "AUTHOR", "PUBYEAR", "JOURNAL", "COLLATN", "INTROADV", "TYPEDEP", "TYPMATTX", "TYPELOC", "SEXTX", "TSGENUS", "TSSUBGEN", "TSSPECS", "TSSUBSP", "TSAUTHOR", "TSKIND", "INFRATYP", "LOCVAR", "SEQUENCE", "UPSTAT");
    $taxa = $func->make_array($this->text_path[1], $fields, "", array(), "^");
    /* to get the breakdown of taxa levels
    foreach($taxa as $rec) {
        if(@$temp[$rec["RECTYPE"]]) $temp[$rec["RECTYPE"]]++;
        else $temp[$rec["RECTYPE"]] = 1;
    }
    print_r($temp);
    // [Super-Generic       ] => 614 xxx
    // [Genus/Subgenus      ] => 2540 xxx
    // [Genus/Subgenus Synon] => 3459
    // [Species             ] => 19970 xxx
    // [Species Synonym     ] => 11083
    // [Unplaced Taxon      ] => 445 xxx
    // [Nomen Nudum         ] => 177
    // [Species Group       ] => 440 xxx
    */
    $i = 0;
    $link = array();
    foreach ($taxa as $rec) {
        if (in_array($rec["RECTYPE"], array("Species", "Genus/Subgenus", "Species Group", "Super-Generic"))) {
            $i++;
            $sciname = $rec["GENUS"] . " " . $rec["SPECIES"];
            $sciname = trim($sciname);
            // species-group records carry their name in SPCGROUP instead
            if ($rec["RECTYPE"] == "Species Group") {
                $sciname = $rec["SPCGROUP"];
            }
            $temp[$sciname] = 1;
            $link = $this->create_instances_from_taxon_object($rec, $link);
        }
    }
    echo "\n\n total rows: " . count($taxa);
    echo "\n total taxa: " . count($temp);
    echo "\n link: " . count($link) . "\n";
    $free_text = self::process_free_text($func, $link);
    $geo_data = self::process_geo_data($link);
    $ecology = self::process_ecology($func, $link);
    $pollen_visitation = self::process_pollen_visitation($func, $link);
    $predation = self::process_predation($func, $link);
    return true;
    /* Waiting from SPG on items below this line */
    // some sort of references
    // $fields = array("IDNUM", "REVSNTX", "TAXNTX");
    // $references = $func->make_array($this->text_path[4], $fields, "", array(), "^");
    // references for biology and morphology
    // $fields = array("IDNUM", "BIOLGYTX", "MORPHTX");
    // $texts = $func->make_array($this->text_path[5], $fields, "", array(), "^");
    // $ref_ids = array();
    // $agent_ids = array();
    // foreach($texts as $text)
    // {
    //     $biology = $text["BIOLGYTX"];
    //     $morphology = $text["MORPHTX"];
    //     $taxon_id = @$link[$text["IDNUM"]];
    //     if($biology) $ref_ids = array_merge($ref_ids, get_object_reference_ids($biology));
    //     if($morphology) $ref_ids = array_merge($ref_ids, get_object_reference_ids($morphology));
    // }
    // comma-separated taxon names
    // $fields = array("IDNUM", "AHOSTTX", "PHOSTTX");
    // $taxon_remarks = $func->make_array($this->text_path[7], $fields, "", array(), "^");
    // comma-separated taxon names
    // $fields = array("IDNUM", "PARATX", "SECHOSTX");
    // $taxon_remarks2 = $func->make_array($this->text_path[8], $fields, "", array(), "^");
    // wait for SPG to classify
    // $fields = array("IDNUM", "HYPATX", "SYNTX");
    // $comments = $func->make_array($this->text_path[10], $fields, "", array(), "^");
}
<?php /* (C) 2009 Netology Joy Web Framework v.0.2, All rights reserved. * * Author(s): * Hasan Ozgan (meddah@netology.org) * * For the full copyright and license information, * please view the LICENSE file that was distributed * with this source code. */ require_library("/vendor/adodb/adodb.inc.php"); require_library("/validation.php"); class model { public $db; public $error; public $is_valid; public function __construct($conn) { $this->error = array(); $this->is_valid = new validation(); if (is_object($conn) == false) { $this->db = database::connection($conn); } else { if (get_parent_class($conn) == "ADOConnection") { $this->db = $conn; } else { die("Database Object Not Found"); } }