/**
 * Downloads every species XML of a resource and concatenates the part of each
 * document between the first "<taxon>" and "</response>" into a single EOL
 * transfer-schema file at CONTENT_RESOURCE_LOCAL_PATH . <resource_id> . ".xml".
 *
 * Only resource ids 100 (CONABIO) and 106 (Tamborine Mt.) have a URL source
 * here; any other id returns without touching the output file.
 *
 * @param int $resource_id id of the resource to compile
 * @return void
 */
function combine_all_xmls($resource_id)
{
    // Initialised so an unsupported resource id does not read an undefined variable.
    $species_urls = array();
    if ($resource_id == 100) {
        $species_urls = self::get_CONABIO_species_urls();
    }
    if ($resource_id == 106) {
        $species_urls = self::get_Tamborine_species_urls();
    }
    if (!$species_urls) {
        return;
    }

    debug("\n\n Start compiling all XML...");
    $old_resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
    if (!($OUT = Functions::file_open($old_resource_path, "w+"))) {
        return;
    }

    $str = "<?xml version='1.0' encoding='utf-8' ?>\n";
    $str .= "<response\n";
    $str .= " xmlns='http://www.eol.org/transfer/content/0.3'\n";
    $str .= " xmlns:xsd='http://www.w3.org/2001/XMLSchema'\n";
    $str .= " xmlns:dc='http://purl.org/dc/elements/1.1/'\n";
    $str .= " xmlns:dcterms='http://purl.org/dc/terms/'\n";
    $str .= " xmlns:geo='http://www.w3.org/2003/01/geo/wgs84_pos#'\n";
    $str .= " xmlns:dwc='http://rs.tdwg.org/dwc/dwcore/'\n";
    $str .= " xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'\n";
    $str .= " xsi:schemaLocation='http://www.eol.org/transfer/content/0.3 http://services.eol.org/schema/content_0_3.xsd'>\n";
    fwrite($OUT, $str);

    $i = 0;
    $total = sizeof($species_urls);
    foreach ($species_urls as $filename) {
        $i++;
        print "\n {$i} of {$total} ";
        if (!($contents = Functions::lookup_with_cache($filename, $this->download_options))) {
            continue;
        }

        // manual adjustments
        if ($resource_id == 106) { // tamborine mt.
            $contents = str_ireplace(array("*"), "", $contents);
        }
        if ($resource_id == 100) { // conabio.
            $contents = str_ireplace("http://creativecommons.org/licenses/by-nc-sa/2.5/mx/", "http://creativecommons.org/licenses/by-nc-sa/2.5/", $contents);
        }

        if (!simplexml_load_string($contents)) {
            print "\n\n [{$filename}] - invalid XML \n\n";
            continue;
        }

        $pos1 = stripos($contents, "<taxon>");
        $pos2 = stripos($contents, "</response>");
        // Without both markers (in order) the substr() below would copy the XML
        // prolog or a truncated tail into the combined document, so skip the file.
        if ($pos1 === false || $pos2 === false || $pos2 <= $pos1) {
            continue;
        }
        fwrite($OUT, substr($contents, $pos1, $pos2 - $pos1));
        // if($i >= 5) break; //debug
    }

    fwrite($OUT, "</response>");
    fclose($OUT);
    print "\n All XML compiled\n -end-of-process- \n";
}
/**
 * For every EoE topic, walks all pages of the topic's search results and
 * writes each result heading (the content of every <h1>) to
 * <html_dir>/<Topic_With_Underscores>.html, one per "<br>"-terminated entry.
 *
 * Link texts shorter than three words get " (<topic>)" appended, and a link
 * text seen more than once within a topic gets its occurrence number appended.
 * Terminates the script if an output file cannot be opened.
 *
 * @return void
 */
function start()
{
    $topics = array("About the EoE", "Agricultural & Resource Economics", "Biodiversity", "Biology", "Climate Change", "Ecology", "Environmental & Earth Science", "Energy", "Environmental Law & Policy", "Environmental Humanities", "Food", "Forests", "Geography", "Hazards & Disasters", "Health", "Mining & Materials", "People", "Physics & Chemistry", "Pollution", "Society & Environment", "Water", "Weather & Climate", "Wildlife");
    // $topics = array("Biodiversity");
    foreach ($topics as $topic) {
        $this->count = array(); //it initializes every topic
        $OUT = Functions::file_open($this->html_dir . str_replace(" ", "_", $topic) . ".html", "w");
        if (!$OUT) {
            exit("\nFile access problem.\n");
        }

        // Topics contain spaces and "&"; unencoded, an "&" would end the q
        // parameter early and the search would run on a truncated topic.
        $url = $this->search_url . "&q=" . urlencode($topic);
        if ($html = Functions::lookup_with_cache($url, $this->download_options)) {
            if (preg_match("/page 1 of (.*?)<\\/title>/ims", $html, $arr)) {
                // The page total is scraped from the <title>; cast so the loop
                // bound is numeric even if trailing text was captured.
                $count = (int) $arr[1];
                for ($i = 1; $i <= $count; $i++) {
                    if (!($html = Functions::lookup_with_cache($url . "&page={$i}", $this->download_options))) {
                        continue;
                    }
                    if (!preg_match_all("/<h1>(.*?)<\\/h1>/ims", $html, $arr)) {
                        continue;
                    }
                    print_r($arr[1]);
                    foreach ($arr[1] as $t) {
                        if (preg_match("/>(.*?)<\\/a>/ims", $t, $arr2)) {
                            $new_link_text = $arr2[1];
                            $word_count = str_word_count($new_link_text);
                            if ($word_count < 3) {
                                $new_link_text .= " ({$topic})";
                            }
                            // Explicit initialisation instead of suppressing the
                            // undefined-index notice with "@".
                            if (!isset($this->count[$new_link_text])) {
                                $this->count[$new_link_text] = 0;
                            }
                            $this->count[$new_link_text]++;
                            $c = $this->count[$new_link_text] > 1 ? $this->count[$new_link_text] : '';
                            $t = str_replace($arr2[1], $new_link_text . " {$c}", $t);
                        }
                        fwrite($OUT, $t . "<br>");
                    }
                }
            }
        }
        fclose($OUT);
    }
}
/**
 * Downloads an archive (.tar.gz, .tgz or .zip) into a fresh temp directory,
 * unpacks it with the system tar/unzip, and reports where the expected file
 * or folder ended up.
 *
 * @param string $dwca_file                 URL or path of the archive
 * @param string $check_file_or_folder_name name that must exist after extraction
 * @param array  $download_options          options passed to Functions::lookup_with_cache()
 * @return array|null array('archive_path' => ..., 'temp_dir' => ...) on success;
 *                    null when the download, the local write, or the extraction fails
 */
function extract_archive_file($dwca_file, $check_file_or_folder_name, $download_options = array('timeout' => 172800, 'expire_seconds' => 0))
{
    debug("Please wait, downloading resource document...");
    $path_parts = pathinfo($dwca_file);
    $filename = $path_parts['basename'];
    $temp_dir = create_temp_dir() . "/";
    debug($temp_dir);

    if (!($file_contents = Functions::lookup_with_cache($dwca_file, $download_options))) {
        debug("Connector terminated. Remote files are not ready.");
        return;
    }

    $temp_file_path = $temp_dir . "" . $filename;
    debug("temp_dir: {$temp_dir}");
    debug("Extracting... {$temp_file_path}");
    if (!($TMP = Functions::file_open($temp_file_path, "w"))) {
        return;
    }
    fwrite($TMP, $file_contents);
    fclose($TMP);
    sleep(5);

    // The file name comes from the remote location, so every path handed to
    // the shell is quoted with escapeshellarg().
    if (preg_match("/^(.*)\\.(tar\\.gz|tgz)\$/", $dwca_file)) {
        $cur_dir = getcwd();
        chdir($temp_dir);
        shell_exec("tar -zxvf " . escapeshellarg($temp_file_path));
        chdir($cur_dir);
        // Strip either accepted suffix; previously ".tgz" was left in place so
        // the derived folder path pointed at the archive file itself.
        $archive_path = preg_replace("/\\.(tar\\.gz|tgz)\$/i", "", $temp_file_path);
    } elseif (preg_match("/^(.*)\\.(zip)\$/", $dwca_file) || preg_match("/mcz_for_eol(.*?)/ims", $dwca_file)) {
        shell_exec("unzip -ad " . escapeshellarg($temp_dir) . " " . escapeshellarg($temp_file_path));
        $archive_path = str_ireplace(".zip", "", $temp_file_path);
    } else {
        debug("-- archive not gzip or zip. [{$dwca_file}]");
        return;
    }
    debug("archive path: [" . $archive_path . "]");

    // The archive may unpack directly into the temp dir or into a folder
    // named after the archive; accept either layout.
    if (file_exists($temp_dir . $check_file_or_folder_name)) {
        return array('archive_path' => $temp_dir, 'temp_dir' => $temp_dir);
    }
    if (file_exists($archive_path . "/" . $check_file_or_folder_name)) {
        return array('archive_path' => $archive_path, 'temp_dir' => $temp_dir);
    }
    debug("Can't extract archive file. Program will terminate.");
    return;
}
/**
 * Backs up the MycoBank dump, then appends each numbered
 * "mycobank_dump_addNN.txt" file (currently only 01) to the dump.
 * Stops silently if a file cannot be opened.
 *
 * @return void
 */
function utility_append_text_loop()
{
    echo "\n backing up first...";
    $dump_path = DOC_ROOT . "/public/tmp/mycobank/mycobank_dump.txt";
    copy($dump_path, DOC_ROOT . "/public/tmp/mycobank/mycobank_dump_backup.txt");
    echo "\n backup done. \n";

    $first = 1;
    $last = 1;
    $x = $first;
    while ($x <= $last) {
        $suffix = Functions::format_number_with_leading_zeros($x, "2");
        $source_path = DOC_ROOT . "/public/tmp/mycobank/mycobank_dump_add" . $suffix . ".txt";

        $reader = Functions::file_open($source_path, "r");
        if (!$reader) {
            return;
        }
        $chunk = fread($reader, filesize($source_path));
        fclose($reader);

        echo "\n copying... {$source_path}";
        echo "\n to... {$dump_path}\n";

        $writer = Functions::file_open($dump_path, "a");
        if (!$writer) {
            return;
        }
        fwrite($writer, $chunk);
        fclose($writer);

        $x++;
    }
}
/**
 * Returns the PESI service response for a guid/type pair, using a two-level
 * md5-sharded file cache under $this->cache_path.
 *
 * Lookup order: the legacy flat cache file, then the sharded cache file, then
 * a live self::soap_request() whose result is stored in the sharded cache.
 *
 * @param string $guid record identifier sent to the service
 * @param string $type request type, also part of the cache key
 * @return mixed decoded JSON from cache, the fresh response object, or null
 *               when the cache file cannot be created
 */
private function access_pesi_service_with_retry($guid, $type)
{
    $key = md5($this->cache_path . $type . "_" . $guid . ".txt");
    $shard1 = substr($key, 0, 2);
    $shard2 = substr($key, 2, 2);

    // Create the two shard directory levels on demand, outermost first.
    foreach (array($shard1, "{$shard1}/{$shard2}") as $relative_dir) {
        $dir = $this->cache_path . $relative_dir;
        if (!file_exists($dir)) {
            mkdir($dir);
        }
    }

    $cache_file = $this->cache_path . "{$shard1}/{$shard2}/{$key}.txt";
    $legacy_file = "/Users/eli/eol_old_PESI_cache/" . $type . "_" . $guid . ".txt";

    // The legacy location wins over the sharded cache when both exist.
    foreach (array($legacy_file, $cache_file) as $candidate) {
        if (file_exists($candidate)) {
            return json_decode(file_get_contents($candidate));
        }
    }

    //create the cache
    $response = self::soap_request($guid, $type);
    $handle = Functions::file_open($cache_file, "w");
    if (!$handle) {
        return;
    }
    fwrite($handle, json_encode($response));
    fclose($handle);
    echo "\n --- cache created [{$cache_file}]";
    usleep(500000); // half a second pause after each live request
    return $response;
}
/**
 * Splits the big OBIS CSV into numbered "temp_<n>.csv" files under
 * OBIS_DATA_PATH, repeating the header line at the top of every part.
 *
 * A part is flushed whenever the running line counter (which also counts the
 * header line) reaches $divisor; leftover lines go into a final part.
 *
 * @param int $divisor line count that triggers a new part
 * @return int|null number of part files written, or null if a part file
 *                  cannot be opened
 */
private function divide_big_csv_file($divisor)
{
    Functions::delete_temp_files($this->OBIS_DATA_PATH . "temp_", "csv");

    $lines_seen = 0;
    $buffer = "";
    $parts_written = 0;
    $header = "";

    foreach (new FileIterator($this->OBIS_DATA_FILE) as $position => $raw_line) {
        $lines_seen++;
        $buffer .= $raw_line . "\n"; // FileIterator removes the carriage-return

        // The first line read becomes the header shared by all parts.
        if (!$header) {
            $header = $buffer;
            $buffer = "";
            continue;
        }
        if ($lines_seen != $divisor) {
            continue;
        }

        $lines_seen = 0;
        $parts_written++;
        $part = Functions::file_open($this->OBIS_DATA_PATH . "temp_" . $parts_written . ".csv", "w");
        if (!$part) {
            return;
        }
        fwrite($part, $header);
        fwrite($part, $buffer);
        fclose($part);
        $buffer = "";
    }

    // last writes
    if ($buffer) {
        $parts_written++;
        $part = Functions::file_open($this->OBIS_DATA_PATH . "temp_" . $parts_written . ".csv", "w");
        if (!$part) {
            return;
        }
        fwrite($part, $header);
        fwrite($part, $buffer);
        fclose($part);
    }

    return $parts_written;
}
/**
 * Reads one worksheet with PHPExcel and either returns its columns keyed by
 * header label or, when $save_params is given, dumps the rows to a
 * tab-separated text file.
 *
 * The row at $startRow is treated as the header. Columns whose trimmed header
 * is empty are ignored for data rows.
 *
 * @param string     $spreadsheet path of the .xls/.xlsx/.zip/.csv file
 * @param int|null   $sheet       zero-based sheet index; null uses the active sheet
 * @param int|null   $startRow    header row number; null means row 1
 * @param array|bool $save_params false, or array with 'path' and 'worksheet_title'
 *                                naming the .txt file to write
 * @return array|false label => list of cell values (empty when exporting to a
 *                     file); false when the sheet index is out of range, the
 *                     extension is unsupported, or the export file cannot be opened
 */
public function convert_sheet_to_array($spreadsheet, $sheet = NULL, $startRow = NULL, $save_params = false)
{
    require_once DOC_ROOT . '/vendor/PHPExcel/Classes/PHPExcel.php';
    if (!isset($this->open_spreadsheets)) {
        $this->open_spreadsheets = array();
    }
    $temp = explode('.', $spreadsheet); //to avoid E_STRICT warning - only variables can be passed by reference
    $ext = strtolower(end($temp));

    // NOTE(review): the cache key is the literal 'spreadsheet', not the file
    // path, so a later call with a different file reuses the first workbook.
    // Left unchanged because callers may depend on it - confirm.
    if (isset($this->open_spreadsheets['spreadsheet'])) {
        $objPHPExcel = $this->open_spreadsheets['spreadsheet'];
    } else {
        if ($ext == "xls") {
            $objReader = \PHPExcel_IOFactory::createReader('Excel5');
        } elseif ($ext == "xlsx" || $ext == "zip") {
            $objReader = \PHPExcel_IOFactory::createReader('Excel2007');
        } elseif ($ext == "csv") {
            $objReader = new \PHPExcel_Reader_CSV();
        } else {
            // Previously fell through and called a method on an undefined reader.
            return false;
        }
        if ($ext != "csv") {
            $objReader->setReadDataOnly(true);
        }
        $objPHPExcel = $objReader->load($spreadsheet);
        $this->open_spreadsheets['spreadsheet'] = $objPHPExcel;
    }

    if (is_null($sheet)) {
        $objWorksheet = $objPHPExcel->getActiveSheet();
    } else {
        if ($sheet + 1 > $objPHPExcel->getSheetCount()) {
            return false;
        }
        $objWorksheet = $objPHPExcel->setActiveSheetIndex($sheet);
    }

    $highestRow = $objWorksheet->getHighestRow(); // e.g. 10
    $highestColumn = $objWorksheet->getHighestColumn(); // e.g 'F'
    $highestColumnIndex = \PHPExcel_Cell::columnIndexFromString($highestColumn); // e.g. 5
    $sheet_label = array();
    $sheet_value = array();
    if (is_null($startRow)) {
        $startRow = 1;
    }

    $FILE = false;
    if ($save_params) {
        $FILE = Functions::file_open($save_params['path'] . "/" . $save_params['worksheet_title'] . ".txt", 'w');
        if (!$FILE) {
            // Without a handle every fwrite() below would fail.
            return false;
        }
    }

    for ($row = $startRow; $row <= $highestRow; ++$row) {
        if ($save_params) {
            $saved_row = array();
        }
        for ($col = 0; $col <= $highestColumnIndex; ++$col) {
            $cell = self::cell_value($objWorksheet, $col, $row, $ext);
            if ($row == $startRow) {
                $sheet_label[] = $cell;
                if ($save_params) {
                    $saved_row[] = $cell;
                }
            } else {
                $index = trim($sheet_label[$col]);
                if ($index) {
                    if ($save_params) {
                        $saved_row[] = $cell;
                    } else {
                        $sheet_value[$index][] = $cell;
                    }
                }
            }
        }
        if ($save_params) {
            fwrite($FILE, implode("\t", $saved_row) . "\n");
        }
    }

    if ($save_params) {
        fclose($FILE);
    }
    return $sheet_value;
}
/**
 * Appends one name, followed by a newline, to the text file at
 * $this->TEXT_FILE_FOR_DL. Does nothing if the file cannot be opened.
 *
 * @param string $name      name to record
 * @param mixed  $post_name not used by this method
 * @return void
 */
private function store_name_to_text_file($name, $post_name)
{
    /* This text file will be given to partner so they can fix their names */
    $handle = Functions::file_open($this->TEXT_FILE_FOR_DL, "a");
    if (!$handle) {
        return;
    }
    fwrite($handle, $name . "\n");
    fclose($handle);
}
/**
 * Opens $this->dump_file in write mode and closes it again, which creates
 * the file or truncates it to zero length.
 *
 * @return void
 */
private function initialize_dump()
{
    $handle = Functions::file_open($this->dump_file, 'w');
    if ($handle) {
        fclose($handle);
    }
}
/**
 * Rewrites a text file in place with every LF + CR + backslash sequence
 * removed, then terminates the script.
 *
 * Returns early (without exiting) if the file cannot be opened for reading
 * or writing.
 *
 * @param string $file_path file to clean
 * @return void
 */
private function clean_text_file($file_path)
{
    echo "\nUpdating {$file_path}";

    //read
    $reader = Functions::file_open($file_path, "r");
    if (!$reader) {
        return;
    }
    $text = fread($reader, filesize($file_path));
    fclose($reader);

    $unwanted = chr(10) . chr(13) . "\\";
    $text = str_ireplace($unwanted, "", $text);

    //write
    $writer = Functions::file_open($file_path, "w");
    if (!$writer) {
        return;
    }
    fwrite($writer, $text);
    fclose($writer);

    echo "\nChanges saved\n";
    exit;
}
/**
 * Reads a whole dump file and returns it either JSON-decoded or raw.
 *
 * @param string $file_path path of the dump file
 * @param bool   $is_array  true to json_decode() the content into an
 *                          associative array, false to return the raw text
 * @return mixed decoded data, raw string, or null when the file cannot be opened
 */
private function access_dump_file($file_path, $is_array = true)
{
    if (!($file = Functions::file_open($file_path, "r"))) {
        return;
    }
    // stream_get_contents() reads to EOF, so an empty file yields "" instead
    // of the failure fread() gives for a zero length, and the result does not
    // depend on a possibly stale cached filesize().
    $contents = stream_get_contents($file);
    fclose($file);

    if ($is_array) {
        return json_decode($contents, true);
    }
    return $contents;
}
/**
 * Downloads a tab-separated titles export and passes every data row's title
 * id (column 0) and title (column 3) to save_title_to_text(), once without
 * and once with the fourth argument set to true.
 *
 * Rows without a title are skipped; the first row that has one is treated as
 * the header and skipped too.
 *
 * @param string $text_file remote location of the export
 * @return void
 */
function generate_text_files($text_file)
{
    $options = array('cache' => 1, 'download_timeout_seconds' => 4800, 'download_wait_time' => 300000, 'expire_seconds' => false);
    if (!($temp_path = Functions::save_remote_file_to_local($text_file, $options))) {
        return;
    }

    $folder = "exported_titles";
    initialize_text_files($folder);

    // An unchecked failed open made the old feof() loop spin forever (or
    // throw on newer PHP), so bail out here.
    if (!($file = Functions::file_open($temp_path, "r"))) {
        return;
    }

    $first_row = true;
    while (($line = fgets($file)) !== false) {
        $cols = explode("\t", $line);
        $title_id = trim(isset($cols[0]) ? $cols[0] : "");
        $title = trim(isset($cols[3]) ? $cols[3] : "");
        if (!$title) {
            continue;
        }
        if ($first_row) {
            $first_row = false;
            continue;
        }
        echo "[{$title_id}]";
        save_title_to_text($title_id, $title, $folder);
        save_title_to_text($title_id, $title, $folder, true);
    }
    fclose($file);
}
/**
 * Pages through the "id_list" service via self::create_cache() and writes
 * every returned NameId, one per line, to $this->tropicos_ids_list_file.
 *
 * Each request starts at one past the last NameId seen. Paging ends when the
 * service reports an "Error" entry (no more ids), returns nothing, or returns
 * something that is not a JSON list.
 *
 * @return void
 */
private function assemble_id_list()
{
    if (!($OUT = Functions::file_open($this->tropicos_ids_list_file, "w"))) {
        return;
    }
    $startid = 0; // debug orig value 0; 1600267 with mediaURL and <location>; 1201245 with thumbnail size images; 100391155 near the end
    $count = 0;
    while (true) {
        $count++;
        $contents = self::create_cache("id_list", $startid);
        if (!$contents) {
            echo "\n --server not accessible-- \n";
            break;
        }

        $ids = json_decode($contents, true);
        // A non-array result used to be iterated anyway and the loop kept
        // requesting with an ever-increasing start id.
        if (!is_array($ids) || isset($ids["Error"])) {
            break;
        }
        if ($count % 100 == 0) {
            echo "\n count:[{$count}] " . count($ids);
        }

        $str = "";
        foreach ($ids as $id) {
            if (!is_array($id)) {
                continue;
            }
            if (isset($id["Error"])) {
                // no more ids --- [{"Error":"No names were found"}]
                // Leave both loops so the output handle is closed below
                // instead of returning with it still open.
                break 2;
            }
            if (!empty($id["NameId"])) {
                $str .= $id["NameId"] . "\n";
                $startid = $id["NameId"];
            } else {
                echo "\n nameid undefined";
            }
        }
        $startid++; // to avoid duplicate ids, set next id to get
        if ($str != "") {
            fwrite($OUT, $str);
        }
        // if($count == 1300) break; // normal operation
    }
    fclose($OUT);
}
/**
 * Runs a Vimeo API call through a file cache sharded by the md5 of
 * $command and the "_"-joined $param values.
 *
 * A cached response is returned while it is younger than
 * $options['expire_seconds'], or always when that option is false. Otherwise
 * the stale file is removed, the call is made, and a truthy result is cached.
 *
 * @param object $vimeo   client exposing call($command, $param)
 * @param string $command API method name
 * @param array  $param   API parameters
 * @param array  $options 'expire_seconds' (default 1296000 = 15 days),
 *                        'timeout' (default 240), 'cache_path'
 *                        (default DOC_ROOT . "tmp/cache/")
 * @return mixed decoded cached response, fresh response, or false when the
 *               call returns nothing
 */
private static function lookup_with_cache_vimeo_call($vimeo, $command, $param, $options = array())
{
    // default expire time is 15 days
    if (!isset($options['expire_seconds'])) {
        $options['expire_seconds'] = 1296000; //debug orig value = 1296000
    }
    if (!isset($options['timeout'])) {
        $options['timeout'] = 240;
    }
    if (!isset($options['cache_path'])) {
        $options['cache_path'] = DOC_ROOT . "tmp/cache/";
    }

    $url = $command . implode("_", $param);
    $md5 = md5($url);
    $cache1 = substr($md5, 0, 2);
    $cache2 = substr($md5, 2, 2);
    $options['cache_path'] .= "vimeo/";

    // One recursive mkdir replaces three separate level checks and also
    // works when a parent of the vimeo folder is missing.
    $cache_dir = $options['cache_path'] . "{$cache1}/{$cache2}";
    if (!file_exists($cache_dir)) {
        mkdir($cache_dir, 0777, true);
    }
    $cache_path = $cache_dir . "/{$md5}.cache";

    if (file_exists($cache_path)) {
        $file_contents = file_get_contents($cache_path);
        if (!Functions::is_utf8($file_contents)) {
            $file_contents = utf8_encode($file_contents);
        }
        $obj = json_decode($file_contents);
        if ($file_contents || strval($file_contents) == "0") {
            $file_age_in_seconds = time() - filemtime($cache_path);
            $never_expires = ($options['expire_seconds'] === false);
            if ($never_expires || $file_age_in_seconds < $options['expire_seconds']) {
                return $obj;
            }
        }
        @unlink($cache_path);
    }

    $obj = $vimeo->call($command, $param);
    if (!$obj) {
        return false;
    }

    $file_contents = json_encode($obj);
    if ($FILE = Functions::file_open($cache_path, 'w+')) {
        fwrite($FILE, $file_contents);
        fclose($FILE);
    } elseif ($h = Functions::file_open(DOC_ROOT . "/public/tmp/cant_delete.txt", 'a')) {
        // Record cache files that could not be written.
        fwrite($h, $cache_path . "\n");
        fclose($h);
    }
    // The response is returned even when neither file could be opened;
    // previously a failed log open discarded a successful API result.
    return $obj;
}
/**
 * Downloads a zip file into a new temp directory, unzips it there, and maps
 * each expected file name to its extracted path.
 *
 * @param string $zip_path         remote location of the zip
 * @param array  $download_options options for Functions::get_remote_file()
 * @param array  $files            expected file names, without extension
 * @param string $extension        extension appended to each name (e.g. ".txt")
 * @return array|null name => path map; an empty array when the download
 *                    fails; null when the zip cannot be saved locally or the
 *                    first expected file is missing after extraction
 */
private function load_zip_contents($zip_path, $download_options, $files, $extension)
{
    $text_path = array();
    $temp_path = create_temp_dir();

    if (!($file_contents = Functions::get_remote_file($zip_path, $download_options))) {
        debug("\n\n Connector terminated. Remote files are not ready.\n\n");
        return $text_path;
    }

    $parts = pathinfo($zip_path);
    $temp_file_path = $temp_path . "/" . $parts["basename"];
    if (!($TMP = Functions::file_open($temp_file_path, "w"))) {
        return;
    }
    fwrite($TMP, $file_contents);
    fclose($TMP);

    // The base name comes from the remote URL; quote both paths so spaces or
    // shell metacharacters cannot alter the command.
    shell_exec("unzip " . escapeshellarg($temp_file_path) . " -d " . escapeshellarg($temp_path));

    // Only the first expected file is probed, as a marker that extraction worked.
    if (!isset($files[0]) || !file_exists($temp_path . "/" . $files[0] . $extension)) {
        return;
    }
    foreach ($files as $file) {
        $text_path[$file] = $temp_path . "/" . $file . $extension;
    }
    return $text_path;
}
media_resource.tab [254] taxon.tab [172] vernacular_name.tab [38] */ $timestart = microtime(1); include_once dirname(__FILE__) . "/../../config/environment.php"; require_library('connectors/VimeoAPI'); $resource_id = 214; if (!Functions::can_this_connector_run($resource_id)) { return; } $func = new VimeoAPI(); $taxa = $func->get_all_taxa(); $xml = \SchemaDocument::get_taxon_xml($taxa); $resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml"; if (!($OUT = Functions::file_open($resource_path, "w"))) { return; } fwrite($OUT, $xml); fclose($OUT); //start converting to DWC-A file echo "\n\nStart converting to DWC-A file..."; require_library('connectors/ConvertEOLtoDWCaAPI'); $params["eol_xml_file"] = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml"; $params["filename"] = "no need to mention here.xml"; $params["dataset"] = "Vimeo XML file"; $params["resource_id"] = $resource_id; $func = new ConvertEOLtoDWCaAPI($resource_id); $func->export_xml_to_archive($params, true); // true => means it is an XML file, not an archive file nor a zip file Functions::finalize_dwca_resource($resource_id);
/**
 * Downloads a JSON dump to a local temp file, decodes it into an associative
 * array, and removes the temp file.
 *
 * The content is passed through utf8_encode() before decoding.
 *
 * @param string $fname remote location of the dump
 * @return mixed decoded data; false when the download fails; null when the
 *               local copy cannot be opened
 */
private function get_names_from_dump($fname)
{
    if (!($filename = Functions::save_remote_file_to_local($fname, $this->download_options))) {
        return false;
    }

    $READ = Functions::file_open($filename, "r");
    if (!$READ) {
        // Do not leave the downloaded temp file behind on failure.
        if (file_exists($filename)) {
            unlink($filename);
        }
        return;
    }
    // Reads to EOF; unlike fread(..., filesize()) this copes with an empty file.
    $contents = stream_get_contents($READ);
    fclose($READ);
    unlink($filename);

    return json_decode(utf8_encode($contents), true);
}
/**
 * Streams the BHL-in-EOL-Flickr-group resource XML, counts distinct
 * scientific names and distinct data-object identifiers, saves the
 * identifiers as JSON to "BHL_images_in_EOLGroup.txt", and reads that file
 * back as a check.
 *
 * @return void
 */
function bhl_image_count()
{
    // alternative source: "http://localhost/eol_php_code/applications/content_server/resources/544.xml"
    $path = "http://localhost/eol_php_code/applications/content_server/resources/544%20BHL%20in%20EOL%20Flickr%20Group.xml";
    print "\n xml file: [{$path}] \n";

    $reader = new \XMLReader();
    if (!$reader->open($path)) {
        // Previously an unopened reader was iterated with errors suppressed.
        print "\n cannot open xml file \n";
        return;
    }

    $i = 0;
    $do_ids = array();
    $names = array();
    while (@$reader->read()) {
        if ($reader->nodeType != \XMLReader::ELEMENT || $reader->name != "taxon") {
            continue;
        }
        $taxon = simplexml_load_string($reader->readOuterXML());
        if ($taxon === false) {
            continue; // fragment could not be parsed
        }
        $t_dc = $taxon->children("http://purl.org/dc/elements/1.1/");
        $t_dwc = $taxon->children("http://rs.tdwg.org/dwc/dwcore/");
        $scientificname = trim($t_dwc->ScientificName);
        $taxon_identifier = trim($t_dc->identifier);
        $i++;
        print "\n {$i}. {$scientificname} [{$taxon_identifier}]";
        // Array keys de-duplicate names and identifiers.
        $names[$scientificname] = 1;
        foreach ($taxon->dataObject as $do) {
            $t_dc2 = $do->children("http://purl.org/dc/elements/1.1/");
            $id = trim($t_dc2->identifier);
            $do_ids[$id] = 1;
        }
    }
    $reader->close();

    $names = array_keys($names);
    print "\n total names: " . count($names);
    $do_ids = array_keys($do_ids);
    print "\n total do: " . count($do_ids);

    $filename = "BHL_images_in_EOLGroup.txt";
    if (!($WRITE = Functions::file_open($filename, "w"))) {
        return;
    }
    fwrite($WRITE, json_encode($do_ids));
    fclose($WRITE);

    // just testing - reading it back
    if (!($READ = Functions::file_open($filename, "r"))) {
        return;
    }
    $contents = stream_get_contents($READ);
    fclose($READ);
    $do_ids = json_decode($contents, true);
    print "\n\n from text file: " . count($do_ids);
}
/**
 * Renders a BHL excerpt as wiki text into a temp ".wiki" file, pushes it to
 * MediaWiki with maintenance/edit.php, refreshes the related caches, and
 * optionally redirects the browser to the wiki page.
 *
 * Wiki text is only produced when $params['header_title'] is set (the "ver 2"
 * layout); the older item/title based layout is no longer generated.
 *
 * @param array $params        excerpt fields; reads 'page_id', 'header_title',
 *                             'bibliographicCitation', 'agents', 'license_type',
 *                             'rightsholder', 'title_form', 'ocr_text',
 *                             'references' and the optional 'wiki_title',
 *                             'projects', 'compiler', 'language', 'subject_type'
 * @param bool  $cont_redirect true to emit a script redirecting to the wiki page
 * @return void
 */
function move2wiki($params, $cont_redirect = true)
{
    $params['new_project'] = "";
    $params['remove_project'] = "";
    if (!empty($params['wiki_title'])) {
        $new_title = str_replace(" ", "_", $params['wiki_title']);
    } else {
        $new_title = self::create_title($params);
    }

    $filename = "../temp/wiki/" . $params['page_id'] . ".wiki";
    $written = false;
    if ($file = Functions::file_open($filename, 'w')) {
        $go_top = "|+ style=\"caption-side:right;\"|[[Image:arrow-up icon.png|link=#top|Go top]]";
        if (isset($params['header_title'])) { //ver 2
            $params['pass_title'] = $params['page_id'];

            // link back to the page editor for this excerpt
            $back = "http://" . $_SERVER['SERVER_NAME'] . "/" . MEDIAWIKI_MAIN_FOLDER . "/Custom/bhl_access/index.php?wiki_title=" . $new_title . "&search_type=wiki2php&overwrite=1";
            fwrite($file, "__NOEDITSECTION__<span class=\"plainlinks\">[{$back} Go Review Excerpt - Page Editor]</span>[[Image:Back icon.png|link={$back}|Go Review Excerpt - Page Editor]]\n");

            // Embed all params as JSON without the outer braces.
            $pass_params = substr(json_encode($params), 1, -1);
            fwrite($file, "{{Void|" . $pass_params . "}}\n");

            fwrite($file, "=== For EOL Ingestion ===\n");
            fwrite($file, "Excerpt from " . "'''" . $params['header_title'] . "'''" . "\n\n");
            fwrite($file, "'''Projects''': " . @$params['projects'] . "\n\n");
            $ids = self::prep_pageids_4disp($params);
            foreach ($ids as $id) {
                $info = self::get_label_added_pageInfo($id);
                $link = "[http://biodiversitylibrary.org/page/{$id} {$id}]";
                fwrite($file, trim(@$info['prefix'] . " " . @$info['number'] . " (" . @$info['type'] . ") PageID: {$link}") . "\n\n");
            }

            fwrite($file, "== Bibliographic Citation ==\n");
            fwrite($file, $params['bibliographicCitation'] . "\n");

            fwrite($file, "== Excerpt Metadata ==\n");
            fwrite($file, "'''Authors''': " . $params['agents'] . "\n\n");
            $link = "[" . self::get_license_url($params['license_type']) . " " . $params['license_type'] . "]";
            fwrite($file, "'''License''': " . $link . "\n\n");
            fwrite($file, "'''Rights Holder''': " . $params['rightsholder'] . "\n\n");
            fwrite($file, "'''Compiler''': " . @$params['compiler'] . "\n\n");
            fwrite($file, "'''Supplier''': " . "Biodiversity Heritage Library" . "\n\n");
            fwrite($file, "'''Language''': " . @$params['language'] . "\n\n");
            $audience = self::prep_audience_4disp($params);
            fwrite($file, "'''Audience''': " . $audience . "\n\n");

            fwrite($file, "== Taxon Associations ==\n");
            $names = self::prep_names_4disp($params);
            foreach ($names as $name) {
                $link = "http://www.eol.org/pages/" . str_replace(" ", "%20", $name);
                $link = "[" . $link . " " . $name . "]";
                fwrite($file, $link . "\n\n");
            }

            fwrite($file, "== Title & Subchapter ==\n");
            fwrite($file, "'''Subchapter''': " . self::get_subject_desc(@$params['subject_type']) . "\n\n");
            fwrite($file, "'''Title''': " . $params['title_form'] . "\n\n");

            // The excerpt and the references share one table layout; only the
            // section heading, the source field and the table name differ.
            $sections = array(
                array("== Excerpt ==\n", 'ocr_text', "OCR Text"),
                array("== References ==\n", 'references', "References"),
            );
            foreach ($sections as $section) {
                fwrite($file, $section[0]);
                $ocrs = self::prep_ocrs_4disp($params[$section[1]]);
                foreach ($ocrs as $ocr) {
                    fwrite($file, "{| class=\"wikitable\" style=\"" . "" . "\" name=\"" . $section[2] . "\"\n");
                    fwrite($file, "{$go_top}\n");
                    fwrite($file, "|" . self::format_wiki((string) $ocr) . "\n");
                    fwrite($file, "|-\n");
                    fwrite($file, "|}\n");
                }
            }
            $written = true;
        }
        fclose($file);
    }

    // Upload only when wiki text was produced. The page id is read from
    // $params directly; the old code used a copy that was undefined unless the
    // ver-2 branch had run.
    if ($written) {
        $temp_wiki_file = DOC_ROOT . MEDIAWIKI_MAIN_FOLDER . "/Custom/temp/wiki/" . $params['page_id'] . ".wiki";
        $cookie_key = MW_DBNAME . 'UserName';
        $wiki_user = isset($_COOKIE[$cookie_key]) ? $_COOKIE[$cookie_key] : '';
        // The user name comes from a cookie and the title from request params;
        // every argument is quoted so neither can inject shell syntax.
        $cmdline = "php -q " . escapeshellarg(DOC_ROOT . MEDIAWIKI_MAIN_FOLDER . "/maintenance/edit.php")
            . " -u " . escapeshellarg($wiki_user)
            . " -s " . escapeshellarg("BHL data to Wiki " . $params['page_id'])
            . " -m " . escapeshellarg($new_title)
            . " < " . escapeshellarg($temp_wiki_file);
        $status = shell_exec($cmdline . " 2>&1");
        $status = str_ireplace("done", "done. ", $status);

        //now delete the temp wiki file
        if (file_exists($temp_wiki_file)) {
            unlink($temp_wiki_file);
        }
    }

    $wiki_page = "../../wiki/" . $new_title;

    //make a fresh cache for the newly saved wiki
    $wiki_title = isset($params['wiki_title']) ? $params['wiki_title'] : '';
    $new = trim(str_replace("_", " ", $wiki_title));
    self::get_wiki_text($new, array("expire_seconds" => true)); //force cache expires

    //make a fresh cache when calling the list:
    $_SESSION["title_list_cache_YN_draft"] = true; //meaning cache expires
    $_SESSION["title_list_cache_YN_approved"] = true; //meaning cache expires

    // A header('Location: ...') redirect caused a header error here, so the
    // redirect is done client-side. The target is JSON-encoded so the title
    // cannot break out of the script string.
    if ($cont_redirect) {
        echo '<script type="text/javascript">location.href = '
            . json_encode($wiki_page, JSON_HEX_TAG | JSON_HEX_APOS | JSON_HEX_QUOT | JSON_HEX_AMP)
            . ';</script>';
    }
}
/**
 * Creates the given file, or truncates it if it exists, by opening it in
 * write mode and closing it straight away.
 *
 * @param string $file path of the file to reset
 * @return void
 */
private function initialize_text_file($file)
{
    $handle = Functions::file_open($file, "w");
    if (!$handle) {
        return;
    }
    fclose($handle);
}
/**
 * Concatenates the <taxon> sections of several EOL resource XML files into
 * one document at CONTENT_RESOURCE_LOCAL_PATH . <resource_id> . ".xml".
 *
 * From each input, the text from the first "<taxon>" up to "</response>" is
 * copied. Inputs that are unreadable, empty, or lack those markers are
 * skipped, so the output is always closed with "</response>".
 *
 * @param int    $resource_id id used to name the combined file
 * @param string $files       glob() pattern selecting the input files
 * @return void
 */
function combine_all_eol_resource_xmls($resource_id, $files)
{
    debug("\n\n Start compiling all XML...");
    if (!($OUT = Functions::file_open(CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml", "w"))) {
        return;
    }

    $str = "<?xml version='1.0' encoding='utf-8' ?>\n";
    $str .= "<response\n";
    $str .= " xmlns='http://www.eol.org/transfer/content/0.3'\n";
    $str .= " xmlns:xsd='http://www.w3.org/2001/XMLSchema'\n";
    $str .= " xmlns:dc='http://purl.org/dc/elements/1.1/'\n";
    $str .= " xmlns:dcterms='http://purl.org/dc/terms/'\n";
    $str .= " xmlns:geo='http://www.w3.org/2003/01/geo/wgs84_pos#'\n";
    $str .= " xmlns:dwc='http://rs.tdwg.org/dwc/dwcore/'\n";
    $str .= " xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'\n";
    $str .= " xsi:schemaLocation='http://www.eol.org/transfer/content/0.3 http://services.eol.org/schema/content_0_3.xsd'>\n";
    fwrite($OUT, $str);

    $matches = glob($files);
    if (!$matches) {
        $matches = array(); // glob() returns false on error
    }
    foreach ($matches as $filename) {
        debug("\n {$filename}");
        if (!($READ = Functions::file_open($filename, "r"))) {
            // Returning here used to leave the output open and the XML
            // without its closing tag; skip just this input instead.
            debug("\n cannot read [{$filename}]");
            continue;
        }
        $contents = stream_get_contents($READ);
        fclose($READ);
        if (!$contents) {
            debug("\n no contents [{$filename}]");
            continue;
        }

        $pos1 = stripos($contents, "<taxon>");
        $pos2 = stripos($contents, "</response>");
        // Copy only when both markers exist in the right order; a missing
        // closing tag would otherwise give substr() a negative length.
        if ($pos1 && $pos2 !== false && $pos2 > $pos1) {
            fwrite($OUT, substr($contents, $pos1, $pos2 - $pos1));
        }
    }

    fwrite($OUT, "</response>");
    fclose($OUT);
    debug("\n All XML compiled\n\n");
}
/**
 * Builds the AmphibiaWeb resource: downloads the partner's species dump,
 * converts each species with a submitter into a SchemaTaxon with text data
 * objects, and writes the resulting document to
 * CONTENT_RESOURCE_LOCAL_PATH . <resource_id> . ".xml".
 *
 * The existing resource file is left untouched when the dump cannot be
 * downloaded or parsed.
 *
 * @param int $resource_id id of the resource to (re)generate
 * @return void
 */
function start($resource_id)
{
    $new_resource_path = DOC_ROOT . "temp/" . $resource_id . ".xml";
    // $file = 'http://localhost/cp/Amphibiaweb/amphib_dump.xml';
    $file = 'http://amphibiaweb.org/amphib_dump.xml';
    $new_resource_xml = Functions::lookup_with_cache($file, array('timeout' => 1200, 'download_attempts' => 5, 'expire_seconds' => 86400));
    if (!$new_resource_xml) {
        echo "\n\n Content partner's server is down, connector will now terminate.\n";
        return;
    }

    // These may look like the same wrong characters - but they are several different wrong characters
    // NOTE(review): the three search strings below are empty in this copy of
    // the file, which makes the calls no-ops; the original mis-encoded bytes
    // appear to have been lost and need restoring from version control.
    $new_resource_xml = str_replace("", "\"", $new_resource_xml);
    $new_resource_xml = str_replace("", "\"", $new_resource_xml);
    $new_resource_xml = str_replace("", "-", $new_resource_xml);

    if (!($OUT = Functions::file_open($new_resource_path, "w+"))) {
        return;
    }
    fwrite($OUT, $new_resource_xml);
    fclose($OUT);
    unset($new_resource_xml);

    $xml = simplexml_load_file($new_resource_path);
    if ($xml === false) {
        // An unparsable dump used to fall through and overwrite the resource
        // with an empty document.
        echo "\n\n Downloaded XML could not be parsed, connector will now terminate.\n";
        unlink($new_resource_path);
        return;
    }

    $taxa = array();
    $total = count($xml->species);
    $i = 0;
    foreach ($xml->species as $species) {
        $i++;
        if ($i % 1000 == 0) {
            echo "\n {$i} of {$total} ";
        }
        $amphibID = (int) trim($species->amphib_id);
        $genus = format_utf8((string) trim($species->genus));
        $speciesName = format_utf8((string) trim($species->species));
        $order = format_utf8((string) trim($species->ordr));
        $family = format_utf8((string) trim($species->family));
        $commonNames = explode(",", format_utf8((string) trim($species->common_name)));
        $submittedBy = format_utf8((string) trim($species->submittedby));
        $editedBy = format_utf8((string) trim($species->editedby));
        $description = format_utf8((string) trim($species->description));
        $distribution = format_utf8((string) trim($species->distribution));
        $life_history = format_utf8((string) trim($species->life_history));
        $trends_and_threats = format_utf8((string) trim($species->trends_and_threats));
        $relation_to_humans = format_utf8((string) trim($species->relation_to_humans));
        $comments = format_utf8((string) trim($species->comments));
        $ref = format_utf8((string) trim($species->refs));

        // References arrive as one string separated by "<p>".
        $separator = "<p>";
        $refs = array();
        foreach (explode($separator, $ref) as $r) {
            $refs[] = array("fullReference" => trim($r));
        }

        $description = fix_article($description);
        $distribution = fix_article($distribution);
        $life_history = fix_article($life_history);
        $trends_and_threats = fix_article($trends_and_threats);
        $relation_to_humans = fix_article($relation_to_humans);
        $comments = fix_article($comments);

        $pageURL = "http://amphibiaweb.org/cgi/amphib_query?where-genus=" . $genus . "&where-species=" . $speciesName . "&account=amphibiaweb";

        // Species without a submitter are not published.
        if (!$submittedBy) {
            continue;
        }

        // Split "A, B and C" into one author agent per name. foreach replaces
        // the each() construct, which no longer exists in PHP 8.
        $agents = array();
        foreach (preg_split("/(,| and )/", $submittedBy) as $val) {
            $val = trim($val);
            if (!$val) {
                continue;
            }
            $agentParameters = array();
            $agentParameters["role"] = "author";
            $agentParameters["fullName"] = $val;
            $agents[] = new \SchemaAgent($agentParameters);
        }

        $nameString = trim($genus . " " . $speciesName);
        $taxonParameters = array();
        $taxonParameters["identifier"] = $amphibID;
        $taxonParameters["source"] = $pageURL;
        $taxonParameters["kingdom"] = "Animalia";
        $taxonParameters["phylum"] = "Chordata";
        $taxonParameters["class"] = "Amphibia";
        $taxonParameters["order"] = $order;
        $taxonParameters["family"] = $family;
        $taxonParameters["scientificName"] = $nameString;
        foreach ($commonNames as $common_name) {
            $taxonParameters['commonNames'][] = new \SchemaCommonName(array("name" => $common_name, "language" => "en"));
        }
        $taxonParameters["dataObjects"] = array();

        $dataObjects = array();
        if ($distribution) {
            $dataObjects[] = get_data_object($amphibID . "_distribution", "Distribution and Habitat", $distribution, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#Distribution", $refs, $agents, $pageURL);
        }
        if ($life_history) {
            $dataObjects[] = get_data_object($amphibID . "_life_history", "Life History, Abundance, Activity, and Special Behaviors", $life_history, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#Trends", $refs, $agents, $pageURL);
        }
        if ($trends_and_threats) {
            // NOTE(review): this title duplicates the life-history one; it
            // looks like a copy/paste - confirm the intended title.
            $dataObjects[] = get_data_object($amphibID . "_trends_threats", "Life History, Abundance, Activity, and Special Behaviors", $trends_and_threats, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#Threats", $refs, $agents, $pageURL);
        }
        if ($relation_to_humans) {
            $dataObjects[] = get_data_object($amphibID . "_relation_to_humans", "Relation to Humans", $relation_to_humans, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#RiskStatement", $refs, $agents, $pageURL);
        }

        // Comments are folded into the description. Appending covers both
        // cases; the old nested if/else never reached its "description empty,
        // comments present" branch, so comment-only species lost their text.
        $description .= $comments;
        if ($description) {
            $dataObjects[] = get_data_object($amphibID . "_description", "Description", $description, "http://rs.tdwg.org/ontology/voc/SPMInfoItems#GeneralDescription", $refs, $agents, $pageURL);
        }

        foreach ($dataObjects as $v) {
            $taxonParameters["dataObjects"][] = new \SchemaDataObject($v);
        }
        $taxa[] = new \SchemaTaxon($taxonParameters);
        //if($i >= 5) break; //debug
    }

    $new_resource_xml = \SchemaDocument::get_taxon_xml($taxa);
    $old_resource_path = CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml";
    if (!($OUT = Functions::file_open($old_resource_path, "w+"))) {
        return;
    }
    fwrite($OUT, $new_resource_xml);
    fclose($OUT);
    // Functions::set_resource_status_to_force_harvest($resource_id);

    // Remove the downloaded copy without spawning a shell.
    if (file_exists($new_resource_path)) {
        unlink($new_resource_path);
    }
    // Functions::gzip_resource_xml($resource_id);
}
ini_set('display_errors', true); define('DOWNLOAD_WAIT_TIME', '300000'); // .3 seconds wait time include_once dirname(__FILE__) . "/../../config/environment.php"; $resource_id = 15; if (!Functions::can_this_connector_run($resource_id)) { return; } require_library('FlickrAPI'); $GLOBALS['ENV_DEBUG'] = false; $auth_token = NULL; if (FlickrAPI::valid_auth_token(FLICKR_AUTH_TOKEN)) { $auth_token = FLICKR_AUTH_TOKEN; } // create new _temp file if (!($resource_file = Functions::file_open(CONTENT_RESOURCE_LOCAL_PATH . $resource_id . "_temp.xml", "w+"))) { return; } // start the resource file with the XML header fwrite($resource_file, \SchemaDocument::xml_header()); // query Flickr and write results to file FlickrAPI::get_all_eol_photos($auth_token, $resource_file); // write the resource footer fwrite($resource_file, \SchemaDocument::xml_footer()); fclose($resource_file); // cache the previous version and make this new version the current version @unlink(CONTENT_RESOURCE_LOCAL_PATH . $resource_id . "_previous.xml"); Functions::file_rename(CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml", CONTENT_RESOURCE_LOCAL_PATH . $resource_id . "_previous.xml"); Functions::file_rename(CONTENT_RESOURCE_LOCAL_PATH . $resource_id . "_temp.xml", CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml"); // set Flickr to force harvest if (filesize(CONTENT_RESOURCE_LOCAL_PATH . $resource_id . ".xml") > 600) {
/**
 * Saves the titles returned by get_wanted_pages_from_website() to
 * DOC_ROOT/temp/eoearth_wanted_pages_titles.txt, one title per line.
 *
 * Prints the number of titles first, and a confirmation message once the
 * file has been written. Does nothing further when the file cannot be opened.
 */
public function save_wanted_pages()
{
    $wanted_titles = self::get_wanted_pages_from_website();
    echo "\n" . count($wanted_titles) . "\n";

    $destination = DOC_ROOT . "/temp/eoearth_wanted_pages_titles.txt";
    $handle = Functions::file_open($destination, 'w');
    if (!$handle) {
        return;
    }

    foreach ($wanted_titles as $wanted_title) {
        fwrite($handle, "{$wanted_title}\n");
    }
    echo "\nFile saved OK\n";
    fclose($handle);
}
$xml = str_replace(array(0x73, 0x20, 0x68, 0x6f), " ", $xml); $xml = str_replace(chr(0x73) . " " . chr(0x20) . " " . chr(0x68) . " " . chr(0x6f), " ", $xml); $xml = str_replace(array(0x77, 0x65, 0x72, 0x65), " ", $xml); $xml = str_replace(array(0xe2, 0x80, 0xc2, 0xa6), " ", $xml); $xml = str_replace(array(0x6e, 0x20, 0x32, 0x30), " ", $xml); $xml = str_replace(array(0x67, 0x75, 0x65, 0x7a), " ", $xml); $xml = str_replace(array(0x73, 0x20, 0x61, 0x6e), " ", $xml); $xml = str_replace(array(0x74, 0x7a, 0x3c, 0x2f), " ", $xml); // 0x74 0x7A 0x20 0x77 $xml = format_utf8($xml); $xml = mb_convert_encoding($xml, "UTF-8", mb_detect_encoding($xml, "UTF-8, ISO-8859-1, ISO-8859-15", true)); // $xml = str_replace(array(0x74, 0x7a, 0x3c, 0x2f), " ", $xml); $xml = str_replace(array(0x74, 0x7a, 0x20, 0x77), " ", $xml); // 0x73 0x20 0x68 0x6F if (!($OUT = Functions::file_open($xml_path, "w"))) { return; } fwrite($OUT, $xml); fclose($OUT); echo "\nSaved [{$xml_path}]...\n"; } //-------- Functions::gzip_resource_xml($resource_id); $elapsed_time_sec = microtime(1) - $timestart; echo "\n"; echo "elapsed time = {$elapsed_time_sec} sec \n"; echo "elapsed time = " . $elapsed_time_sec / 60 . " minutes \n"; echo "elapsed time = " . $elapsed_time_sec / 60 / 60 . " hours \n"; echo "\n\n Done processing."; }
/**
 * Appends one tab-delimited row describing $rec to the taxon extension file
 * ($this->taxon_tab_file) of the DwC-A.
 *
 * Column order written:
 *   taxonID  scientificName  taxonRank  Subfamily  Tribe  Subtribe  genus  subgenus  Species
 *
 * Matching meta.xml fields / rank vocabulary, kept from the original notes:
 *   <field index="0" term="http://rs.tdwg.org/dwc/terms/taxonID"/>
 *   <field index="2" term="http://rs.tdwg.org/dwc/terms/scientificName"/>
 *   <field index="3" term="http://rs.tdwg.org/dwc/terms/taxonRank"/>
 *       http://rs.tdwg.org/ontology/voc/TaxonRank#Subfamily
 *       http://rs.tdwg.org/ontology/voc/TaxonRank#Tribe
 *       http://rs.tdwg.org/ontology/voc/TaxonRank#Subtribe
 *   <field index="4" term="http://rs.tdwg.org/dwc/terms/genus"/>
 *   <field index="5" term="http://rs.tdwg.org/dwc/terms/subgenus"/>
 *       http://rs.tdwg.org/ontology/voc/TaxonRank#Species
 *
 * @param array $rec taxon record; reads 'taxon_id', 'rank', optionally 'sciname2' / 'sciname',
 *                   and optionally 'ancestry'[<rank>]['name'] for each ancestor rank
 * @return void nothing is written when the file cannot be opened
 */
private function generate_taxon_extension_for_dwca($rec)
{
    if (!($OUT = Functions::file_open($this->taxon_tab_file, "a"))) {
        return;
    }

    // 'sciname2' takes precedence over 'sciname'. Default to an empty column so the
    // variable is always defined, even when the record carries neither name.
    $scientificName = "";
    if ($val = @$rec['sciname2']) {
        $scientificName = $val;
    } elseif ($val = @$rec['sciname']) {
        $scientificName = $val;
    }

    // Ancestor names in column order; a missing rank yields an empty column.
    $ancestry = array();
    foreach (array('subfamily', 'tribe', 'subtribe', 'genus', 'subgenus') as $rank) {
        $ancestry[$rank] = @$rec['ancestry'][$rank]['name'];
    }

    // Species column: "<genus> (<subgenus>) <species>", with genus and subgenus passed
    // through Functions::canonical_form(); left empty when there is no species ancestor.
    $final_species = "";
    if ($species = @$rec['ancestry']['species']['name']) {
        $final_species = Functions::canonical_form($ancestry['genus']);
        if ($ancestry['subgenus']) {
            $final_species .= " (" . Functions::canonical_form($ancestry['subgenus']) . ")";
        }
        $final_species .= " {$species}";
    }

    $columns = array(
        $rec['taxon_id'],
        $scientificName,
        $rec['rank'],
        $ancestry['subfamily'],
        $ancestry['tribe'],
        $ancestry['subtribe'],
        $ancestry['genus'],
        $ancestry['subgenus'],
        $final_species,
    );

    // Write the whole row in one call instead of one call per column.
    fwrite($OUT, implode("\t", $columns) . "\n");
    fclose($OUT);
}
/**
 * Pages through the occurrence service for one taxon, collects the records that
 * write_to_file() returns for each page, and saves the combined result as
 * "<basename>.json" (prefixed with "var data = ") under $this->save_path['cluster'].
 *
 * Paging stops when a request returns nothing, when the response reports
 * endOfRecords, or when the offset or the number of collected records
 * exceeds $this->rec_limit.
 *
 * @param mixed  $taxonKey value appended to $this->gbif_occurrence_data to build the request URL
 * @param string $basename name (without extension) of the JSON file to write
 * @return array|null array with keys 'records', 'count' and 'actual';
 *                    null when the output file cannot be opened
 */
private function get_georeference_data($taxonKey, $basename)
{
    $page_size = 300;
    $records = array();

    for ($offset = 0; ; $offset += $page_size) {
        if ($offset > $this->rec_limit) {
            break; //working... uncomment if u want to limit to 100,000
        }

        $url = $this->gbif_occurrence_data . $taxonKey . "&limit={$page_size}";
        if ($offset) {
            $url .= "&offset={$offset}";
        }

        $response = Functions::lookup_with_cache($url, $this->download_options);
        if (!$response) {
            break; //just try again next time...
        }

        $page = json_decode($response);
        $page_records = self::write_to_file($page);
        $records = array_merge($records, $page_records);
        echo "\n incremental count: " . count($page_records) . "\n";

        // limit no. of markers in Google maps is 100K
        if ($page->endOfRecords || count($records) > $this->rec_limit) {
            break;
        }
    }

    $total = count($records);
    $final = array(
        'records' => $records,
        'count'   => $total,
        'actual'  => $total,
    );
    echo "\nFinal count: " . $total . "\n";
    $encoded = json_encode($final);

    $this->file = Functions::file_open($this->save_path['cluster'] . $basename . ".json", "w");
    if (!$this->file) {
        return;
    }
    fwrite($this->file, "var data = " . $encoded);
    fclose($this->file);

    /* self::write_to_supplementary_fusion_text($final); */
    return $final;
}
/**
 * Reads a CSV file row by row and creates a type record for every data row
 * that passes valid_typestatus().
 *
 * The first row is the header; its values become the keys of each data row.
 * Header and data rows must have exactly 71 columns. Rows with another column
 * count are skipped and counted in $this->debug["not71"]. When the header
 * itself is unusable no data row can be mapped, so processing stops there.
 *
 * @param string   $csv_file        path of the CSV file to read
 * @param int|null $debug_row_limit debug cap: processing stops after the first fully
 *                                  processed data row whose row number is >= this value.
 *                                  Defaults to 10000, the limit previously hard-coded here;
 *                                  pass 0 or null to process the whole file.
 * @return void
 */
private function process_row_type_from_NHM($csv_file, $debug_row_limit = 10000)
{
    $expected_columns = 71;

    if (!($file = Functions::file_open($csv_file, "r"))) {
        return;
    }

    $fields = array();
    $i = 0;
    // fgetcsv() returns false at end of file and on read errors, so the loop ends in
    // both cases; a blank line comes back as array(null) and fails the column check.
    while (($temp = fgetcsv($file)) !== false) {
        $i++;
        if ($i % 100000 == 0) {
            echo "\n" . number_format($i) . " - ";
        }

        if ($i == 1) {
            if (count($temp) != $expected_columns) {
                @$this->debug["not71"]++;
                break; // without a valid header the data rows cannot be keyed
            }
            $fields = $temp;
            continue;
        }

        if (count($temp) != $expected_columns) {
            @$this->debug["not71"]++;
            continue;
        }

        // key every value by its header name
        $rec = array_combine($fields, $temp);
        $rec_object = json_decode(json_encode($rec), FALSE); //convert array to object
        $f = self::convert_rec_object_to_array($rec_object);
        if (!self::valid_typestatus($f["http://rs.tdwg.org/dwc/terms/typeStatus"], $f["http://rs.tdwg.org/dwc/terms/scientificName"])) {
            continue;
        }
        self::create_type_records_nmnh($f);

        if ($debug_row_limit && $i >= $debug_row_limit) {
            break; //debug
        }
    } // end while{}
    fclose($file);
}