/**
 * Counts the distinct pages referenced by a BHL CSV document.
 *
 * The CSV text is written to a temporary file and read back with fgetcsv().
 * The first row is used as the header. For every following row the file name
 * (without extension) of its "Url" column is collected, and the number of
 * distinct file names is returned.
 *
 * Rows that carry no "Url" value (blank lines, short rows, or a header
 * without a "Url" column) are skipped instead of being counted as a page
 * with an empty id.
 *
 * @param string $contents raw CSV text
 * @return int|null number of distinct page ids, or null when the temporary
 *                  file could not be opened
 */
private function get_page_count_from_BHL_csv($contents) {
    $temp_path = temp_filepath();
    if ($contents) {
        if (!($file = Functions::file_open($temp_path, "w"))) {
            return;
        }
        fwrite($file, $contents);
        fclose($file);
    }

    if (!($file = Functions::file_open($temp_path, "r"))) {
        // Nothing can be read back; do not leave the temporary file behind.
        if (file_exists($temp_path)) {
            unlink($temp_path);
        }
        return;
    }

    $page_ids = array();
    $fields = fgetcsv($file);
    if (is_array($fields)) {
        // is_array() ends the loop on both end-of-file and read errors.
        while (is_array($values = fgetcsv($file))) {
            $rec = array();
            foreach ($values as $k => $value) {
                // Ignore cells that have no matching header column.
                if (!isset($fields[$k])) {
                    break;
                }
                $rec[$fields[$k]] = $value;
            }
            // fgetcsv() yields array(null) for a blank line, so "Url" may be missing or null.
            if (!isset($rec["Url"])) {
                continue;
            }
            $parts = pathinfo($rec["Url"]);
            // Used as a set: the key is the page id, the value is irrelevant.
            $page_ids[$parts["filename"]] = '';
        }
    }
    fclose($file);
    unlink($temp_path);
    return count($page_ids);
}
/**
 * Rebuilds the rows of `taxon_concept_names` for the given taxon concepts.
 *
 * The ids are processed in batches of 500. For each batch the method
 *   1. collects the names of the batch's hierarchy entries ('preferred') and
 *      of their language_id=0 synonyms ('synonym'), restricted to entries that
 *      are published+visible or unpublished+preview, and excluding the
 *      common name / blast name / acronym synonym relations;
 *   2. for every collected name, adds the names that share its canonical form
 *      and whose string equals the canonical form's string;
 *   3. collects the common names (language_id != 0) and decides which one is
 *      preferred per taxon concept and language;
 *   4. deletes the existing rows of the batch and bulk-loads the new rows
 *      from two temporary tab-separated files, then commits.
 *
 * A transaction is begun here only when the connection is not already in one,
 * and only then is end_transaction() called at the end.
 *
 * NOTE(review): the ids are interpolated into the SQL as given; callers are
 * presumably passing integers only - confirm.
 * NOTE(review): when a temporary file cannot be opened the method returns
 * after the DELETE has run and without ending a transaction it started; this
 * is the pre-existing behaviour and is left unchanged.
 *
 * @param int|string|array $taxon_concept_ids a single numeric id or an array of ids
 * @return false|null false when no ids were given, otherwise no value
 */
public static function update_taxon_concept_names($taxon_concept_ids) {
    if (!$taxon_concept_ids) {
        return false;
    }
    if (is_numeric($taxon_concept_ids)) {
        $taxon_concept_ids = array($taxon_concept_ids);
    }
    $mysqli =& $GLOBALS['db_connection'];

    $started_new_transaction = false;
    if (!$mysqli->in_transaction()) {
        $mysqli->begin_transaction();
        $started_new_transaction = true;
    }

    $batches = array_chunk($taxon_concept_ids, 500);
    foreach ($batches as $batch_ids) {
        usleep(500000);
        $id_list = implode(",", $batch_ids);
        $visible_id = Visibility::visible()->id;
        $preview_id = Visibility::preview()->id;
        // Entries that are published and visible, or unpublished and in preview.
        $entry_filter = "((he.published=1 AND he.visibility_id=" . $visible_id . ") OR (he.published=0 AND he.visibility_id=" . $preview_id . "))";

        // $name_ids[name_id][taxon_concept_id] = 1
        $name_ids = array();
        // $matching_ids[taxon_concept_id][name_id][hierarchy_entry_id] = 'preferred' | 'synonym' | 1
        $matching_ids = array();
        $query = "\n (SELECT he.taxon_concept_id, he.id, he.name_id, 'preferred' as type FROM hierarchy_entries he WHERE taxon_concept_id IN (" . $id_list . ") AND " . $entry_filter . ")\n UNION\n (SELECT he.taxon_concept_id, s.hierarchy_entry_id, s.name_id, 'synonym' as type\n FROM hierarchy_entries he\n JOIN synonyms s ON (he.id=s.hierarchy_entry_id)\n WHERE he.taxon_concept_id IN (" . $id_list . ")\n AND s.language_id=0"
            . "\n AND s.synonym_relation_id!=" . SynonymRelation::find_or_create_by_translated_label('genbank common name')->id
            . "\n AND s.synonym_relation_id!=" . SynonymRelation::find_or_create_by_translated_label('common name')->id
            . "\n AND s.synonym_relation_id!=" . SynonymRelation::find_or_create_by_translated_label('blast name')->id
            . "\n AND s.synonym_relation_id!=" . SynonymRelation::find_or_create_by_translated_label('genbank acronym')->id
            . "\n AND s.synonym_relation_id!=" . SynonymRelation::find_or_create_by_translated_label('acronym')->id
            . "\n AND " . $entry_filter . ")";
        foreach ($mysqli->iterate_file($query) as $row) {
            $taxon_concept_id = $row[0];
            $hierarchy_entry_id = $row[1];
            $name_id = $row[2];
            $name_type = $row[3];
            $name_ids[$name_id][$taxon_concept_id] = 1;
            $matching_ids[$taxon_concept_id][$name_id][$hierarchy_entry_id] = $name_type;
        }

        if ($name_ids) {
            // This makes sure we have a scientific name, gets the canonicalFormID
            $query = "SELECT n.id, n_match.id FROM names n JOIN canonical_forms cf ON (n.canonical_form_id=cf.id) JOIN names n_match ON (cf.id=n_match.canonical_form_id) WHERE n.id IN (" . implode(",", array_keys($name_ids)) . ") AND n_match.string=cf.string";
            foreach ($mysqli->iterate_file($query) as $row) {
                $original_name_id = $row[0];
                $canonical_name_id = $row[1];
                if ($original_name_id != $canonical_name_id) {
                    foreach ($name_ids[$original_name_id] as $taxon_concept_id => $junk) {
                        // Stored under hierarchy_entry_id 0, so it is never written as preferred below.
                        $matching_ids[$taxon_concept_id][$canonical_name_id][0] = 1;
                    }
                }
            }
        }

        $common_names = array();
        // $preferred_in_language[taxon_concept_id][language_id] = 1 once a preferred name is chosen
        $preferred_in_language = array();
        $query = "SELECT he.taxon_concept_id, he.published, he.visibility_id, s.id, s.hierarchy_id, s.hierarchy_entry_id, s.name_id, s.language_id, s.preferred, s.vetted_id FROM hierarchy_entries he JOIN synonyms s ON (he.id=s.hierarchy_entry_id) JOIN vetted v ON (s.vetted_id=v.id) WHERE he.taxon_concept_id IN (" . $id_list . ") AND s.language_id!=0 AND (s.synonym_relation_id=" . SynonymRelation::genbank_common_name()->id . " OR s.synonym_relation_id=" . SynonymRelation::common_name()->id . ") ORDER BY s.language_id, (s.hierarchy_id=" . Hierarchy::contributors()->id . ") DESC, v.view_order ASC, s.preferred DESC, s.id DESC";
        foreach ($mysqli->iterate_file($query) as $row) {
            $taxon_concept_id = $row[0];
            $published = $row[1];
            $visibility_id = $row[2];
            $synonym_id = $row[3];
            $hierarchy_id = $row[4];
            $hierarchy_entry_id = $row[5];
            $name_id = $row[6];
            $language_id = $row[7];
            $preferred = $row[8];
            $vetted_id = $row[9];

            // skipping Wikipedia common names entirely
            if ($hierarchy_id == @Hierarchy::wikipedia()->id) {
                continue;
            }
            $curator_name = $hierarchy_id == @Hierarchy::contributors()->id;
            $ubio_name = $hierarchy_id == @Hierarchy::ubio()->id;
            if ($curator_name || $ubio_name || ($published == 1 && $visibility_id == $visible_id)) {
                // Only one preferred name per concept and language.
                if (isset($preferred_in_language[$taxon_concept_id][$language_id])) {
                    $preferred = 0;
                }
                // In this pass only trusted/unknown curator names may stay preferred.
                if ($preferred && $curator_name && ($vetted_id == Vetted::trusted()->id || $vetted_id == Vetted::unknown()->id)) {
                    $preferred_in_language[$taxon_concept_id][$language_id] = 1;
                } else {
                    $preferred = 0;
                }
                if (!isset($common_names[$taxon_concept_id])) {
                    $common_names[$taxon_concept_id] = array();
                }
                $common_names[$taxon_concept_id][] = array(
                    'synonym_id' => $synonym_id,
                    'language_id' => $language_id,
                    'name_id' => $name_id,
                    'hierarchy_entry_id' => $hierarchy_entry_id,
                    'preferred' => $preferred,
                    'vetted_id' => $vetted_id,
                    'is_curator_name' => $curator_name,
                );
            }
        }

        // if there was no preferred name: the first trusted/unknown name (in query order) becomes preferred
        foreach ($common_names as $taxon_concept_id => $arr) {
            foreach ($arr as $key => $arr2) {
                if (empty($preferred_in_language[$taxon_concept_id][$arr2['language_id']]) && ($arr2['vetted_id'] == Vetted::trusted()->id || $arr2['vetted_id'] == Vetted::unknown()->id)) {
                    $common_names[$taxon_concept_id][$key]['preferred'] = 1;
                    $preferred_in_language[$taxon_concept_id][$arr2['language_id']] = 1;
                }
            }
        }

        $mysqli->delete("DELETE FROM taxon_concept_names WHERE taxon_concept_id IN (" . $id_list . ")");

        /* Insert the scientific names */
        $tmp_file_path = temp_filepath();
        if (!($LOAD_DATA_TEMP = fopen($tmp_file_path, "w+"))) {
            debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $tmp_file_path);
            return;
        }
        foreach ($matching_ids as $taxon_concept_id => $arr) {
            foreach ($arr as $name_id => $arr2) {
                foreach ($arr2 as $hierarchy_entry_id => $type) {
                    $preferred = 0;
                    if ($hierarchy_entry_id && $type == "preferred") {
                        $preferred = 1;
                    }
                    fwrite($LOAD_DATA_TEMP, "{$taxon_concept_id}\t{$name_id}\t{$hierarchy_entry_id}\t0\t0\t{$preferred}\n");
                }
            }
        }
        // Close the handle before the file is loaded and removed.
        fclose($LOAD_DATA_TEMP);
        $mysqli->load_data_infile($tmp_file_path, 'taxon_concept_names');
        unlink($tmp_file_path);

        /* Insert the common names */
        $tmp_file_path = temp_filepath();
        if (!($LOAD_DATA_TEMP = fopen($tmp_file_path, "w+"))) {
            debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $tmp_file_path);
            return;
        }
        foreach ($common_names as $taxon_concept_id => $arr) {
            foreach ($arr as $arr2) {
                $synonym_id = $arr2['synonym_id'];
                $language_id = $arr2['language_id'];
                $name_id = $arr2['name_id'];
                $hierarchy_entry_id = $arr2['hierarchy_entry_id'];
                $preferred = $arr2['preferred'];
                $vetted_id = $arr2['vetted_id'];
                fwrite($LOAD_DATA_TEMP, "{$taxon_concept_id}\t{$name_id}\t{$hierarchy_entry_id}\t{$language_id}\t1\t{$preferred}\t{$synonym_id}\t{$vetted_id}\n");
            }
        }
        // Close the handle before the file is loaded and removed.
        fclose($LOAD_DATA_TEMP);
        $mysqli->load_data_infile($tmp_file_path, 'taxon_concept_names');
        unlink($tmp_file_path);

        // Release the per-batch lookup tables before the next batch.
        unset($matching_ids);
        unset($common_names);
        unset($name_ids);
        unset($preferred_in_language);

        $mysqli->commit();
    }

    if ($started_new_transaction) {
        $mysqli->end_transaction();
    }
}
/**
 * Loads one record into the google_analytics_partner_summaries table.
 *
 * The values of $stats are joined with tabs into a single line, written to a
 * temporary file, handed to load_data_infile() and the file is then removed.
 * When the temporary file cannot be opened a debug message is emitted and
 * nothing is loaded.
 *
 * @param array $stats the column values of the record, in table column order
 *                     (presumably - the order is not checked here)
 * @return void
 */
function write_partner_summaries($stats) {
    $summary_path = temp_filepath();
    $handle = fopen($summary_path, "w+");
    if (!$handle) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $summary_path);
        return;
    }

    $record = implode("\t", $stats) . "\n";
    fwrite($handle, $record);
    fclose($handle);

    $this->mysqli->load_data_infile($summary_path, 'google_analytics_partner_summaries');
    unlink($summary_path);
}
/**
 * Downloads a remote file and stores it in a temporary local file.
 *
 * Recognised keys of $options (all optional):
 *   - 'download_wait_time' defaults to DOWNLOAD_WAIT_TIME
 *   - 'timeout'            defaults to DOWNLOAD_TIMEOUT_SECONDS
 *   - 'download_attempts'  defaults to DOWNLOAD_ATTEMPTS
 *   - 'file_extension'     appended (after a dot) to the temporary path
 *   - 'cache'              when truthy the content is fetched with
 *                          lookup_with_cache() instead of get_remote_file()
 * The completed $options array is passed on to the fetching method.
 *
 * @param string $url     location of the remote file
 * @param array  $options see above
 * @return string|false path of the local copy, or false when nothing was
 *                      downloaded or the local file could not be written
 */
public static function save_remote_file_to_local($url, $options = array()) {
    if (!isset($options['download_wait_time'])) {
        $options['download_wait_time'] = DOWNLOAD_WAIT_TIME;
    }
    if (!isset($options['timeout'])) {
        $options['timeout'] = DOWNLOAD_TIMEOUT_SECONDS;
    }
    if (!isset($options['download_attempts'])) {
        $options['download_attempts'] = DOWNLOAD_ATTEMPTS;
    }

    $temp_path = temp_filepath();
    if (isset($options['file_extension'])) {
        $temp_path .= "." . $options['file_extension'];
    }
    debug("\n\n Saving remote file: " . $url);
    debug("\n\n Temporary file: " . $temp_path);

    if (!empty($options['cache'])) {
        $file_contents = self::lookup_with_cache($url, $options);
    } else {
        $file_contents = self::get_remote_file($url, $options);
    }
    if (!$file_contents) {
        return false;
    }

    if (!($file = Functions::file_open($temp_path, "w"))) {
        // Report failure the same way as an empty download does.
        return false;
    }
    $bytes_written = fwrite($file, $file_contents);
    fclose($file);

    // Never hand back the path of a truncated file.
    if ($bytes_written === false || $bytes_written < strlen((string) $file_contents)) {
        if (file_exists($temp_path)) {
            unlink($temp_path);
        }
        return false;
    }
    return $temp_path;
}
/**
 * Converts the loaded new-schema spreadsheet into an archive directory and tarball.
 *
 * Every worksheet except "controlled terms" is written to
 * "<output directory>/<sheet name>.txt". Rows 1-9 of a worksheet describe its
 * columns and are collected as field metadata; row 1 (the labels) and every
 * row after row 9 are written as delimited lines, skipping rows whose values
 * are all empty. The collected metadata is turned into meta.xml, and the
 * directory is packed into "<output directory>.tar.gz".
 *
 * @return string|false the archive directory path, or false when the
 *                      spreadsheet is unusable, is not a new-schema
 *                      spreadsheet, there is no output directory, or an
 *                      output file could not be opened
 */
public function convert_to_new_schema_archive() {
    // previous problems reading spreadsheet
    if ($this->errors || @(!$this->spreadsheet_reader)) {
        return false;
    }
    if (!$this->is_new_schema_spreadsheet()) {
        return false;
    }
    $archive_temp_directory_path = $this->output_directory();
    // fail if for some reason there is no valid output directory
    if ($archive_temp_directory_path === null) {
        return false;
    }

    // Header rows that are stored verbatim under the given metadata key.
    // Row 1 (label) and row 3 (required) need extra handling and are dealt with separately.
    $metadata_keys = array(
        2 => 'uri',
        4 => 'foreign_key',
        5 => 'group',
        6 => 'thesaurus',
        7 => 'definition',
        8 => 'comment',
        9 => 'example',
    );

    $sheet_names = $this->spreadsheet_reader->getSheetNames();
    $worksheet_fields = array();
    // loop through all the worksheets in the file
    foreach ($sheet_names as $sheet_index => $sheet_name) {
        if ($sheet_name == "controlled terms") {
            continue;
        }
        $worksheet_reader = $this->spreadsheet_reader->setActiveSheetIndex($sheet_index);
        $highest_row = $worksheet_reader->getHighestRow(); // e.g. 10
        $highest_column = $worksheet_reader->getHighestColumn(); // e.g 'F'
        $highest_column_index = \PHPExcel_Cell::columnIndexFromString($highest_column);

        $sheet_file_path = $archive_temp_directory_path . "/{$sheet_name}.txt";
        if (!($OUTFILE = fopen($sheet_file_path, "w+"))) {
            debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $sheet_file_path);
            return false;
        }

        $worksheet_fields[$sheet_name] = array();
        for ($row_index = 1; $row_index <= $highest_row; $row_index++) {
            $values = array();
            for ($column_index = 0; $column_index < $highest_column_index; $column_index++) {
                $cell = $worksheet_reader->getCellByColumnAndRow($column_index, $row_index, true);
                if ($cell === null) {
                    $value = null;
                } else {
                    $value = self::prepare_value($cell->getCalculatedValue());
                }
                /*
                    Row1: readable label
                    Row2: field type URI
                    Row3: required
                    Row4: foreign key worksheet
                    Row5: extension group label
                    Row6: extension thesaurus URI
                    Row7: definition
                    Row8: comment
                    Row9: example
                */
                if ($row_index > 9) {
                    // Data row: expand shorthand values using the URI recorded for this column.
                    $field_uri = isset($worksheet_fields[$sheet_name][$column_index]['uri'])
                        ? $worksheet_fields[$sheet_name][$column_index]['uri']
                        : null;
                    $values[] = self::fix_spreadsheet_shorthand($sheet_name, $field_uri, $value);
                } elseif ($row_index == 1) {
                    // The label row is metadata and is also written out as the first line.
                    $worksheet_fields[$sheet_name][$column_index]['label'] = $value;
                    $values[] = $value;
                } elseif ($row_index == 3) {
                    $worksheet_fields[$sheet_name][$column_index]['required'] = strtolower($value);
                } elseif (isset($metadata_keys[$row_index])) {
                    $worksheet_fields[$sheet_name][$column_index][$metadata_keys[$row_index]] = $value;
                }
            }

            // Write the row only when at least one value is truthy.
            if (array_filter($values)) {
                $row = self::$field_enclosure
                    . implode(self::$field_enclosure . self::$field_delimeter . self::$field_enclosure, $values)
                    . self::$field_enclosure
                    . self::$row_delimiter;
                fwrite($OUTFILE, $row);
            }
        }
        fclose($OUTFILE);
    }

    $meta_file_path = $archive_temp_directory_path . "/meta.xml";
    if (!($META = fopen($meta_file_path, "w+"))) {
        debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $meta_file_path);
        return false;
    }
    fwrite($META, self::meta_xml_from_worksheets($worksheet_fields));
    fclose($META);

    $info = pathinfo($archive_temp_directory_path);
    $temporary_tarball_path = temp_filepath();
    $final_tarball_path = $archive_temp_directory_path . ".tar.gz";
    // NOTE(review): the paths are passed to the shell unescaped; this works only while
    // they contain no spaces or shell metacharacters - consider escapeshellarg().
    shell_exec("tar -czf {$temporary_tarball_path} --directory=" . $info['dirname'] . "/" . $info['basename'] . " .");

    // Remove a tarball left over from an earlier run before putting the new one in place.
    if (file_exists($final_tarball_path)) {
        unlink($final_tarball_path);
    }
    if (copy($temporary_tarball_path, $final_tarball_path)) {
        unlink($temporary_tarball_path);
    }
    return $archive_temp_directory_path;
}