/**
 * Counts the distinct page identifiers referenced by a BHL CSV export.
 *
 * The CSV text is written to a temporary file and read back with fgetcsv().
 * The first row is taken as the header; for every following row the filename
 * part of its "Url" column (pathinfo 'filename') is collected as a unique key.
 *
 * @param string $contents Raw CSV text. When empty nothing is written and the
 *                         temporary path is opened for reading as it is.
 * @return int|null Number of distinct "Url" filenames, or null when the
 *                  temporary file could not be opened.
 */
private function get_page_count_from_BHL_csv($contents)
 {
     $temp_path = temp_filepath();
     if ($contents) {
         if (!($file = Functions::file_open($temp_path, "w"))) {
             return;
         }
         fwrite($file, $contents);
         fclose($file);
     }
     if (!($file = Functions::file_open($temp_path, "r"))) {
         // do not leave a written-but-unreadable temp file behind
         if (file_exists($temp_path)) {
             unlink($temp_path);
         }
         return;
     }
     $page_ids = array();
     // header row; an empty file yields false, which is treated as "no columns"
     $fields = fgetcsv($file);
     if (!is_array($fields)) {
         $fields = array();
     }
     // fgetcsv() returns false at EOF and on read errors, so the loop always ends
     while (($row = fgetcsv($file)) !== false) {
         $rec = array();
         foreach ($row as $k => $value) {
             // ignore cells that have no matching header column
             if (isset($fields[$k])) {
                 $rec[$fields[$k]] = $value;
             }
         }
         // blank lines come back as array(null); they and rows without a Url
         // must not be counted as a page with an empty identifier
         if (!isset($rec["Url"]) || $rec["Url"] === '') {
             continue;
         }
         $page_ids[pathinfo($rec["Url"], PATHINFO_FILENAME)] = '';
     }
     fclose($file);
     unlink($temp_path);
     return count($page_ids);
 }
Example #2
0
 /**
  * Rebuilds the taxon_concept_names rows for the given taxon concepts.
  *
  * IDs are processed in batches of 500 (with a half-second pause before each
  * batch). For every batch the method:
  *   1. collects the names of the concepts' hierarchy entries ('preferred')
  *      and of their language_id=0 synonyms ('synonym'), excluding the
  *      common-name/acronym style synonym relations listed in the query;
  *   2. adds names sharing the same canonical form whose string equals the
  *      canonical form string;
  *   3. collects common names (language_id != 0) and marks at most one
  *      preferred name per taxon concept and language;
  *   4. deletes the existing taxon_concept_names rows for the batch and bulk
  *      loads two tab-delimited temporary files (scientific, then common names).
  *
  * The IDs are concatenated directly into the SQL text, so callers must pass
  * trusted numeric IDs.
  *
  * @param int|string|array $taxon_concept_ids A single numeric ID or a list of IDs.
  * @return false|null false when no IDs were given; otherwise nothing is returned
  *                    (also when a temporary file could not be opened).
  */
 public static function update_taxon_concept_names($taxon_concept_ids)
 {
     if (!$taxon_concept_ids) {
         return false;
     }
     if (is_numeric($taxon_concept_ids)) {
         $taxon_concept_ids = array($taxon_concept_ids);
     }
     $mysqli =& $GLOBALS['db_connection'];
     $started_new_transaction = false;
     if (!$mysqli->in_transaction()) {
         $mysqli->begin_transaction();
         $started_new_transaction = true;
     }
     $batches = array_chunk($taxon_concept_ids, 500);
     foreach ($batches as $batch_ids) {
         usleep(500000);
         // name_id => taxon_concept_id => 1
         $name_ids = array();
         // taxon_concept_id => name_id => hierarchy_entry_id => 'preferred' | 'synonym' | 1
         $matching_ids = array();
         $query = "\n            (SELECT he.taxon_concept_id, he.id, he.name_id, 'preferred' as type FROM hierarchy_entries he WHERE taxon_concept_id IN (" . implode(",", $batch_ids) . ") AND ((he.published=1 AND he.visibility_id=" . Visibility::visible()->id . ") OR (he.published=0 AND he.visibility_id=" . Visibility::preview()->id . ")))\n            UNION\n            (SELECT he.taxon_concept_id, s.hierarchy_entry_id, s.name_id, 'synonym' as type\n            FROM hierarchy_entries he\n            JOIN synonyms s ON (he.id=s.hierarchy_entry_id)\n            WHERE he.taxon_concept_id IN (" . implode(",", $batch_ids) . ")\n            AND s.language_id=0\n            AND s.synonym_relation_id!=" . SynonymRelation::find_or_create_by_translated_label('genbank common name')->id . "\n            AND s.synonym_relation_id!=" . SynonymRelation::find_or_create_by_translated_label('common name')->id . "\n            AND s.synonym_relation_id!=" . SynonymRelation::find_or_create_by_translated_label('blast name')->id . "\n            AND s.synonym_relation_id!=" . SynonymRelation::find_or_create_by_translated_label('genbank acronym')->id . "\n            AND s.synonym_relation_id!=" . SynonymRelation::find_or_create_by_translated_label('acronym')->id . "\n            AND ((he.published=1 AND he.visibility_id=" . Visibility::visible()->id . ") OR (he.published=0 AND he.visibility_id=" . Visibility::preview()->id . ")))";
         foreach ($mysqli->iterate_file($query) as $row_num => $row) {
             $taxon_concept_id = $row[0];
             $hierarchy_entry_id = $row[1];
             $name_id = $row[2];
             $name_type = $row[3];
             $name_ids[$name_id][$taxon_concept_id] = 1;
             $matching_ids[$taxon_concept_id][$name_id][$hierarchy_entry_id] = $name_type;
         }
         if ($name_ids) {
             //This makes sure we have a scientific name, gets the canonicalFormID
             $query = "SELECT n.id, n_match.id FROM names n JOIN canonical_forms cf ON (n.canonical_form_id=cf.id) JOIN names n_match ON (cf.id=n_match.canonical_form_id) WHERE n.id IN (" . implode(",", array_keys($name_ids)) . ") AND n_match.string=cf.string";
             foreach ($mysqli->iterate_file($query) as $row_num => $row) {
                 $original_name_id = $row[0];
                 $canonical_name_id = $row[1];
                 if ($original_name_id != $canonical_name_id) {
                     // register the canonical name under hierarchy_entry_id 0 for
                     // every concept that uses the original name
                     foreach ($name_ids[$original_name_id] as $taxon_concept_id => $junk) {
                         $matching_ids[$taxon_concept_id][$canonical_name_id][0] = 1;
                     }
                 }
             }
         }
         $common_names = array();
         // taxon_concept_id => language_id => 1 once a preferred name was chosen
         $preferred_in_language = array();
         $query = "SELECT he.taxon_concept_id, he.published, he.visibility_id, s.id, s.hierarchy_id, s.hierarchy_entry_id, s.name_id, s.language_id, s.preferred, s.vetted_id FROM hierarchy_entries he JOIN synonyms s ON (he.id=s.hierarchy_entry_id) JOIN vetted v ON (s.vetted_id=v.id) WHERE he.taxon_concept_id IN (" . implode(",", $batch_ids) . ") AND s.language_id!=0 AND (s.synonym_relation_id=" . SynonymRelation::genbank_common_name()->id . " OR s.synonym_relation_id=" . SynonymRelation::common_name()->id . ") ORDER BY s.language_id, (s.hierarchy_id=" . Hierarchy::contributors()->id . ") DESC, v.view_order ASC, s.preferred DESC, s.id DESC";
         foreach ($mysqli->iterate_file($query) as $row_num => $row) {
             $taxon_concept_id = $row[0];
             $published = $row[1];
             $visibility_id = $row[2];
             $synonym_id = $row[3];
             $hierarchy_id = $row[4];
             $hierarchy_entry_id = $row[5];
             $name_id = $row[6];
             $language_id = $row[7];
             $preferred = $row[8];
             $vetted_id = $row[9];
             // skipping Wikipedia common names entirely
             if ($hierarchy_id == @Hierarchy::wikipedia()->id) {
                 continue;
             }
             $curator_name = $hierarchy_id == @Hierarchy::contributors()->id;
             $ubio_name = $hierarchy_id == @Hierarchy::ubio()->id;
             // curator and uBio names are always kept; other names only when their
             // hierarchy entry is published and visible
             if ($curator_name || $ubio_name || ($published == 1 && $visibility_id == Visibility::visible()->id)) {
                 // only the first qualifying row per concept/language stays preferred
                 if (isset($preferred_in_language[$taxon_concept_id][$language_id])) {
                     $preferred = 0;
                 }
                 if ($preferred && $curator_name && ($vetted_id == Vetted::trusted()->id || $vetted_id == Vetted::unknown()->id)) {
                     $preferred_in_language[$taxon_concept_id][$language_id] = 1;
                 } else {
                     $preferred = 0;
                 }
                 if (!isset($common_names[$taxon_concept_id])) {
                     $common_names[$taxon_concept_id] = array();
                 }
                 $common_names[$taxon_concept_id][] = array('synonym_id' => $synonym_id, 'language_id' => $language_id, 'name_id' => $name_id, 'hierarchy_entry_id' => $hierarchy_entry_id, 'preferred' => $preferred, 'vetted_id' => $vetted_id, 'is_curator_name' => $curator_name);
             }
         }
         // if there was no preferred name
         foreach ($common_names as $taxon_concept_id => $arr) {
             foreach ($arr as $key => $arr2) {
                 // promote the first trusted/unknown name of a language that has no
                 // preferred name yet (the @ hides the notice for the unset index)
                 if (@(!$preferred_in_language[$taxon_concept_id][$arr2['language_id']]) && ($arr2['vetted_id'] == Vetted::trusted()->id || $arr2['vetted_id'] == Vetted::unknown()->id)) {
                     $common_names[$taxon_concept_id][$key]['preferred'] = 1;
                     $preferred_in_language[$taxon_concept_id][$arr2['language_id']] = 1;
                 }
             }
         }
         $mysqli->delete("DELETE FROM taxon_concept_names WHERE taxon_concept_id IN (" . implode(",", $batch_ids) . ")");
         $tmp_file_path = temp_filepath();
         if (!($LOAD_DATA_TEMP = fopen($tmp_file_path, "w+"))) {
             debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $tmp_file_path);
             // NOTE(review): this returns after the DELETE above and without
             // ending a transaction started here — confirm that is intended
             return;
         }
         /* Insert the scientific names */
         foreach ($matching_ids as $taxon_concept_id => $arr) {
             foreach ($arr as $name_id => $arr2) {
                 foreach ($arr2 as $hierarchy_entry_id => $type) {
                     $preferred = 0;
                     if ($hierarchy_entry_id && $type == "preferred") {
                         $preferred = 1;
                     }
                     fwrite($LOAD_DATA_TEMP, "{$taxon_concept_id}\t{$name_id}\t{$hierarchy_entry_id}\t0\t0\t{$preferred}\n");
                 }
             }
         }
         // close the handle before the file is loaded and removed, so the data is
         // completely written and no open handle points at a deleted file
         fclose($LOAD_DATA_TEMP);
         $mysqli->load_data_infile($tmp_file_path, 'taxon_concept_names');
         unlink($tmp_file_path);
         $tmp_file_path = temp_filepath();
         if (!($LOAD_DATA_TEMP = fopen($tmp_file_path, "w+"))) {
             debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $tmp_file_path);
             // NOTE(review): same early exit as above — the batch keeps its
             // scientific names but gets no common names
             return;
         }
         /* Insert the common names */
         foreach ($common_names as $taxon_concept_id => $arr) {
             foreach ($arr as $key => $arr2) {
                 $synonym_id = $arr2['synonym_id'];
                 $language_id = $arr2['language_id'];
                 $name_id = $arr2['name_id'];
                 $hierarchy_entry_id = $arr2['hierarchy_entry_id'];
                 $preferred = $arr2['preferred'];
                 $vetted_id = $arr2['vetted_id'];
                 fwrite($LOAD_DATA_TEMP, "{$taxon_concept_id}\t{$name_id}\t{$hierarchy_entry_id}\t{$language_id}\t1\t{$preferred}\t{$synonym_id}\t{$vetted_id}\n");
             }
         }
         fclose($LOAD_DATA_TEMP);
         $mysqli->load_data_infile($tmp_file_path, 'taxon_concept_names');
         unlink($tmp_file_path);
         // release the per-batch lookup tables before the next batch is built
         unset($matching_ids);
         unset($common_names);
         unset($name_ids);
         unset($preferred_in_language);
         $mysqli->commit();
     }
     if ($started_new_transaction) {
         $mysqli->end_transaction();
     }
 }
 /**
  * Writes one partner summary record to a temporary file and bulk loads it
  * into the google_analytics_partner_summaries table.
  *
  * @param array $stats Column values of the record; they are joined with tabs
  *                     in the order given.
  * @return void Nothing is returned; when the temporary file cannot be opened
  *              the failure is reported through debug() and nothing is loaded.
  */
 function write_partner_summaries($stats)
 {
     $summary_path = temp_filepath();
     $handle = fopen($summary_path, "w+");
     if (!$handle) {
         debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $summary_path);
         return;
     }

     // a single tab-delimited line terminated by a newline
     $record = implode("\t", $stats);
     fwrite($handle, $record . "\n");
     fclose($handle);

     // load the file, then discard it
     $this->mysqli->load_data_infile($summary_path, 'google_analytics_partner_summaries');
     unlink($summary_path);
 }
Example #4
0
 /**
  * Downloads a remote file and stores its contents in a local temporary file.
  *
  * Missing 'download_wait_time', 'timeout' and 'download_attempts' options are
  * filled from the DOWNLOAD_WAIT_TIME, DOWNLOAD_TIMEOUT_SECONDS and
  * DOWNLOAD_ATTEMPTS constants before the options are handed to the download
  * helper.
  *
  * @param string $url     Address of the remote file.
  * @param array  $options Optional settings:
  *                        - 'file_extension': appended (after a dot) to the temp path
  *                        - 'cache': when truthy the contents are fetched through
  *                          lookup_with_cache() instead of get_remote_file()
  *                        - 'download_wait_time', 'timeout', 'download_attempts'
  * @return string|false Path of the temporary file, or false when nothing was
  *                      downloaded or the file could not be written.
  */
 public static function save_remote_file_to_local($url, $options = array())
 {
     if (!isset($options['download_wait_time'])) {
         $options['download_wait_time'] = DOWNLOAD_WAIT_TIME;
     }
     if (!isset($options['timeout'])) {
         $options['timeout'] = DOWNLOAD_TIMEOUT_SECONDS;
     }
     if (!isset($options['download_attempts'])) {
         $options['download_attempts'] = DOWNLOAD_ATTEMPTS;
     }
     $temp_path = temp_filepath();
     if (isset($options['file_extension'])) {
         $temp_path .= "." . $options['file_extension'];
     }
     debug("\n\n Saving remote file: " . $url);
     debug("\n\n Temporary file: " . $temp_path);
     if (!empty($options['cache'])) {
         $file_contents = self::lookup_with_cache($url, $options);
     } else {
         $file_contents = self::get_remote_file($url, $options);
     }
     if (!$file_contents) {
         return false;
     }
     // every failure path reports false, so callers can rely on one sentinel
     if (!($file = Functions::file_open($temp_path, "w"))) {
         return false;
     }
     $bytes_written = fwrite($file, $file_contents);
     fclose($file);
     // a failed or short write would hand the caller a truncated file
     if ($bytes_written === false || $bytes_written < strlen($file_contents)) {
         if (file_exists($temp_path)) {
             unlink($temp_path);
         }
         return false;
     }
     return $temp_path;
 }
Example #5
0
 /**
  * Converts the loaded new-schema spreadsheet into an archive directory.
  *
  * Every worksheet except "controlled terms" is written to "<sheet name>.txt"
  * inside the output directory: row 1 (labels) and all rows after row 9 become
  * delimited text lines, rows 2-9 are kept as per-column field metadata. A
  * meta.xml built from that metadata is written next to the sheet files, and
  * the directory is packed into "<output directory>.tar.gz".
  *
  * @return string|false Path of the archive directory, or false when the
  *                      spreadsheet is unusable, is not a new-schema
  *                      spreadsheet, no output directory exists, or an output
  *                      file cannot be opened.
  */
 public function convert_to_new_schema_archive()
 {
     // previous problems reading spreadsheet
     if ($this->errors || @(!$this->spreadsheet_reader)) {
         return false;
     }
     if (!$this->is_new_schema_spreadsheet()) {
         return false;
     }
     $archive_temp_directory_path = $this->output_directory();
     // fail if for some reason there is no valid output directory
     if ($archive_temp_directory_path === null) {
         return false;
     }
     /*
         Row1: readable label
         Row2: field type URI
         Row3: required
         Row4: foreign key worksheet
         Row5: extension group label
         Row6: extension thesaurus URI
         Row7: definition
         Row8: comment
         Row9: example
     */
     $metadata_rows = array(
         2 => 'uri',
         3 => 'required',
         4 => 'foreign_key',
         5 => 'group',
         6 => 'thesaurus',
         7 => 'definition',
         8 => 'comment',
         9 => 'example',
     );
     $sheet_names = $this->spreadsheet_reader->getSheetNames();
     $worksheet_fields = array();
     // loop through all the worksheets in the file
     foreach ($sheet_names as $sheet_index => $sheet_name) {
         if ($sheet_name == "controlled terms") {
             continue;
         }
         $worksheet_reader = $this->spreadsheet_reader->setActiveSheetIndex($sheet_index);
         $highest_row = $worksheet_reader->getHighestRow();
         // e.g. 10
         $highest_column = $worksheet_reader->getHighestColumn();
         // e.g 'F'
         $highest_column_index = \PHPExcel_Cell::columnIndexFromString($highest_column);
         $sheet_file_path = $archive_temp_directory_path . "/{$sheet_name}.txt";
         if (!($OUTFILE = fopen($sheet_file_path, "w+"))) {
             debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $sheet_file_path);
             return false;
         }
         $worksheet_fields[$sheet_name] = array();
         for ($row_index = 1; $row_index <= $highest_row; $row_index++) {
             $values = array();
             for ($column_index = 0; $column_index < $highest_column_index; $column_index++) {
                 $cell = $worksheet_reader->getCellByColumnAndRow($column_index, $row_index, true);
                 if ($cell === null) {
                     $value = null;
                 } else {
                     $value = self::prepare_value($cell->getCalculatedValue());
                 }
                 if ($row_index > 9) {
                     // data row: expand shorthand using the column's field URI, if known
                     $field_uri = isset($worksheet_fields[$sheet_name][$column_index]['uri']) ? $worksheet_fields[$sheet_name][$column_index]['uri'] : null;
                     $values[] = self::fix_spreadsheet_shorthand($sheet_name, $field_uri, $value);
                 } elseif ($row_index == 1) {
                     // the label row is both metadata and the first output line
                     $worksheet_fields[$sheet_name][$column_index]['label'] = $value;
                     $values[] = $value;
                 } else {
                     // rows 2-9 only describe the column and are not written out
                     $field_key = $metadata_rows[$row_index];
                     if ($field_key === 'required') {
                         $value = strtolower($value);
                     }
                     $worksheet_fields[$sheet_name][$column_index][$field_key] = $value;
                 }
             }
             // skip rows without any non-empty value
             if (array_filter($values)) {
                 $row = self::$field_enclosure . implode(self::$field_enclosure . self::$field_delimeter . self::$field_enclosure, $values) . self::$field_enclosure . self::$row_delimiter;
                 fwrite($OUTFILE, $row);
             }
         }
         fclose($OUTFILE);
     }
     $meta_file_path = $archive_temp_directory_path . "/meta.xml";
     if (!($META = fopen($meta_file_path, "w+"))) {
         debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $meta_file_path);
         return false;
     }
     fwrite($META, self::meta_xml_from_worksheets($worksheet_fields));
     fclose($META);
     $info = pathinfo($archive_temp_directory_path);
     $temporary_tarball_path = temp_filepath();
     $final_tarball_path = $archive_temp_directory_path . ".tar.gz";
     // quote both paths so spaces or shell metacharacters cannot alter the command
     shell_exec("tar -czf " . escapeshellarg($temporary_tarball_path) . " --directory=" . escapeshellarg($info['dirname'] . "/" . $info['basename']) . " .");
     // remove a tarball left over from an earlier conversion before copying
     if (file_exists($final_tarball_path)) {
         unlink($final_tarball_path);
     }
     if (copy($temporary_tarball_path, $final_tarball_path)) {
         unlink($temporary_tarball_path);
     } else {
         debug(__CLASS__ . ":" . __LINE__ . ": Couldn't copy file: " . $temporary_tarball_path . " to " . $final_tarball_path);
     }
     return $archive_temp_directory_path;
 }