/**
 * Rebuilds the `data_objects_taxon_concepts` table from scratch.
 *
 * A fresh `data_objects_taxon_concepts_tmp` table is created and filled in
 * batches of data object ids from three sources (data_objects_hierarchy_entries,
 * curated_data_objects_hierarchy_entries and users_data_objects), restricted to
 * taxon concepts whose supercedure_id is NULL or 0. Each batch is exported with
 * select_into_outfile() and imported with load_data_infile(). When the temporary
 * table ends up with at least one row it is swapped with the live table.
 *
 * Batches cover the id ranges [i, i + batch_size - 1], so no id is exported by
 * two batches and a table holding a single data object is still processed.
 */
public static function data_objects_taxon_concepts()
 {
     $db = $GLOBALS['db_connection'];
     // create a temporary table for this session
     $db->query("DROP TABLE IF EXISTS `data_objects_taxon_concepts_tmp`");
     $db->query("CREATE TABLE `data_objects_taxon_concepts_tmp` (\n                  `taxon_concept_id` int unsigned NOT NULL,\n                  `data_object_id` int unsigned NOT NULL,\n                  PRIMARY KEY  (`taxon_concept_id`, `data_object_id`),\n                  KEY `data_object_id` (`data_object_id`)\n                ) ENGINE=InnoDB DEFAULT CHARSET=utf8");
     $db->insert("CREATE TABLE IF NOT EXISTS data_objects_taxon_concepts LIKE data_objects_taxon_concepts_tmp");

     $batch_size = 50000;
     $result = $db->query("SELECT MIN(id) min, MAX(id) max FROM data_objects");
     $row = $result ? $result->fetch_assoc() : null;
     // MIN()/MAX() come back as NULL when data_objects is empty; nothing to export then
     if ($row && $row['min'] !== null && $row['max'] !== null) {
         $start = (int) $row['min'];
         $stop = (int) $row['max'];
         $visible_id = Visibility::visible()->id;
         // one export query per association table; the id range is appended per batch
         $source_queries = array(
             "SELECT tc.id, do.id FROM taxon_concepts tc JOIN hierarchy_entries he ON (tc.id=he.taxon_concept_id) JOIN data_objects_hierarchy_entries dohe ON (he.id=dohe.hierarchy_entry_id) JOIN data_objects do ON (dohe.data_object_id=do.id) WHERE (tc.supercedure_id IS NULL OR tc.supercedure_id=0) AND (do.published=1 OR dohe.visibility_id!=" . $visible_id . ")",
             "SELECT tc.id, do.id FROM taxon_concepts tc JOIN hierarchy_entries he ON (tc.id=he.taxon_concept_id) JOIN curated_data_objects_hierarchy_entries cdohe ON (he.id=cdohe.hierarchy_entry_id) JOIN data_objects do ON (cdohe.data_object_id=do.id) WHERE (tc.supercedure_id IS NULL OR tc.supercedure_id=0) AND (do.published=1 OR cdohe.visibility_id!=" . $visible_id . ")",
             "SELECT tc.id, do.id FROM taxon_concepts tc JOIN users_data_objects udo ON (tc.id=udo.taxon_concept_id) JOIN data_objects do ON (udo.data_object_id=do.id) WHERE (tc.supercedure_id IS NULL OR tc.supercedure_id=0) AND (do.published=1 OR udo.visibility_id!=" . $visible_id . ")",
         );
         $total_batches = (int) floor(($stop - $start) / $batch_size) + 1;
         $batch_number = 0;
         // "<=" so that the last id (and the start == stop case) gets its own batch
         for ($i = $start; $i <= $stop; $i += $batch_size) {
             $batch_number++;
             debug("Inserting " . $batch_number . " of " . $total_batches);
             // BETWEEN is inclusive on both ends, hence the "- 1"
             $id_range = " AND do.id BETWEEN {$i} AND " . ($i + $batch_size - 1);
             foreach ($source_queries as $source_query) {
                 $outfile = $db->select_into_outfile($source_query . $id_range);
                 $db->load_data_infile($outfile, 'data_objects_taxon_concepts_tmp');
                 unlink($outfile);
             }
         }
     }

     // only replace the live table when the rebuild actually produced rows
     $result = $db->query("SELECT 1 FROM data_objects_taxon_concepts_tmp LIMIT 1");
     if ($result && $result->fetch_assoc()) {
         $db->swap_tables("data_objects_taxon_concepts", "data_objects_taxon_concepts_tmp");
     }
 }
 /**
  * Reads curated preferred entries for a range of taxon concepts.
  *
  * Queries curated_taxon_concept_preferred_entries for concept ids between
  * $start and $start + $limit (inclusive), keeping only rows whose hierarchy
  * entry is published and has the "visible" visibility.
  *
  * @param int $start first taxon concept id of the range
  * @param int $limit added to $start to obtain the upper bound of the range
  * @return array map of taxon_concept_id => hierarchy_entry_id; a later row for
  *               the same concept overwrites an earlier one
  */
 private function lookup_curated_best_entries($start, $limit)
 {
     $range_end = $start + $limit;
     $visible_id = Visibility::visible()->id;
     $sql = "SELECT c.taxon_concept_id, c.hierarchy_entry_id\n            FROM curated_taxon_concept_preferred_entries c\n            JOIN hierarchy_entries he ON (c.hierarchy_entry_id=he.id)\n            WHERE c.taxon_concept_id BETWEEN {$start} AND {$range_end} AND he.published=1 AND he.visibility_id={$visible_id}";

     $best_entry_by_concept = array();
     foreach ($this->mysqli->iterate_file($sql) as $fields) {
         // column 0 is the concept id, column 1 the preferred hierarchy entry id
         $best_entry_by_concept[$fields[0]] = $fields[1];
     }
     return $best_entry_by_concept;
 }
Example #3
0
 /**
  * Counts, per taxon concept, how many visible hierarchy entries carry each rank.
  *
  * The tallies are accumulated into $this->ranks_ids[taxon_concept_id][rank_id].
  * Rows with an empty/zero rank_id are ignored.
  *
  * @param int   $start             first taxon concept id of the range
  * @param int   $limit             added to $start for the inclusive upper bound
  * @param array $taxon_concept_ids when non-empty, restricts the query to exactly
  *                                 these ids and the range is not used
  * @return void
  */
 function lookup_ranks($start, $limit, &$taxon_concept_ids = array())
 {
     debug("querying ranks");
     $sql = "SELECT he.taxon_concept_id, he.rank_id\n          FROM hierarchy_entries he USE INDEX (taxon_concept_id)\n          WHERE he.visibility_id=" . Visibility::visible()->id . "\n          AND he.taxon_concept_id ";
     // explicit id list wins over the start/limit range
     $sql .= $taxon_concept_ids
         ? ("IN (" . implode(",", $taxon_concept_ids) . ")")
         : ("BETWEEN {$start} AND " . ($start + $limit));

     foreach ($this->mysqli_slave->iterate_file($sql) as $fields) {
         $concept_id = $fields[0];
         $rank = $fields[1];
         if (!$rank) {
             continue;
         }
         if (isset($this->ranks_ids[$concept_id][$rank])) {
             $this->ranks_ids[$concept_id][$rank] += 1;
         } else {
             $this->ranks_ids[$concept_id][$rank] = 1;
         }
     }
 }
 /**
  * Tells whether a taxon concept has at least one hierarchy entry with the
  * "visible" visibility in the given hierarchy.
  *
  * @param int $taxon_concept_id
  * @param int $hierarchy_id
  * @return bool true when a matching hierarchy_entries row exists
  */
 private static function concept_published_in_hierarchy($taxon_concept_id, $hierarchy_id)
 {
     $sql = "SELECT 1 FROM hierarchy_entries WHERE taxon_concept_id={$taxon_concept_id} AND hierarchy_id={$hierarchy_id} AND visibility_id=" . Visibility::visible()->id . " LIMIT 1";
     $result = $GLOBALS['mysqli_connection']->query($sql);
     if (!$result) {
         return false;
     }
     // any fetched row means the concept is present
     return (bool) $result->fetch_assoc();
 }
 /**
  * Inserts one GBIF reference row and links it to the hierarchy entry of its taxon.
  *
  * The row is skipped when the archive validator reports an error for its line,
  * when its taxonID is not among $this->taxon_ids_inserted, when its dc:type is
  * not 'taxon', or when no citation text can be assembled. The citation is built
  * from creator, date, title, source and description (in that order) joined by
  * ". ". The reference is stored with only its full citation; title and authors
  * are passed as null and the creation date as the zero date.
  *
  * @param array $row        archive row keyed by term URI
  * @param array $parameters must contain 'archive_table_definition' (object with
  *                          a location property) and 'archive_line_number'
  * @return false|null false when the validator flagged the line, null otherwise
  */
 public function insert_gbif_references($row, $parameters)
 {
     self::debug_iterations("Inserting GBIF reference");
     $this->commit_iterations("GBIFReference", 500);
     if ($this->archive_validator->has_error_by_line('http://rs.gbif.org/terms/1.0/reference', $parameters['archive_table_definition']->location, $parameters['archive_line_number'])) {
         write_to_resource_harvesting_log("ERROR: insert_agents: insert_gbif_references" . ",file_location:" . $parameters['archive_table_definition']->location . ",line_number:" . $parameters['archive_line_number']);
         return false;
     }
     $reference_id = @self::field_decode($row['http://purl.org/dc/terms/identifier']);
     $taxon_id = @self::field_decode($row['http://rs.tdwg.org/dwc/terms/taxonID']);
     // we really only need to insert the references that relate to taxa
     if (!isset($this->taxon_ids_inserted[$taxon_id])) {
         return;
     }
     $title = @self::field_decode($row['http://purl.org/dc/terms/title']);
     $author = @self::field_decode($row['http://purl.org/dc/terms/creator']);
     $date = @self::field_decode($row['http://purl.org/dc/terms/date']);
     $description = @self::field_decode($row['http://purl.org/dc/terms/description']);
     $source = @self::field_decode($row['http://purl.org/dc/terms/source']);
     $language = Language::find_or_create_for_parser(@self::field_decode($row['http://purl.org/dc/terms/language']));
     $type = @self::field_decode($row['http://purl.org/dc/terms/type']);
     if ($type != 'taxon') {
         return;
     }

     // assemble the citation from the non-empty parts, then collapse the doubled
     // punctuation/spaces that joining can introduce
     $reference_parts = array_filter(array($author, $date, $title, $source, $description));
     $full_reference = implode(". ", $reference_parts);
     $full_reference = str_replace("..", ".", $full_reference);
     $full_reference = str_replace("  ", " ", $full_reference);
     if (!$full_reference) {
         return;
     }

     if ($taxon_info = $this->taxon_ids_inserted[$taxon_id]) {
         self::uncompress_array($taxon_info);
         // the individual parts already live inside full_reference, so the
         // structured fields are intentionally stored empty
         $params = array(
             "provider_mangaed_id" => $reference_id,
             "full_reference" => $full_reference,
             "title" => null,
             "authors" => null,
             "publication_created_at" => '0000-00-00 00:00:00',
             "language_id" => @$language->id ?: 0,
         );
         $reference = Reference::find_or_create($params);
         $he_id = $taxon_info['hierarchy_entry_id'];
         $this->mysqli->insert("INSERT IGNORE INTO hierarchy_entries_refs (hierarchy_entry_id, ref_id) VALUES ({$he_id}, {$reference->id})");
         $this->mysqli->query("UPDATE refs SET published=1, visibility_id=" . Visibility::visible()->id . " WHERE id={$reference->id}");
         // TODO: find_or_create doesn't work here because of the dual primary key
         // HierarchyEntriesRef::find_or_create(array(
         //     'hierarchy_entry_id'    => $hierarchy_entry_id,
         //     'ref_id'                => $reference->id));
     }
 }
Example #6
0
 /**
  * Rebuilds the taxon_concept_names rows for the given taxon concepts.
  *
  * Concepts are processed in batches of 500 with a 0.5 s pause before each
  * batch. For every batch the method
  *   1. collects scientific names from hierarchy entries and their synonyms
  *      (language_id = 0, excluding the common-name/acronym relations), plus
  *      names whose string equals the canonical form of a collected name;
  *   2. collects common names (language_id != 0), skipping the Wikipedia
  *      hierarchy, and decides one preferred name per concept and language;
  *   3. writes both sets to temporary files, deletes the batch's existing rows,
  *      loads the files into taxon_concept_names and commits.
  *
  * Both temporary files are prepared and closed BEFORE the DELETE is issued, so
  * a failure to create them cannot leave a batch deleted but not re-inserted.
  * A transaction is started when none is in progress and is ended on every exit
  * path after that point.
  *
  * @param int|array $taxon_concept_ids a single id or a list of ids
  * @return false|null false when no ids were given, null otherwise
  */
 public static function update_taxon_concept_names($taxon_concept_ids)
 {
     if (!$taxon_concept_ids) {
         return false;
     }
     if (is_numeric($taxon_concept_ids)) {
         $taxon_concept_ids = array($taxon_concept_ids);
     }
     $mysqli =& $GLOBALS['db_connection'];
     $started_new_transaction = false;
     if (!$mysqli->in_transaction()) {
         $mysqli->begin_transaction();
         $started_new_transaction = true;
     }

     // ids and SQL fragments that do not change between batches or rows
     $visible_id = Visibility::visible()->id;
     $preview_id = Visibility::preview()->id;
     $trusted_id = Vetted::trusted()->id;
     $unknown_id = Vetted::unknown()->id;
     $contributors_id = Hierarchy::contributors()->id;
     $wikipedia_id = @Hierarchy::wikipedia()->id;
     $ubio_id = @Hierarchy::ubio()->id;
     $entry_filter = "((he.published=1 AND he.visibility_id=" . $visible_id . ") OR (he.published=0 AND he.visibility_id=" . $preview_id . "))";
     $excluded_relations = "";
     foreach (array('genbank common name', 'common name', 'blast name', 'genbank acronym', 'acronym') as $relation_label) {
         $excluded_relations .= "            AND s.synonym_relation_id!=" . SynonymRelation::find_or_create_by_translated_label($relation_label)->id . "\n";
     }
     $common_relations = "(s.synonym_relation_id=" . SynonymRelation::genbank_common_name()->id . " OR s.synonym_relation_id=" . SynonymRelation::common_name()->id . ")";

     foreach (array_chunk($taxon_concept_ids, 500) as $batch_ids) {
         usleep(500000);
         $id_list = implode(",", $batch_ids);

         /* Collect the scientific names */
         $name_ids = array();
         $matching_ids = array();
         $query = "\n            (SELECT he.taxon_concept_id, he.id, he.name_id, 'preferred' as type FROM hierarchy_entries he WHERE taxon_concept_id IN (" . $id_list . ") AND " . $entry_filter . ")\n            UNION\n            (SELECT he.taxon_concept_id, s.hierarchy_entry_id, s.name_id, 'synonym' as type\n            FROM hierarchy_entries he\n            JOIN synonyms s ON (he.id=s.hierarchy_entry_id)\n            WHERE he.taxon_concept_id IN (" . $id_list . ")\n            AND s.language_id=0\n" . $excluded_relations . "            AND " . $entry_filter . ")";
         foreach ($mysqli->iterate_file($query) as $row) {
             $taxon_concept_id = $row[0];
             $hierarchy_entry_id = $row[1];
             $name_id = $row[2];
             $name_type = $row[3];
             $name_ids[$name_id][$taxon_concept_id] = 1;
             $matching_ids[$taxon_concept_id][$name_id][$hierarchy_entry_id] = $name_type;
         }
         if ($name_ids) {
             //This makes sure we have a scientific name, gets the canonicalFormID
             $query = "SELECT n.id, n_match.id FROM names n JOIN canonical_forms cf ON (n.canonical_form_id=cf.id) JOIN names n_match ON (cf.id=n_match.canonical_form_id) WHERE n.id IN (" . implode(",", array_keys($name_ids)) . ") AND n_match.string=cf.string";
             foreach ($mysqli->iterate_file($query) as $row) {
                 $original_name_id = $row[0];
                 $canonical_name_id = $row[1];
                 if ($original_name_id != $canonical_name_id) {
                     foreach ($name_ids[$original_name_id] as $taxon_concept_id => $junk) {
                         // hierarchy entry 0 marks a name added via its canonical form
                         $matching_ids[$taxon_concept_id][$canonical_name_id][0] = 1;
                     }
                 }
             }
         }

         /* Collect the common names */
         $common_names = array();
         $preferred_in_language = array();
         $query = "SELECT he.taxon_concept_id, he.published, he.visibility_id, s.id, s.hierarchy_id, s.hierarchy_entry_id, s.name_id, s.language_id, s.preferred, s.vetted_id FROM hierarchy_entries he JOIN synonyms s ON (he.id=s.hierarchy_entry_id) JOIN vetted v ON (s.vetted_id=v.id) WHERE he.taxon_concept_id IN (" . $id_list . ") AND s.language_id!=0 AND " . $common_relations . " ORDER BY s.language_id, (s.hierarchy_id=" . $contributors_id . ") DESC, v.view_order ASC, s.preferred DESC, s.id DESC";
         foreach ($mysqli->iterate_file($query) as $row) {
             $taxon_concept_id = $row[0];
             $published = $row[1];
             $visibility_id = $row[2];
             $synonym_id = $row[3];
             $hierarchy_id = $row[4];
             $hierarchy_entry_id = $row[5];
             $name_id = $row[6];
             $language_id = $row[7];
             $preferred = $row[8];
             $vetted_id = $row[9];
             // skipping Wikipedia common names entirely
             if ($hierarchy_id == $wikipedia_id) {
                 continue;
             }
             $curator_name = $hierarchy_id == $contributors_id;
             $ubio_name = $hierarchy_id == $ubio_id;
             // curator and uBio names are always kept; others only when published and visible
             if (!($curator_name || $ubio_name || ($published == 1 && $visibility_id == $visible_id))) {
                 continue;
             }
             if (isset($preferred_in_language[$taxon_concept_id][$language_id])) {
                 $preferred = 0;
             }
             // at this stage only a trusted/unreviewed curator name may claim "preferred"
             if ($preferred && $curator_name && ($vetted_id == $trusted_id || $vetted_id == $unknown_id)) {
                 $preferred_in_language[$taxon_concept_id][$language_id] = 1;
             } else {
                 $preferred = 0;
             }
             $common_names[$taxon_concept_id][] = array('synonym_id' => $synonym_id, 'language_id' => $language_id, 'name_id' => $name_id, 'hierarchy_entry_id' => $hierarchy_entry_id, 'preferred' => $preferred, 'vetted_id' => $vetted_id, 'is_curator_name' => $curator_name);
         }
         // if there was no preferred name: promote the first trusted/unreviewed
         // name (in query order) for each concept and language
         foreach ($common_names as $taxon_concept_id => $names) {
             foreach ($names as $key => $common_name) {
                 $language_id = $common_name['language_id'];
                 $vetted_ok = $common_name['vetted_id'] == $trusted_id || $common_name['vetted_id'] == $unknown_id;
                 if (empty($preferred_in_language[$taxon_concept_id][$language_id]) && $vetted_ok) {
                     $common_names[$taxon_concept_id][$key]['preferred'] = 1;
                     $preferred_in_language[$taxon_concept_id][$language_id] = 1;
                 }
             }
         }

         /* Prepare both load files before touching taxon_concept_names */
         $scientific_path = temp_filepath();
         if (!($scientific_file = fopen($scientific_path, "w+"))) {
             debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $scientific_path);
             if ($started_new_transaction) {
                 $mysqli->end_transaction();
             }
             return;
         }
         $common_path = temp_filepath();
         if (!($common_file = fopen($common_path, "w+"))) {
             debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $common_path);
             fclose($scientific_file);
             unlink($scientific_path);
             if ($started_new_transaction) {
                 $mysqli->end_transaction();
             }
             return;
         }
         foreach ($matching_ids as $taxon_concept_id => $names) {
             foreach ($names as $name_id => $entries) {
                 foreach ($entries as $hierarchy_entry_id => $type) {
                     $preferred = 0;
                     if ($hierarchy_entry_id && $type == "preferred") {
                         $preferred = 1;
                     }
                     fwrite($scientific_file, "{$taxon_concept_id}\t{$name_id}\t{$hierarchy_entry_id}\t0\t0\t{$preferred}\n");
                 }
             }
         }
         foreach ($common_names as $taxon_concept_id => $names) {
             foreach ($names as $common_name) {
                 $synonym_id = $common_name['synonym_id'];
                 $language_id = $common_name['language_id'];
                 $name_id = $common_name['name_id'];
                 $hierarchy_entry_id = $common_name['hierarchy_entry_id'];
                 $preferred = $common_name['preferred'];
                 $vetted_id = $common_name['vetted_id'];
                 fwrite($common_file, "{$taxon_concept_id}\t{$name_id}\t{$hierarchy_entry_id}\t{$language_id}\t1\t{$preferred}\t{$synonym_id}\t{$vetted_id}\n");
             }
         }
         // close the handles so everything is on disk before the files are loaded
         fclose($scientific_file);
         fclose($common_file);

         /* Replace the batch's rows */
         $mysqli->delete("DELETE FROM taxon_concept_names WHERE taxon_concept_id IN (" . $id_list . ")");
         $mysqli->load_data_infile($scientific_path, 'taxon_concept_names');
         unlink($scientific_path);
         $mysqli->load_data_infile($common_path, 'taxon_concept_names');
         unlink($common_path);

         unset($matching_ids, $common_names, $name_ids, $preferred_in_language);
         $mysqli->commit();
     }
     if ($started_new_transaction) {
         $mysqli->end_transaction();
     }
 }
 /**
  * Stores one harvested data object and attaches it to a hierarchy entry.
  *
  * Missing rights/license/language values on the object are filled in from the
  * resource, the object is matched against existing ones via
  * DataObject::find_and_compare(), and it is then linked to $hierarchy_entry and
  * to the resource's harvest event. Unless the comparison status is "Reused",
  * the object's agents, audiences, info items, table-of-contents entries and
  * references are deleted and re-added from $options[1].
  *
  * @param object $hierarchy_entry entry the data object is added to
  * @param array  $options         [0] => the data object values,
  *                                [1] => array with 'agents', 'audiences' and
  *                                optionally 'info_items' and 'refs'
  * @return false|null false when no data object id was obtained, null otherwise
  */
 function add_data_object($hierarchy_entry, $options)
 {
     $d = $options[0];
     $parameters = $options[1];

     // Add default values from resource
     foreach (array('rights_statement', 'rights_holder', 'license_id', 'language_id') as $field) {
         if (@(!$d->{$field}) && $this->resource->{$field}) {
             $d->{$field} = $this->resource->{$field};
         }
     }

     list($data_object, $status, $existing) = DataObject::find_and_compare($this->resource, $d, $this->content_manager);
     $GLOBALS['db_connection']->commit();
     if (@(!$data_object->id)) {
         return false;
     }

     // new objects start out unreviewed and in preview
     $vetted_id = Vetted::unknown()->id;
     $visibility_id = Visibility::preview()->id;
     if ($existing) {
         $best_vetted = $existing->best_vetted();
         if ($best_vetted) {
             $vetted_id = $best_vetted->id;
         }
         $best_visibility = $existing->best_visibility();
         // if the existing object is visible - this will go on as preview
         // otherwise this will inherit the visibility (unpublished)
         if ($best_visibility && $best_visibility != Visibility::visible()) {
             $visibility_id = $best_visibility->id;
         }
     }

     // we only delete the object's entries the first time we see it, to allow for multiple taxa per object
     $seen_before = isset($this->harvested_data_object_ids[$data_object->id]);
     if (!$seen_before) {
         $data_object->delete_hierarchy_entries();
     }
     $this->harvested_data_object_ids[$data_object->id] = 1;
     $hierarchy_entry->add_data_object($data_object->id, $vetted_id, $visibility_id);
     $this->resource->harvest_event->add_data_object($data_object, $status);

     // a reused object keeps its existing agents, audiences, info items and refs
     if ($status == "Reused") {
         return;
     }

     $data_object->delete_agents();
     $position = 0;
     foreach ($parameters['agents'] as &$agent_params) {
         $agent = Agent::find_or_create($agent_params);
         // cache the agent's logo the first time it is encountered
         if ($agent->logo_url && !$agent->logo_cache_url) {
             $cached_logo = $this->content_manager->grab_file($agent->logo_url, "partner");
             if ($cached_logo) {
                 $agent->logo_cache_url = $cached_logo;
                 $agent->save();
             }
         }
         $data_object->add_agent($agent->id, @$agent_params['agent_role']->id ?: 0, $position);
         $position++;
     }
     unset($agent_params);

     $data_object->delete_audiences();
     foreach ($parameters['audiences'] as &$audience) {
         $data_object->add_audience($audience->id);
     }
     unset($audience);

     $data_object->delete_info_items();
     $data_object->delete_table_of_contents();
     if (@$parameters['info_items']) {
         foreach ($parameters['info_items'] as &$info_item) {
             $data_object->add_info_item($info_item->id);
         }
         unset($info_item);
     }

     $data_object->delete_refs();
     if (@$parameters['refs']) {
         foreach ($parameters['refs'] as &$ref) {
             if (@$ref->id) {
                 $data_object->add_reference($ref->id);
                 $ref->publish();
             }
         }
         unset($ref);
     }
 }
 /**
  * Finds the best matching taxon concept for a family name.
  *
  * Searches canonical forms equal to $name or any of $synonyms, both as the
  * name of a published, visible hierarchy entry ('valid') and as a synonym of
  * one ('synonym'), and hands the result set to get_best_concept_from_result().
  *
  * The script is terminated when $ancestors has none of the values at indexes
  * 15, 11 and 6 (read here as order, class and phylum).
  *
  * NOTE(review): the names are placed into the SQL string unescaped, so a name
  * containing a single quote breaks the query — escape them with whatever the
  * connection wrapper provides.
  *
  * @param string $name      family name to look up
  * @param array  $synonyms  alternative names searched together with $name
  * @param array  $ancestors ancestor names; indexes 15, 11 and 6 are inspected
  * @return mixed result of get_best_concept_from_result(), or null when the
  *               query returns no rows
  */
 private function lookup_family($name, $synonyms, $ancestors)
 {
     if (empty($ancestors[15]) && empty($ancestors[11]) && empty($ancestors[6])) {
         // only $name is known in this scope; line number/content are not available here
         echo "This is a line that doesnt have a order, class or phylum:\n{$name}\n\n\n";
         exit;
     }
     $synonyms[] = $name;
     $names_sql = implode("','", $synonyms);
     $visible_id = Visibility::visible()->id;
     $result = $this->mysqli->query("\n            (SELECT n.id name_id, h.id hierarchy_id, h.browsable, he.taxon_concept_id, 'valid' match_type\n                FROM canonical_forms cf\n                JOIN names n ON (cf.id=n.canonical_form_id)\n                JOIN hierarchy_entries he ON (n.id=he.name_id)\n                JOIN hierarchies h ON (he.hierarchy_id=h.id)\n                WHERE cf.string IN ('" . $names_sql . "')\n                AND he.published=1 AND he.visibility_id=" . $visible_id . ")\n            UNION\n            (SELECT n.id name_id, h.id hierarchy_id, h.browsable, he.taxon_concept_id, 'synonym' match_type\n                FROM canonical_forms cf\n                JOIN names n ON (cf.id=n.canonical_form_id)\n                JOIN synonyms s ON (n.id=s.name_id AND s.synonym_relation_id=" . SynonymRelation::synonym()->id . ")\n                JOIN hierarchy_entries he ON (s.hierarchy_entry_id=he.id)\n                JOIN hierarchies h ON (he.hierarchy_id=h.id)\n                WHERE cf.string IN ('" . $names_sql . "')\n                AND he.published=1 AND he.visibility_id=" . $visible_id . ")");
     if ($result && $result->num_rows) {
         return $this->get_best_concept_from_result($result, $name);
     }
     return null;
 }
Example #9
0
 /**
  * Counts the words of a taxon concept's published, visible text objects in one
  * table-of-contents chapter.
  *
  * Descriptions are taken from objects linked through data_objects_hierarchy_entries
  * and through users_data_objects; HTML tags are stripped before counting.
  *
  * @param int    $taxon_concept_id
  * @param string $chapter "brief summary" or "comprehensive description"
  * @return int|null total word count, or null when the chapter is not one of the
  *                  two supported values or no matching text object exists
  */
 function get_word_count($taxon_concept_id, $chapter)
 {
     if ($chapter == "brief summary") {
         $toc_label = 'Brief Summary';
     } elseif ($chapter == "comprehensive description") {
         $toc_label = 'Comprehensive Description';
     } else {
         // no table-of-contents id can be resolved; the query below would be malformed
         return null;
     }
     $text_id = DataType::find_or_create_by_schema_value('http://purl.org/dc/dcmitype/Text')->id;
     $toc_id = TranslatedTableOfContent::find_or_create_by_label($toc_label)->table_of_contents_id;
     $visible_id = Visibility::visible()->id;
     // vetted_id stays in the select list: UNION de-duplicates on all selected columns
     $query = "SELECT dotoc.toc_id,do.description, dohe.vetted_id FROM data_objects_taxon_concepts dotc \n                  JOIN data_objects do ON dotc.data_object_id = do.id LEFT JOIN data_objects_table_of_contents dotoc ON do.id = dotoc.data_object_id \n                  JOIN data_objects_hierarchy_entries dohe on do.id = dohe.data_object_id\n                  WHERE do.published = 1 AND dohe.visibility_id =" . $visible_id . " AND do.data_type_id = {$text_id} AND dotc.taxon_concept_id = {$taxon_concept_id} AND dotoc.toc_id = {$toc_id}\n                  UNION\n                  SELECT dotoc.toc_id,do.description, udo.vetted_id FROM data_objects_taxon_concepts dotc \n                  JOIN data_objects do ON dotc.data_object_id = do.id LEFT JOIN data_objects_table_of_contents dotoc ON do.id = dotoc.data_object_id \n                  JOIN users_data_objects udo on do.id = udo.data_object_id\n                  WHERE do.published = 1 AND udo.visibility_id =" . $visible_id . " AND do.data_type_id = {$text_id} AND dotc.taxon_concept_id = {$taxon_concept_id} AND dotoc.toc_id = {$toc_id}";
     $result = $this->mysqli_slave->query($query);

     // stays null when there are no rows, so callers can tell "no text" from "0 words"
     $total_words = null;
     while ($result && ($row = $result->fetch_assoc())) {
         $total_words += str_word_count(strip_tags($row['description']), 0);
     }
     return $total_words;
 }
 /**
  * Loads every association between a published data object and a taxon concept
  * made through users_data_objects, keeping only visible associations that are
  * trusted or unreviewed.
  *
  * The result replaces $this->user_data_objects_taxa, indexed as
  * [data_object_id][taxon_concept_id] => true.
  *
  * @return void
  */
 private function load_all_user_object_associations()
 {
     $this->user_data_objects_taxa = array();
     $vetted_ids = implode(",", array(Vetted::trusted()->id, Vetted::unknown()->id));
     $visible_id = Visibility::visible()->id;
     $sql = "SELECT do.id, udo.taxon_concept_id\n            FROM data_objects do\n            JOIN users_data_objects udo ON (do.id=udo.data_object_id)\n            WHERE do.published = 1\n            AND udo.vetted_id IN ({$vetted_ids})\n            AND udo.visibility_id = {$visible_id}";
     foreach ($this->mysqli_slave->iterate($sql) as $association) {
         $object_id = $association['id'];
         $concept_id = $association['taxon_concept_id'];
         $this->user_data_objects_taxa[$object_id][$concept_id] = true;
     }
 }
 /**
  * Computes per-taxon-concept counts and word counts of non-image data objects
  * and passes them to save_category_stats().
  *
  * Concepts are processed in id ranges of $batch_size between
  * $this->min_taxon_concept_id and $this->max_taxon_concept_id, or in a single
  * pass restricted to $this->test_taxon_concept_ids when that is set. Within a
  * batch each (concept, data object) pair is counted once. For every data type
  * in the order text, video, sound, flash, youtube, iucn the saved value holds
  * eight tab-separated fields: total, trusted, untrusted and unreviewed object
  * counts followed by the same four as word counts. Fields with no data are
  * left empty.
  *
  * NOTE(review): the saved value is joined with tabs and has no leading tab,
  * matching how the first field was written before — confirm against
  * save_category_stats() and the import table layout.
  *
  * @param int $batch_size number of taxon concept ids per query
  * @return void
  */
 function get_data_objects_count($batch_size = 100000)
 {
     $image_id = DataType::image()->id;
     $data_type_label = array();
     $data_type_label[DataType::text()->id] = 'text';
     $data_type_label[DataType::video()->id] = 'video';
     $data_type_label[DataType::sound()->id] = 'sound';
     $data_type_label[DataType::flash()->id] = 'flash';
     $data_type_label[DataType::youtube()->id] = 'youtube';
     $data_type_label[DataType::iucn()->id] = 'iucn';
     # the stats need to go into the file in a certain order to be imported into the MySQL table
     $data_type_order_in_file = array("text", "video", "sound", "flash", "youtube", "iucn");
     $stat_fields_in_file = array('total', 't', 'ut', 'ur', 'total_w', 't_w', 'ut_w', 'ur_w');
     $trusted_id = Vetted::trusted()->id;
     $untrusted_id = Vetted::untrusted()->id;
     $unreviewed_id = Vetted::unknown()->id;
     $visible_id = Visibility::visible()->id;

     for ($i = $this->min_taxon_concept_id; $i <= $this->max_taxon_concept_id; $i += $batch_size) {
         $this->print_status($i, $batch_size);
         $sql = "SELECT  do.guid,\n                            dotc.taxon_concept_id,\n                            do.data_type_id,\n                            doii.info_item_id,\n                            dor.ref_id,\n                            REPLACE(REPLACE(do.description, '\\\\n', ' '), '\\\\r', ' '),\n                            dohe.vetted_id,\n                            do.id\n            FROM data_objects_taxon_concepts dotc\n            STRAIGHT_JOIN data_objects do ON (dotc.data_object_id = do.id)\n            JOIN data_objects_hierarchy_entries dohe ON (do.id=dohe.data_object_id)\n            LEFT JOIN data_objects_info_items doii ON (do.id = doii.data_object_id)\n            LEFT JOIN data_objects_refs dor ON (do.id = dor.data_object_id)\n            WHERE do.published = 1 AND dohe.visibility_id = " . $visible_id . " AND do.data_type_id != {$image_id}";
         if ($this->test_taxon_concept_ids) {
             $sql .= " AND dotc.taxon_concept_id IN (" . $this->test_taxon_concept_ids . ")";
         } else {
             // BETWEEN is inclusive; "- 1" keeps adjacent batches from sharing an id
             $sql .= " AND dotc.taxon_concept_id BETWEEN {$i} AND " . ($i + $batch_size - 1);
         }

         $raw_stats = array();
         $counted_data_objects = array();
         foreach ($this->mysqli_slave->iterate_file($sql) as $row) {
             // column 0 is do.guid; columns 3 and 4 (info_item_id, ref_id) are
             // selected but not used here
             $taxon_concept_id = trim($row[1]);
             $data_type_id = trim($row[2]);
             $description = trim($row[5]);
             $vetted_id = trim($row[6]);
             $data_object_id = trim($row[7]);
             // the LEFT JOINs repeat an object once per info item / reference
             if (isset($counted_data_objects[$taxon_concept_id][$data_object_id])) {
                 continue;
             }
             $counted_data_objects[$taxon_concept_id][$data_object_id] = 1;

             $label = @$data_type_label[$data_type_id];
             $words_count = str_word_count(strip_tags($description), 0);
             $counters = array('total');
             if ($vetted_id == $trusted_id) {
                 $counters[] = 't';
             } elseif ($vetted_id == $untrusted_id) {
                 $counters[] = 'ut';
             } elseif ($vetted_id == $unreviewed_id) {
                 $counters[] = 'ur';
             }
             foreach ($counters as $counter) {
                 @$raw_stats[$taxon_concept_id][$label][$counter]++;
                 @($raw_stats[$taxon_concept_id][$label][$counter . '_w'] += $words_count);
             }
         }

         // flatten each concept's stats into one tab-separated string covering
         // every data type, in file order
         foreach ($raw_stats as $taxon_concept_id => $stats) {
             $values = array();
             foreach ($data_type_order_in_file as $data_type) {
                 foreach ($stat_fields_in_file as $field) {
                     $values[] = @$stats[$data_type][$field];
                 }
             }
             $raw_stats[$taxon_concept_id] = implode("\t", $values);
         }
         $this->save_category_stats($raw_stats, "get_data_objects_count");
         if ($this->test_taxon_concept_ids) {
             break;
         }
     }
 }
 /**
  * Tally data objects, and the word counts of their descriptions, per taxon concept.
  *
  * Walks the taxon concept id range from $this->min_taxon_concept_id to
  * $this->max_taxon_concept_id in batches. Each batch is dumped to an outfile
  * through $this->mysqli_slave, read back line by line and then deleted.
  *
  * For every row whose data_subtype_id is not the map id, the object is counted
  * under its data type label as 'total' plus one of 't' (trusted), 'ut'
  * (untrusted) or 'ur' (unknown/unreviewed), with matching '*_w' word totals.
  * Rows whose data_subtype_id equals the map id are counted separately, without
  * word totals. Info item ids and reference ids are collected per concept.
  *
  * Results are handed to save_to_json_file() ("concept_info_items",
  * "concept_references", "map_counts") and save_totals_to_cumulative_txt()
  * ("tpm_data_objects"). Only text, video, sound, flash, youtube and iucn counts
  * are written to the cumulative file; image counts are tallied but not exported.
  *
  * When $GLOBALS['test_taxon_concept_ids'] is set, the query is restricted to
  * those ids and executed exactly once.
  *
  * @param int $batch_size Number of taxon concept ids covered by each query.
  * @return void Returns early (without saving anything) if an outfile cannot be opened.
  */
 function get_data_objects_count($batch_size = 100000)
 {
     $time_start = time_elapsed();
     $concept_data_object_counts = array();
     $concept_data_object_maps = array();
     $concept_info_items = array();
     $concept_references = array();
     $image_id = DataType::image()->id;
     $map_id = DataType::map()->id;
     $text_id = DataType::text()->id;
     $video_id = DataType::video()->id;
     $sound_id = DataType::sound()->id;
     $flash_id = DataType::flash()->id;
     $youtube_id = DataType::youtube()->id;
     $iucn_id = DataType::iucn()->id;
     $data_type_label[$image_id] = 'image';
     $data_type_label[$sound_id] = 'sound';
     $data_type_label[$text_id] = 'text';
     $data_type_label[$video_id] = 'video';
     $data_type_label[$iucn_id] = 'iucn';
     $data_type_label[$flash_id] = 'flash';
     $data_type_label[$youtube_id] = 'youtube';
     $trusted_id = Vetted::trusted()->id;
     $untrusted_id = Vetted::untrusted()->id;
     $unreviewed_id = Vetted::unknown()->id;
     $visible_id = Visibility::visible()->id;
     $test_mode = isset($GLOBALS['test_taxon_concept_ids']);
     for ($i = $this->min_taxon_concept_id; $i <= $this->max_taxon_concept_id; $i += $batch_size) {
         print "\n dataObjects, its infoItems, its references [2 of 14] {$i} \n";
         if ($test_mode) {
             $concept_filter = " and dotc.taxon_concept_id IN (" . implode(",", $GLOBALS['test_taxon_concept_ids']) . ")";
         } else {
             // BETWEEN is inclusive on both ends, so the upper bound must stop one
             // short of the next batch's start; otherwise the boundary concept is
             // read by two consecutive batches and its counts are doubled.
             $batch_end = $i + $batch_size - 1;
             $concept_filter = " AND dotc.taxon_concept_id BETWEEN {$i} AND {$batch_end}";
         }
         // An earlier `do.data_type_id <> $image_id` filter was removed from this
         // query so that maps can be counted.
         $sql = "SELECT dotc.taxon_concept_id tc_id, do.data_type_id, doii.info_item_id, dor.ref_id, do.description, dohe.vetted_id, do.data_subtype_id\r\n                FROM data_objects_taxon_concepts dotc \r\n                JOIN data_objects do ON dotc.data_object_id = do.id \r\n                LEFT JOIN data_objects_info_items doii ON do.id = doii.data_object_id \r\n                LEFT JOIN data_objects_refs dor ON do.id = dor.data_object_id \r\n                JOIN data_objects_hierarchy_entries dohe on do.id = dohe.data_object_id\r\n                WHERE do.published=1 AND dohe.visibility_id=" . $visible_id . " AND dohe.vetted_id != {$untrusted_id} ";
         $sql .= $concept_filter;
         $sql .= "\r\n                UNION\r\n                SELECT dotc.taxon_concept_id tc_id, do.data_type_id, doii.info_item_id, dor.ref_id, do.description, udo.vetted_id, do.data_subtype_id\r\n                    FROM data_objects_taxon_concepts dotc \r\n                    JOIN data_objects do ON dotc.data_object_id = do.id \r\n                    LEFT JOIN data_objects_info_items doii ON do.id = doii.data_object_id \r\n                    LEFT JOIN data_objects_refs dor ON do.id = dor.data_object_id \r\n                    JOIN users_data_objects udo on do.id = udo.data_object_id\r\n                    WHERE do.published=1 AND udo.visibility_id=" . $visible_id . "\r\n                ";
         $sql .= $concept_filter;
         $outfile = $this->mysqli_slave->select_into_outfile($sql);
         $FILE = fopen($outfile, "r");
         if (!$FILE) {
             print "!! ERROR: Could not read {$outfile}";
             debug("!! ERROR: Could not read {$outfile}");
             return;
         }
         $num_rows = 0;
         while (!feof($FILE)) {
             if ($line = fgets($FILE)) {
                 $num_rows++;
                 $line = trim($line);
                 // trim() can strip empty trailing columns; pad so every index exists.
                 $fields = array_pad(explode("\t", $line), 7, '');
                 $tc_id = trim($fields[0]);
                 $data_type_id = trim($fields[1]);
                 $info_item_id = trim($fields[2]);
                 $ref_id = trim($fields[3]);
                 $description = trim($fields[4]);
                 $vetted_id = trim($fields[5]);
                 $data_subtype_id = trim($fields[6]);
                 $label = @$data_type_label[$data_type_id];
                 // Which vetted bucket this row belongs to, if any.
                 $vetted_key = null;
                 if ($vetted_id == $trusted_id) {
                     $vetted_key = 't';
                 } elseif ($vetted_id == $untrusted_id) {
                     $vetted_key = 'ut';
                 } elseif ($vetted_id == $unreviewed_id) {
                     $vetted_key = 'ur';
                 }
                 if ($data_subtype_id != $map_id) {
                     $words_count = str_word_count(strip_tags($description), 0);
                     @$concept_data_object_counts[$tc_id][$label]['total']++;
                     @($concept_data_object_counts[$tc_id][$label]['total_w'] += $words_count);
                     if ($vetted_key !== null) {
                         @$concept_data_object_counts[$tc_id][$label][$vetted_key]++;
                         @($concept_data_object_counts[$tc_id][$label][$vetted_key . '_w'] += $words_count);
                     }
                     // Used as sets: only the keys matter.
                     $concept_info_items[$tc_id][$info_item_id] = '';
                     $concept_references[$tc_id][$ref_id] = '';
                 } else {
                     @$concept_data_object_maps[$tc_id][$label]['total']++;
                     if ($vetted_key !== null) {
                         @$concept_data_object_maps[$tc_id][$label][$vetted_key]++;
                     }
                 }
             }
         }
         fclose($FILE);
         unlink($outfile);
         print "\n num_rows: {$num_rows}";
         if ($test_mode) {
             // The test query ignores $i, so further iterations would only
             // re-read the same rows and inflate every count.
             break;
         }
     }
     self::save_to_json_file($concept_info_items, "concept_info_items");
     unset($concept_info_items);
     self::save_to_json_file($concept_references, "concept_references");
     unset($concept_references);
     //save map data to be accessed later
     self::save_to_json_file($concept_data_object_maps, "map_counts");
     unset($concept_data_object_maps);
     // Flatten each concept's nested counts into one tab-separated string; the
     // data type order and column order below define the layout of the output.
     $data_type_order_in_file = array("text", "video", "sound", "flash", "youtube", "iucn");
     $columns_in_file = array('total', 't', 'ut', 'ur', 'total_w', 't_w', 'ut_w', 'ur_w');
     foreach ($concept_data_object_counts as $taxon_concept_id => $taxon_object_counts) {
         $new_value = "";
         foreach ($data_type_order_in_file as $data_type) {
             foreach ($columns_in_file as $column) {
                 // Missing counts are written as empty columns.
                 $new_value .= "\t" . @$taxon_object_counts[$data_type][$column];
             }
         }
         $concept_data_object_counts[$taxon_concept_id] = $new_value;
     }
     print "\n get_data_objects_count():" . (time_elapsed() - $time_start) / 60 . " minutes";
     self::save_totals_to_cumulative_txt($concept_data_object_counts, "tpm_data_objects");
     unset($concept_data_object_counts);
 }
 /**
  * Write one line per (taxon concept, hierarchy) pair to $this->LINKS_OUT.
  *
  * Queries published, visible hierarchy entries from a fixed list of
  * hierarchies for taxon concepts with ids between $start and $start + $limit
  * (both inclusive, as SQL BETWEEN). Only the first row seen for each
  * concept/hierarchy pair is written. A row is skipped when it yields no usable
  * name.
  *
  * Output columns, tab-separated: taxon concept id, hierarchy entry id, name,
  * parent page id, parent hierarchy entry id, richness score (x100, rounded to
  * 2 decimals), rank label, identifier, title, and the 'url' of
  * $this->prepare_link($row) when that call returns a value.
  *
  * Column values equal to the literal string "NULL" are treated as missing.
  *
  * @param int $start First taxon concept id of the range.
  * @param int $limit Size of the range added to $start for the upper bound.
  * @return void
  */
 private function lookup_links($start, $limit)
 {
     $hierarchy_ids = array(771, 759, 431, 123, 903, 596, 410, 143, 860);
     // COL 2011, NCBI, WORMS, Wikipedia, ITIS, IF, Wikimedia Commons, fishbase, avibase
     $query = "\n            SELECT tc.id, he.id hierarchy_entry_id, he.identifier, he.source_url, h.label, h.outlink_uri, res.title, h.id, n.string,\n              cf.string, he_parent.id parent_hierarchy_entry_id, he_parent.taxon_concept_id, tr.label, tcm.richness_score\n            FROM taxon_concepts tc\n            JOIN hierarchy_entries he ON (tc.id=he.taxon_concept_id)\n            JOIN names n ON (he.name_id=n.id)\n            JOIN hierarchies h ON (he.hierarchy_id=h.id)\n            LEFT JOIN taxon_concept_metrics tcm ON (tc.id=tcm.taxon_concept_id)\n            LEFT JOIN resources res ON (h.id=res.hierarchy_id)\n            LEFT JOIN canonical_forms cf ON (n.ranked_canonical_form_id=cf.id)\n            LEFT JOIN hierarchy_entries he_parent ON (he.parent_id=he_parent.id)\n            LEFT JOIN (ranks r JOIN translated_ranks tr ON (r.id = tr.rank_id AND tr.language_id=152)) ON (he.rank_id=r.id)\n            WHERE tc.id BETWEEN {$start} AND " . ($start + $limit) . "\n            AND he.published=1 AND he.visibility_id=" . Visibility::visible()->id . "\n            AND he.hierarchy_id IN (" . implode(",", $hierarchy_ids) . ")";
     // Abbreviated rank labels and the full names written in their place.
     $rank_abbreviations = array(
         'gen.' => 'genus',
         'sp.' => 'species',
         'subsp.' => 'subspecies',
         'var.' => 'variety',
     );
     $links_from_hierarchy = array();
     // Static so the progress counter keeps running across calls.
     static $j = 0;
     foreach ($this->mysqli->iterate_file($query) as $row) {
         if ($j % 10000 == 0) {
             echo "{$start} : {$j} : " . time_elapsed() . " : " . memory_get_usage() . "\n";
         }
         $j++;
         $taxon_concept_id = $row[0];
         $hierarchy_entry_id = $row[1];
         $identifier = $row[2];
         $source_url = $row[3];
         $hierarchy_label = $row[4];
         $outlink_uri = $row[5];
         $resource_title = $row[6];
         $hierarchy_id = $row[7];
         $string = trim($row[8]);
         $ranked_canonical_form = trim($row[9]);
         $parent_hierarchy_entry_id = $row[10];
         $parent_page_id = $row[11];
         $rank_label = $row[12];
         $richness_score = $row[13];
         if (!$parent_hierarchy_entry_id || $parent_hierarchy_entry_id == "NULL") {
             $parent_hierarchy_entry_id = 0;
         }
         if (!$parent_page_id || $parent_page_id == "NULL") {
             $parent_page_id = 0;
         }
         if (!$richness_score || $richness_score == "NULL") {
             $richness_score = 0;
         }
         if (!$rank_label || $rank_label == "NULL") {
             $rank_label = '';
         }
         if (isset($rank_abbreviations[$rank_label])) {
             $rank_label = $rank_abbreviations[$rank_label];
         }
         $richness_score = round($richness_score * 100, 2);
         // Reset for every row: without this, a row with neither a resource title
         // nor a hierarchy label would be written with the previous row's title.
         $title = '';
         if ($resource_title != 'NULL' && ($t = trim($resource_title))) {
             $title = $t;
         } elseif ($hierarchy_label != 'NULL' && ($t = trim($hierarchy_label))) {
             $title = $t;
         }
         // Only the first entry per concept/hierarchy pair is written.
         if (@$links_from_hierarchy[$taxon_concept_id][$hierarchy_id]) {
             continue;
         }
         // Prefer the ranked canonical form unless it is a surrogate name.
         $name = null;
         if ($ranked_canonical_form != 'NULL' && !Name::is_surrogate($ranked_canonical_form)) {
             $name = $ranked_canonical_form;
         } elseif ($string != 'NULL') {
             $name = $string;
         }
         if (!$name) {
             continue;
         }
         fwrite($this->LINKS_OUT, "{$taxon_concept_id}\t{$hierarchy_entry_id}\t{$name}\t{$parent_page_id}\t{$parent_hierarchy_entry_id}\t{$richness_score}\t{$rank_label}\t{$identifier}\t{$title}\t");
         if ($link = $this->prepare_link($row)) {
             fwrite($this->LINKS_OUT, $link['url']);
         }
         fwrite($this->LINKS_OUT, "\n");
         $links_from_hierarchy[$taxon_concept_id][$hierarchy_id] = 1;
     }
 }
Example #14
0
 /**
  * Read a tab-separated result file of hierarchy entry / data object rows and
  * group its images per hierarchy entry.
  *
  * Expected columns per line: hierarchy_entry_id, parent_id, data_object_id,
  * data_rating, visibility_id, vetted_id, published. Rows are assumed to be
  * grouped by hierarchy_entry_id: each time the id changes, the images
  * collected for the previous entry are passed to $this->process_top_images().
  * A data object is only considered once per hierarchy entry. Visible,
  * published objects are collected as top images; all others as top
  * unpublished images.
  *
  * The file is deleted after it has been read.
  *
  * @param string $outfile Path of the result file to read.
  * @param bool   $delete  When true, delete the rows of every hierarchy entry
  *                        that had top images from top_images_tmp and
  *                        top_unpublished_images_tmp, in chunks of 5000 ids.
  * @return array|null Map of parent_id => 1 for every non-empty parent id seen,
  *                    or null when the file could not be opened.
  */
 function get_data_from_result($outfile, $delete = true)
 {
     $parent_ids = array();
     $last_hierarchy_entry_id = 0;
     $top_images = array();
     $top_unpublished_images = array();
     $used_data_objects = array();
     $hierarchy_entry_ids = array();
     $visible_id = Visibility::visible()->id;
     if (!($RESULT = fopen($outfile, "r"))) {
         debug(__CLASS__ . ":" . __LINE__ . ": Couldn't open file: " . $outfile);
         return;
     }
     while (!feof($RESULT)) {
         if ($line = fgets($RESULT, 4096)) {
             //he.id hierarchy_entry_id, he.parent_id, do.id, do.data_rating, do.visibility_id, do.vetted_id, do.published
             $fields = explode("\t", trim($line));
             $hierarchy_entry_id = $fields[0];
             $parent_id = $fields[1];
             $data_object_id = $fields[2];
             $data_rating = $fields[3];
             $visibility_id = $fields[4];
             $vetted_id = $fields[5];
             $published = $fields[6];
             if ($parent_id) {
                 $parent_ids[$parent_id] = 1;
             }
             // this is a new entry so commit existing data before adding more
             if ($hierarchy_entry_id != $last_hierarchy_entry_id) {
                 $this->process_top_images($top_images, $top_unpublished_images);
                 if ($top_images) {
                     $hierarchy_entry_ids[] = $last_hierarchy_entry_id;
                 }
                 $last_hierarchy_entry_id = $hierarchy_entry_id;
                 $top_images = array();
                 $top_unpublished_images = array();
                 $used_data_objects = array();
             }
             // Each data object counts once per hierarchy entry.
             if (isset($used_data_objects[$data_object_id])) {
                 continue;
             }
             $used_data_objects[$data_object_id] = 1;
             // Unknown vetted ids get the fallback sort order 5.
             $vetted_sort_order = isset($this->vetted_sort_orders[$vetted_id]) ? $this->vetted_sort_orders[$vetted_id] : 5;
             if ($visibility_id == $visible_id && $published == 1) {
                 $top_images[$hierarchy_entry_id][$vetted_sort_order][$data_rating][$data_object_id] = "{$hierarchy_entry_id}\t{$data_object_id}";
             } else {
                 $top_unpublished_images[$hierarchy_entry_id][$vetted_sort_order][$data_rating][$data_object_id] = "{$hierarchy_entry_id}\t{$data_object_id}";
             }
         }
     }
     fclose($RESULT);
     unlink($outfile);
     // Commit whatever was collected for the final hierarchy entry.
     $this->process_top_images($top_images, $top_unpublished_images);
     if ($top_images) {
         $hierarchy_entry_ids[] = $last_hierarchy_entry_id;
     }
     if ($delete) {
         // foreach and implode(glue, pieces) replace each() and the legacy
         // implode(pieces, glue) order, both of which were removed in PHP 8.
         foreach (array_chunk($hierarchy_entry_ids, 5000) as $chunk) {
             $id_list = implode(",", $chunk);
             $this->mysqli->delete("DELETE FROM top_images_tmp WHERE hierarchy_entry_id IN (" . $id_list . ")");
             $this->mysqli->delete("DELETE FROM top_unpublished_images_tmp WHERE hierarchy_entry_id IN (" . $id_list . ")");
         }
     }
     return $parent_ids;
 }
Example #15
0
 /**
  * Count the user_added_data rows that have the "visible" visibility id and a
  * NULL deleted_at.
  *
  * @return mixed Whatever $this->mysqli_slave->select_value() returns for the
  *               COUNT(*) query.
  */
 public function total_user_added_data()
 {
     $visible_id = Visibility::visible()->id;
     $sql = "\n            SELECT COUNT(*) count\n            FROM user_added_data\n            WHERE visibility_id = " . $visible_id . "\n                AND deleted_at IS NULL";
     return $this->mysqli_slave->select_value($sql);
 }