/**
 * Binds the database handles, opens the SPARQL client and caches the ids of
 * the lookup records (publication status, vetted states, visibilities,
 * curator levels, WoRMS partner, Catalogue of Life hierarchy) on the instance.
 *
 * Statement order is kept as-is: several lookups are find_or_create_* calls.
 */
public function __construct()
{
    // Alias (by reference) the globally registered primary connection.
    $this->mysqli =& $GLOBALS['mysqli_connection'];

    // Only production with a defined 'slave' environment gets its own handle;
    // every other setup aliases the primary connection.
    $use_slave = ($GLOBALS['ENV_NAME'] == 'production' && environment_defined('slave'));
    if (!$use_slave) {
        $this->mysqli_slave =& $this->mysqli;
    } else {
        $this->mysqli_slave = load_mysql_environment('slave');
    }

    $this->sparql_client = SparqlClient::connection();

    $published_status = TranslatedResourceStatus::find_or_create_by_label('Published');
    $this->published_id = $published_status->id;

    // Vetted states.
    $this->trusted_id   = Vetted::trusted()->id;
    $this->unknown_id   = Vetted::unknown()->id;
    $this->untrusted_id = Vetted::untrusted()->id;

    // Visibility states.
    $this->visible_id   = Visibility::visible()->id;
    $this->invisible_id = Visibility::invisible()->id;

    // Curator levels.
    $this->master_curator_id    = CuratorLevel::master_curator()->id;
    $this->full_curator_id      = CuratorLevel::full_curator()->id;
    $this->assistant_curator_id = CuratorLevel::assistant_curator()->id;
    $this->curator_ids          = CuratorLevel::curator_ids();

    $this->data_object_scope = ChangeableObjectType::data_object_scope();

    $worms_partner = ContentPartner::find_or_create_by_full_name('World Register of Marine Species');
    $this->worms_content_partner_id = $worms_partner->id;

    $col_hierarchy = Hierarchy::find_or_create_by_label('Species 2000 & ITIS Catalogue of Life: Annual Checklist 2011');
    $this->col_hierarchy_id = $col_hierarchy->id;

    // Return values are discarded here; presumably these methods cache their
    // results on the instance (they are defined outside this block - confirm).
    $this->latest_harvest_event_ids();
    $this->worms_latest_harvest_event_id();
}
/**
 * Binds the database handles and builds the vetted-id => sort-order map
 * (trusted = 1, unknown = 2, untrusted = 3).
 */
public function __construct()
{
    // Alias (by reference) the globally registered primary connection.
    $this->mysqli =& $GLOBALS['db_connection'];

    // Only production with a defined 'slave' environment gets its own handle;
    // every other setup aliases the primary connection.
    $use_slave = ($GLOBALS['ENV_NAME'] == 'production' && environment_defined('slave'));
    if (!$use_slave) {
        $this->mysqli_slave =& $this->mysqli;
    } else {
        $this->mysqli_slave = load_mysql_environment('slave');
    }

    // Keys are evaluated top to bottom, matching the original assignment order.
    $this->vetted_sort_orders = array(
        Vetted::trusted()->id   => 1,
        Vetted::unknown()->id   => 2,
        Vetted::untrusted()->id => 3,
    );
}
/**
 * Totals the words found in the descriptions of a taxon concept's published,
 * visible text data objects filed under one table-of-contents chapter.
 *
 * Rows come from two sources combined with UNION: associations through
 * data_objects_hierarchy_entries and through users_data_objects. HTML tags
 * are stripped before the words are counted.
 *
 * @param int|string $taxon_concept_id Taxon concept to inspect; cast to int before use in SQL.
 * @param string     $chapter          Either "brief summary" or "comprehensive description".
 * @return int|null Total word count, or null when the chapter is not one of
 *                  the supported values, the query fails, or no row matches.
 */
function get_word_count($taxon_concept_id, $chapter)
{
    $toc_labels = array(
        'brief summary'             => 'Brief Summary',
        'comprehensive description' => 'Comprehensive Description',
    );

    // An unsupported chapter used to leave $toc_id undefined and send a
    // malformed query to the database; bail out before touching it instead.
    if (!is_string($chapter) || !isset($toc_labels[$chapter])) {
        return null;
    }

    // Everything interpolated into the SQL below is forced to an integer.
    $taxon_concept_id = (int) $taxon_concept_id;
    $text_id = (int) DataType::find_or_create_by_schema_value('http://purl.org/dc/dcmitype/Text')->id;
    $toc_id = (int) TranslatedTableOfContent::find_or_create_by_label($toc_labels[$chapter])->table_of_contents_id;
    $visible_id = (int) Visibility::visible()->id;

    $common_filter = " AND do.data_type_id = {$text_id}"
        . " AND dotc.taxon_concept_id = {$taxon_concept_id}"
        . " AND dotoc.toc_id = {$toc_id}";

    // vetted_id stays in the SELECT list even though it is not read below:
    // UNION removes duplicate rows, so dropping a column could merge rows
    // that are currently counted separately.
    $query = "SELECT dotoc.toc_id, do.description, dohe.vetted_id"
        . " FROM data_objects_taxon_concepts dotc"
        . " JOIN data_objects do ON dotc.data_object_id = do.id"
        . " LEFT JOIN data_objects_table_of_contents dotoc ON do.id = dotoc.data_object_id"
        . " JOIN data_objects_hierarchy_entries dohe ON do.id = dohe.data_object_id"
        . " WHERE do.published = 1 AND dohe.visibility_id = {$visible_id}" . $common_filter
        . " UNION"
        . " SELECT dotoc.toc_id, do.description, udo.vetted_id"
        . " FROM data_objects_taxon_concepts dotc"
        . " JOIN data_objects do ON dotc.data_object_id = do.id"
        . " LEFT JOIN data_objects_table_of_contents dotoc ON do.id = dotoc.data_object_id"
        . " JOIN users_data_objects udo ON do.id = udo.data_object_id"
        . " WHERE do.published = 1 AND udo.visibility_id = {$visible_id}" . $common_filter;

    $result = $this->mysqli_slave->query($query);
    if (!$result) {
        return null;
    }

    // Stays null until the first row so "no matching rows" is still reported as null.
    $total_words = null;
    while ($row = $result->fetch_assoc()) {
        $total_words = (int) $total_words + str_word_count(strip_tags((string) $row['description']), 0);
    }

    return $total_words;
}
/**
 * Counts published, visible, non-image data objects (and the words in their
 * descriptions) per taxon concept and data type, split by vetted state, and
 * hands each batch to save_category_stats().
 *
 * Taxon concepts are walked in id ranges of $batch_size between
 * $this->min_taxon_concept_id and $this->max_taxon_concept_id. When
 * $this->test_taxon_concept_ids is set, only those ids are queried and the
 * loop stops after one pass.
 *
 * Each saved value is a tab-separated string of 48 fields: for every data
 * type in the order text, video, sound, flash, youtube, iucn the fields
 * total, t, ut, ur, total_w, t_w, ut_w, ur_w (t = trusted, ut = untrusted,
 * ur = unreviewed, *_w = word counts). Counters that were never touched are
 * written as empty strings.
 *
 * The query still selects info_item_id and ref_id, but they are not
 * aggregated here: the JSON export that consumed them is disabled.
 *
 * @param int $batch_size Number of taxon concept ids covered by one query.
 * @return void
 */
function get_data_objects_count($batch_size = 100000)
{
    // A non-positive step would never advance the batch loop.
    $batch_size = max(1, (int) $batch_size);

    $image_id = DataType::image()->id;
    $data_type_label = array();
    $data_type_label[DataType::text()->id] = 'text';
    $data_type_label[DataType::video()->id] = 'video';
    $data_type_label[DataType::sound()->id] = 'sound';
    $data_type_label[DataType::flash()->id] = 'flash';
    $data_type_label[DataType::youtube()->id] = 'youtube';
    $data_type_label[DataType::iucn()->id] = 'iucn';

    // Output order of data types and of the counters within each type.
    $data_type_order_in_file = array("text", "video", "sound", "flash", "youtube", "iucn");
    $stat_fields = array('total', 't', 'ut', 'ur', 'total_w', 't_w', 'ut_w', 'ur_w');

    $trusted_id = Vetted::trusted()->id;
    $untrusted_id = Vetted::untrusted()->id;
    $unreviewed_id = Vetted::unknown()->id;
    $visible_id = Visibility::visible()->id;

    for ($i = $this->min_taxon_concept_id; $i <= $this->max_taxon_concept_id; $i += $batch_size) {
        $this->print_status($i, $batch_size);

        $sql = "SELECT do.guid,\n"
            . " dotc.taxon_concept_id,\n"
            . " do.data_type_id,\n"
            . " doii.info_item_id,\n"
            . " dor.ref_id,\n"
            . " REPLACE(REPLACE(do.description, '\\\\n', ' '), '\\\\r', ' '),\n"
            . " dohe.vetted_id,\n"
            . " do.id\n"
            . " FROM data_objects_taxon_concepts dotc\n"
            . " STRAIGHT_JOIN data_objects do ON (dotc.data_object_id = do.id)\n"
            . " JOIN data_objects_hierarchy_entries dohe ON (do.id=dohe.data_object_id)\n"
            . " LEFT JOIN data_objects_info_items doii ON (do.id = doii.data_object_id)\n"
            . " LEFT JOIN data_objects_refs dor ON (do.id = dor.data_object_id)\n"
            . " WHERE do.published = 1 AND dohe.visibility_id = " . $visible_id
            . " AND do.data_type_id != {$image_id}";
        if ($this->test_taxon_concept_ids) {
            $sql .= " AND dotc.taxon_concept_id IN (" . $this->test_taxon_concept_ids . ")";
        } else {
            // BETWEEN is inclusive on both ends; without the -1 the last id of
            // one batch is also the first id of the next and is saved twice.
            $sql .= " AND dotc.taxon_concept_id BETWEEN {$i} AND " . ($i + $batch_size - 1);
        }

        $raw_stats = array();
        $counted_data_objects = array();
        foreach ($this->mysqli_slave->iterate_file($sql) as $row) {
            // Indexes follow the SELECT list: 0 guid, 1 taxon_concept_id,
            // 2 data_type_id, 3 info_item_id, 4 ref_id, 5 description,
            // 6 vetted_id, 7 data object id.
            $taxon_concept_id = trim($row[1]);
            $data_type_id = trim($row[2]);
            $description = trim($row[5]);
            $vetted_id = trim($row[6]);
            $data_object_id = trim($row[7]);

            // The LEFT JOINs repeat an object once per info item / reference;
            // count every object only once per taxon concept.
            if (isset($counted_data_objects[$taxon_concept_id][$data_object_id])) {
                continue;
            }
            $counted_data_objects[$taxon_concept_id][$data_object_id] = 1;

            // Data types without a label are collected under the '' key,
            // which is never written to the output string.
            $label = isset($data_type_label[$data_type_id]) ? $data_type_label[$data_type_id] : '';
            $words_count = str_word_count(strip_tags($description), 0);

            // Every object feeds 'total' plus at most one vetted-state counter.
            $counters = array('total');
            if ($vetted_id == $trusted_id) {
                $counters[] = 't';
            } elseif ($vetted_id == $untrusted_id) {
                $counters[] = 'ut';
            } elseif ($vetted_id == $unreviewed_id) {
                $counters[] = 'ur';
            }

            foreach ($counters as $counter) {
                $words_counter = $counter . '_w';
                if (!isset($raw_stats[$taxon_concept_id][$label][$counter])) {
                    $raw_stats[$taxon_concept_id][$label][$counter] = 0;
                    $raw_stats[$taxon_concept_id][$label][$words_counter] = 0;
                }
                $raw_stats[$taxon_concept_id][$label][$counter]++;
                $raw_stats[$taxon_concept_id][$label][$words_counter] += $words_count;
            }
        }

        // The stats need to go into the file in a certain order to be imported
        // into the MySQL table. Collect the fields of ALL data types; the
        // previous code reassigned the string per type and kept only 'iucn'.
        foreach ($raw_stats as $taxon_concept_id => $stats) {
            $fields = array();
            foreach ($data_type_order_in_file as $data_type) {
                foreach ($stat_fields as $stat_field) {
                    $fields[] = isset($stats[$data_type][$stat_field]) ? $stats[$data_type][$stat_field] : '';
                }
            }
            $raw_stats[$taxon_concept_id] = implode("\t", $fields);
        }

        $this->save_category_stats($raw_stats, "get_data_objects_count");

        // In test mode the query does not depend on $i, so one pass is enough.
        if ($this->test_taxon_concept_ids) {
            break;
        }
    }
}
/**
 * Counts published, visible data objects (and the words in their
 * descriptions) per taxon concept and data type, split by vetted state.
 *
 * Taxon concepts are walked in id ranges of $batch_size between
 * $this->min_taxon_concept_id and $this->max_taxon_concept_id; each batch is
 * exported with select_into_outfile() and read back line by line. When
 * $GLOBALS['test_taxon_concept_ids'] is set, only those ids are queried, once.
 *
 * Results written at the end:
 *  - "concept_info_items" / "concept_references" JSON: info item ids and
 *    reference ids seen per taxon concept (non-map rows only);
 *  - "map_counts" JSON: object counts for rows whose data_subtype_id equals
 *    the map data type id;
 *  - "tpm_data_objects" cumulative text: per taxon concept a string of
 *    tab-prefixed fields, for each of text, video, sound, flash, youtube,
 *    iucn: total, t, ut, ur, total_w, t_w, ut_w, ur_w. Counters that were
 *    never touched are written as empty strings.
 *
 * NOTE(review): rows are counted as returned, so an object joined to several
 * info items or references contributes one row per combination. Confirm
 * whether that inflation is intended before de-duplicating; a fix depends on
 * the outfile format, which is not visible here.
 *
 * @param int $batch_size Number of taxon concept ids covered by one query.
 * @return void Returns early, without saving, if an outfile cannot be opened.
 */
function get_data_objects_count($batch_size = 100000)
{
    $time_start = time_elapsed();

    // A non-positive step would never advance the batch loop.
    $batch_size = max(1, (int) $batch_size);

    $concept_data_object_counts = array();
    $concept_data_object_maps = array();
    $concept_info_items = array();
    $concept_references = array();

    $image_id = DataType::image()->id;
    $map_id = DataType::map()->id;
    $text_id = DataType::text()->id;
    $video_id = DataType::video()->id;
    $sound_id = DataType::sound()->id;
    $flash_id = DataType::flash()->id;
    $youtube_id = DataType::youtube()->id;
    $iucn_id = DataType::iucn()->id;

    $data_type_label = array();
    $data_type_label[$image_id] = 'image';
    $data_type_label[$sound_id] = 'sound';
    $data_type_label[$text_id] = 'text';
    $data_type_label[$video_id] = 'video';
    $data_type_label[$iucn_id] = 'iucn';
    $data_type_label[$flash_id] = 'flash';
    $data_type_label[$youtube_id] = 'youtube';

    $trusted_id = Vetted::trusted()->id;
    $untrusted_id = Vetted::untrusted()->id;
    $unreviewed_id = Vetted::unknown()->id;
    $visible_id = Visibility::visible()->id;

    $test_mode = isset($GLOBALS['test_taxon_concept_ids']);

    for ($i = $this->min_taxon_concept_id; $i <= $this->max_taxon_concept_id; $i += $batch_size) {
        print "\n dataObjects, its infoItems, its references [2 of 14] {$i} \n";

        if ($test_mode) {
            $concept_filter = " and dotc.taxon_concept_id IN (" . implode(",", $GLOBALS['test_taxon_concept_ids']) . ")";
        } else {
            // BETWEEN is inclusive on both ends; without the -1 the last id of
            // one batch is also the first id of the next and is counted twice.
            $concept_filter = " AND dotc.taxon_concept_id BETWEEN {$i} AND " . ($i + $batch_size - 1);
        }

        // No data type filter here: image rows are needed so maps can be counted.
        $sql = "SELECT dotc.taxon_concept_id tc_id, do.data_type_id, doii.info_item_id, dor.ref_id, do.description, dohe.vetted_id, do.data_subtype_id\r\n"
            . " FROM data_objects_taxon_concepts dotc \r\n"
            . " JOIN data_objects do ON dotc.data_object_id = do.id \r\n"
            . " LEFT JOIN data_objects_info_items doii ON do.id = doii.data_object_id \r\n"
            . " LEFT JOIN data_objects_refs dor ON do.id = dor.data_object_id \r\n"
            . " JOIN data_objects_hierarchy_entries dohe on do.id = dohe.data_object_id\r\n"
            . " WHERE do.published=1 AND dohe.visibility_id=" . $visible_id . " AND dohe.vetted_id != {$untrusted_id} "
            . $concept_filter
            . "\r\n UNION\r\n"
            . " SELECT dotc.taxon_concept_id tc_id, do.data_type_id, doii.info_item_id, dor.ref_id, do.description, udo.vetted_id, do.data_subtype_id\r\n"
            . " FROM data_objects_taxon_concepts dotc \r\n"
            . " JOIN data_objects do ON dotc.data_object_id = do.id \r\n"
            . " LEFT JOIN data_objects_info_items doii ON do.id = doii.data_object_id \r\n"
            . " LEFT JOIN data_objects_refs dor ON do.id = dor.data_object_id \r\n"
            . " JOIN users_data_objects udo on do.id = udo.data_object_id\r\n"
            . " WHERE do.published=1 AND udo.visibility_id=" . $visible_id . "\r\n "
            . $concept_filter;

        $outfile = $this->mysqli_slave->select_into_outfile($sql);
        $FILE = fopen($outfile, "r");
        if (!$FILE) {
            print "!! ERROR: Could not read {$outfile}";
            debug("!! ERROR: Could not read {$outfile}");
            return;
        }

        $num_rows = 0;
        while (($line = fgets($FILE)) !== false) {
            $num_rows++;

            // Pad short lines so a truncated row reads as empty fields
            // instead of touching undefined offsets.
            $fields = array_pad(explode("\t", trim($line)), 7, '');
            $tc_id = trim($fields[0]);
            $data_type_id = trim($fields[1]);
            $info_item_id = trim($fields[2]);
            $ref_id = trim($fields[3]);
            $description = trim($fields[4]);
            $vetted_id = trim($fields[5]);
            $data_subtype_id = trim($fields[6]);

            // Data types without a label are collected under the '' key.
            $label = isset($data_type_label[$data_type_id]) ? $data_type_label[$data_type_id] : '';

            // Every row feeds 'total' plus at most one vetted-state counter.
            $counters = array('total');
            if ($vetted_id == $trusted_id) {
                $counters[] = 't';
            } elseif ($vetted_id == $untrusted_id) {
                $counters[] = 'ut';
            } elseif ($vetted_id == $unreviewed_id) {
                $counters[] = 'ur';
            }

            if ($data_subtype_id != $map_id) {
                $words_count = str_word_count(strip_tags($description), 0);
                foreach ($counters as $counter) {
                    $words_counter = $counter . '_w';
                    if (!isset($concept_data_object_counts[$tc_id][$label][$counter])) {
                        $concept_data_object_counts[$tc_id][$label][$counter] = 0;
                        $concept_data_object_counts[$tc_id][$label][$words_counter] = 0;
                    }
                    $concept_data_object_counts[$tc_id][$label][$counter]++;
                    $concept_data_object_counts[$tc_id][$label][$words_counter] += $words_count;
                }
                $concept_info_items[$tc_id][$info_item_id] = '';
                $concept_references[$tc_id][$ref_id] = '';
            } else {
                // Maps are tallied separately and carry no word counts.
                foreach ($counters as $counter) {
                    if (!isset($concept_data_object_maps[$tc_id][$label][$counter])) {
                        $concept_data_object_maps[$tc_id][$label][$counter] = 0;
                    }
                    $concept_data_object_maps[$tc_id][$label][$counter]++;
                }
            }
        }
        fclose($FILE);
        unlink($outfile);
        print "\n num_rows: {$num_rows}";

        // In test mode the query does not depend on $i; running it again for
        // every batch would count the same rows repeatedly.
        if ($test_mode) {
            break;
        }
    }

    self::save_to_json_file($concept_info_items, "concept_info_items");
    unset($concept_info_items);
    self::save_to_json_file($concept_references, "concept_references");
    unset($concept_references);

    // Save map data to be accessed later.
    self::save_to_json_file($concept_data_object_maps, "map_counts");
    unset($concept_data_object_maps);

    // Flatten each concept's nested counters into one tab-prefixed field string.
    $data_type_order_in_file = array("text", "video", "sound", "flash", "youtube", "iucn");
    $stat_fields = array('total', 't', 'ut', 'ur', 'total_w', 't_w', 'ut_w', 'ur_w');
    foreach ($concept_data_object_counts as $taxon_concept_id => $taxon_object_counts) {
        $new_value = "";
        foreach ($data_type_order_in_file as $data_type) {
            foreach ($stat_fields as $stat_field) {
                $new_value .= "\t" . (isset($taxon_object_counts[$data_type][$stat_field]) ? $taxon_object_counts[$data_type][$stat_field] : '');
            }
        }
        $concept_data_object_counts[$taxon_concept_id] = $new_value;
    }

    $elapsed_minutes = (time_elapsed() - $time_start) / 60;
    print "\n get_data_objects_count():" . $elapsed_minutes . " minutes";

    self::save_totals_to_cumulative_txt($concept_data_object_counts, "tpm_data_objects");
    unset($concept_data_object_counts);
}