/**
 * Render the detailed quality report for the registry objects of a data source.
 *
 * Collects, for every registry object belonging to the data source, its
 * quality level, class, title, status, id and stored 'quality_html'
 * metadata, sorts the rows with cmpByQualityLevel() and passes them to the
 * 'detailed_quality_report' view.
 *
 * @param mixed       $id            Identifier of the data source to report on.
 * @param string|null $status_filter When truthy, only records whose status
 *                                   equals this value are included.
 *
 * @throws Exception When loading or reading a registry object raises an
 *                   exception (re-thrown as a plain Exception).
 */
function quality_report($id, $status_filter = null)
{
    $this->load->model("data_source/data_sources", "ds");
    $this->load->model("registry_object/registry_objects", "ro");

    // Enforce access rights BEFORE any data source / record data is read.
    acl_enforce('REGISTRY_USER');
    ds_acl_enforce((int) $id);

    $data = array(
        'title' => 'Data Source Report',
        'scripts' => array(),
        'less' => array('charts'),
        'js_lib' => array('core'),
    );
    if ($status_filter) {
        $data['filter'] = "Quality report for " . readable($status_filter);
    }

    $report = array();
    $data['ds'] = $this->ds->getByID($id);
    $ids = $this->ro->getIDsByDataSourceID($id, false, 'All');

    if ($ids) {
        $data['record_count'] = count($ids);

        // Keywords that get underlined in the stored quality HTML.
        $replacements = array(
            "recommended" => "<u>recommended</u>",
            "required" => "<u>required</u>",
            "must be" => "<u>must be</u>",
        );
        $search = array_keys($replacements);
        $replace = array_values($replacements);

        foreach ($ids as $ro_id) {
            try {
                $ro = $this->ro->getByID($ro_id);

                // A record that could not be loaded is skipped entirely rather
                // than being dereferenced and reported as a blank row.
                if ($ro && (!$status_filter || $ro->status == $status_filter)) {
                    $report[$ro_id] = array(
                        'quality_level' => $ro->quality_level == 4 ? 'Gold Standard' : $ro->quality_level,
                        'class' => $ro->class,
                        'title' => $ro->title,
                        'status' => readable($ro->status),
                        'id' => $ro->id,
                        'report' => str_replace($search, $replace, $ro->getMetadata('quality_html')),
                    );
                }
            } catch (Exception $e) {
                // Re-thrown as a plain Exception whose message is the original
                // exception rendered as a string.
                throw new Exception($e);
            }

            // Release the record before loading the next one.
            unset($ro);
            clean_cycles();
        }
    }

    uasort($report, array($this, 'cmpByQualityLevel'));
    $data['report'] = $report;
    $this->load->view('detailed_quality_report', $data);
}
/**
 * Run the two-stage enrichment over the registry objects of a data source.
 *
 * Stage one calls addRelationships() on every record that has RIF content;
 * stage two then calls update_quality_metadata() and enrich() on the same
 * records. An exception raised for one record is echoed and processing
 * continues with the next record.
 *
 * @param mixed $data_source_id Identifier of the data source to enrich.
 */
function enrichDS($data_source_id)
{
    //TODO: XXX
    acl_enforce('REGISTRY_STAFF');

    header('Cache-Control: no-cache, must-revalidate');
    header('Content-type: application/json');

    $this->load->model('registry_object/registry_objects', 'ro');
    $this->load->model('data_source/data_sources', 'ds');

    $record_ids = $this->ro->getIDsByDataSourceID($data_source_id);
    if (!$record_ids) {
        return;
    }

    /* TWO-STAGE ENRICH */

    // Stage one: relationships for every record with RIF content.
    foreach ($record_ids as $record_id) {
        try {
            $record = $this->ro->getByID($record_id);
            if (!$record->getRif()) {
                continue;
            }

            $record->addRelationships();

            unset($record);
            gc_collect_cycles();
            clean_cycles();
        } catch (Exception $error) {
            echo "<pre>error in: {$error}" . nl2br($error->getMessage()) . "</pre>" . BR;
        }
    }

    // Stage two: quality metadata and enrichment for the same records.
    foreach ($record_ids as $record_id) {
        try {
            $record = $this->ro->getByID($record_id);
            if (!$record->getRif()) {
                continue;
            }

            $record->update_quality_metadata();
            $record->enrich();

            unset($record);
            gc_collect_cycles();
            clean_cycles();
        } catch (Exception $error) {
            echo "<pre>error in: {$error}" . nl2br($error->getMessage()) . "</pre>" . BR;
        }
    }
}
/**
 * Import a RIF-CS payload into a data source and return the textual log.
 *
 * Phase one validates the payload and, in 'HARVEST' mode, creates one record
 * per registry object class element, skipping keys that already carry the
 * current harvest id. Phase two enriches only the records created in phase
 * one. Everything echoed while the import runs is captured in an output
 * buffer and returned as a string.
 *
 * @param mixed  $data_source_id Identifier of the target data source.
 * @param string $xml            The RIF-CS payload.
 * @param string $harvestID      Harvest identifier; when empty a
 *                               "MANUAL-<timestamp>" id is generated.
 * @param bool   $debug          Not used by this method.
 * @param string $mode           Records are only created when this is 'HARVEST'.
 *
 * @return string The captured output (timing, counts and DONE marker).
 *
 * @throws Exception When either phase fails; the failure is also appended to
 *                   the data source log. The output buffer opened by this
 *                   method is closed (its content discarded) before throwing.
 */
function importPayloadToDataSource($data_source_id, $xml, $harvestID = '', $debug = false, $mode = 'HARVEST')
{
    ob_start();
    $this->output->enable_profiler(FALSE);

    $this->load->model('registry_object/registry_objects', 'ro');
    $this->load->model('registry_object/registry_objects', 'oldRo');
    $this->load->model('registry_object/rifcs', 'rifcs');
    $this->load->model('data_source/data_sources', 'ds');

    bench(0);
    $timewaiting = 0;
    $record_count = 0;
    $reg_obj_count = 0;
    $duplicate_record_count = 0;

    // An array of record ids created in this harvest (used in two-phase harvesting)
    $harvested_record_ids = array();

    gc_enable();

    // XXX: COMMENTME
    if ($harvestID == '') {
        $harvestID = "MANUAL-" . time();
    }

    $data_source = $this->ds->getByID($data_source_id);

    try {
        /* PHASE ONE => HARVEST THE RECORD IN ISOLATION */

        // Validate
        $this->validateRIFCSXML($xml);
        $sxml = $this->getSimpleXMLFromString($xml);
        $status = $this->getDefaultRecordStatusForDataSource($data_source);

        foreach ($sxml->xpath('//registryObject') as $registryObject) {
            $reg_obj_count++;

            // Determine the registry object class
            $ro_class = NULL;

            if ($mode == 'HARVEST') {
                foreach ($this->ro->valid_classes as $class) {
                    if (property_exists($registryObject, $class)) {
                        $ro_class = $class;
                    }

                    foreach ($registryObject->{$class} as $ro_xml) {
                        // Flag records that are duplicates within this harvest and
                        // choose not to harvest them again (repeated keys in a
                        // single harvest are dumb!)
                        $reharvest = true;
                        if ($oldRo = $this->oldRo->getByKey((string) $registryObject->key)) {
                            $oldharvestID = $oldRo->getAttribute("harvest_id");
                            if ($oldharvestID == $harvestID) {
                                $reharvest = false;
                            }
                            // XXX: Record ownership, reject if record already exists within the registry
                        }

                        if ($reharvest) {
                            // XXX: Record owner should only be system if this is a harvest?
                            $record_owner = "SYSTEM";

                            // Create a frame instance of the registryObject
                            $ro = $this->ro->create($data_source, (string) $registryObject->key, $ro_class, "", $status, "defaultSlug", $record_owner, $harvestID);
                            $ro->created_who = $record_owner;
                            $ro->data_source_key = $data_source->key;
                            $ro->group = (string) $registryObject['group'];
                            $ro->setAttribute("harvest_id", $harvestID);

                            // Order is important here!
                            $changed = $ro->updateXML($registryObject->asXML());

                            // Generate the list and display titles first, then the SLUG
                            $ro->updateTitles();
                            $ro->generateSlug();
                            $ro->processIdentifiers();

                            // Save all our attributes to the object
                            $ro->save($changed);

                            // Add this record to our counts, etc.
                            $harvested_record_ids[] = $ro->id;
                            $record_count++;

                            // Memory management...
                            unset($ro);
                        } else {
                            // XXX: Verbose message?
                            $duplicate_record_count++;
                        }
                    }
                }
            }
        }

        // Clean up our memory objects...
        unset($sxml);
        unset($xml);
        gc_collect_cycles();

        /* PHASE TWO - ENRICH THE HARVESTED RECORD IN LIGHT OF ITS CONTEXT */

        // Only enrich records received in this harvest
        foreach ($harvested_record_ids as $ro_id) {
            $ro = $this->ro->getByID($ro_id);
            $ro->addRelationships();
            // XXX: re-enrich records which are related to this one
            $ro->update_quality_metadata();
            // spatial resolution, center, coords in enrich?
            $ro->determineSpatialExtents();
            // vocab indexing resolution
            // Generate extrif
            $ro->enrich();
            unset($ro);
            clean_cycles();
        }
        gc_collect_cycles();
    } catch (Exception $e) {
        // Balance the ob_start() above: without this every failed import
        // would leave one more output buffer level open for the caller.
        ob_end_clean();

        $failure = "UNABLE TO HARVEST FROM THIS DATA SOURCE" . NL . $e->getMessage() . NL;
        $data_source->append_log($failure, "importer", "HARVEST_ERROR");
        throw new Exception($failure);
    }

    // Index the datasource we just harvested?? XXX: Should this just index the records enriched?
    //$this->indexDS($data_source_id);

    echo ((double) bench(0) - (double) $timewaiting) . " seconds to harvest " . NL;
    echo $reg_obj_count . " received " . NL . $record_count . " records inserted " . NL;
    if ($duplicate_record_count > 0) {
        echo $duplicate_record_count . " records ignored" . NL;
    }

    if ($reg_obj_count == 0) {
        echo "DONE WITH ERRORS" . NL;
    } else {
        echo "DONE" . NL;
    }

    return ob_get_clean();
}
/**
 * Refresh quality metadata and enrichment for every affected record.
 *
 * The affected key list becomes the union of imported and affected keys minus
 * the deleted keys. For each registry object found under those keys the
 * relationships are rebuilt (unless the key was imported), then
 * update_quality_metadata() and enrich() are called. When runBenchMark is
 * set, benchmark marks are placed and the elapsed times are accumulated on
 * the ro*Time / ro*Count properties.
 *
 * @throws Exception When addRelationships() or enrich() raises an exception
 *                   (re-thrown as a plain Exception).
 */
public function _enrichAffectedRecords()
{
    $this->CI->load->model('registry_object/registry_objects', 'ro');

    if ($this->runBenchMark) {
        $this->CI->benchmark->mark('enrich_affected_records_start');
    }

    // Accumulator property => benchmark mark pair (start, end) feeding it.
    $qa_timers = array(
        'roQATime' => array('ro_qa_start', 'ro_qa_end'),
        'roQAS1Time' => array('ro_qa_start', 'ro_qa_s1_end'),
        'roQAS2Time' => array('ro_qa_s1_end', 'ro_qa_s2_end'),
        'roQAS3Time' => array('ro_qa_s2_end', 'ro_qa_s3_end'),
        'roQAS4Time' => array('ro_qa_s3_end', 'ro_qa_end'),
    );
    $enrich_timers = array(
        'roEnrichTime' => array('ro_enrich_start', 'ro_enrich_end'),
        'roEnrichS1Time' => array('ro_enrich_start', 'ro_enrich_s1_end'),
        'roEnrichS2Time' => array('ro_enrich_s1_end', 'ro_enrich_s2_end'),
        'roEnrichS3Time' => array('ro_enrich_s2_end', 'ro_enrich_s3_end'),
        'roEnrichS4Time' => array('ro_enrich_s3_end', 'ro_enrich_s4_end'),
        'roEnrichS5Time' => array('ro_enrich_s4_end', 'ro_enrich_s5_end'),
        'roEnrichS6Time' => array('ro_enrich_s5_end', 'ro_enrich_s6_end'),
        'roEnrichS7Time' => array('ro_enrich_s6_end', 'ro_enrich_end'),
    );

    // Imported + previously affected keys, without the deleted ones.
    $candidate_keys = array_unique(array_merge($this->imported_record_keys, $this->affected_record_keys));
    $this->affected_record_keys = array_unique(array_diff($candidate_keys, $this->deleted_record_keys));

    foreach ($this->affected_record_keys as $record_key) {
        $records = $this->CI->ro->getAllByKey($record_key);
        if (!is_array($records)) {
            continue;
        }

        foreach ($records as $record) {
            // imported records already got their relationships handled
            $was_imported = in_array($record->key, $this->imported_record_keys);
            if (!$was_imported) {
                try {
                    $record->addRelationships();
                } catch (Exception $failure) {
                    throw new Exception($failure);
                }
            }

            if ($this->runBenchMark) {
                $this->roQACount++;
                $this->CI->benchmark->mark('ro_qa_start');
            }

            $record->update_quality_metadata($this->runBenchMark);

            if ($this->runBenchMark) {
                $this->CI->benchmark->mark('ro_qa_end');
                foreach ($qa_timers as $accumulator => $marks) {
                    $this->{$accumulator} += $this->CI->benchmark->elapsed_time($marks[0], $marks[1]);
                }

                $this->roEnrichCount++;
                $this->CI->benchmark->mark('ro_enrich_start');
            }

            try {
                $record->enrich($this->runBenchMark);
            } catch (Exception $failure) {
                throw new Exception($failure);
            }

            if ($this->runBenchMark) {
                $this->CI->benchmark->mark('ro_enrich_end');
                foreach ($enrich_timers as $accumulator => $marks) {
                    $this->{$accumulator} += $this->CI->benchmark->elapsed_time($marks[0], $marks[1]);
                }
            }

            unset($record);
            clean_cycles();
        }
    }

    if ($this->runBenchMark) {
        $this->CI->benchmark->mark('enrich_affected_records_end');
    }
}
/**
 * Two-stage enrichment of a data source with progress output.
 *
 * Stores the start time in exec_time, then for every registry object of the
 * data source: stage one calls addRelationships() (echoing '.' per record),
 * stage two calls update_quality_metadata() and enrich() (echoing '*' and
 * '^'). Only records with RIF content are processed. An exception raised for
 * one record is echoed and the loop carries on.
 *
 * @param mixed $data_source_id Identifier of the data source to enrich.
 */
function enrich($data_source_id)
{
    echo "ENRICHING..." . NL;
    $this->exec_time = microtime(true);

    $this->load->model('registry_object/registry_objects', 'ro');
    $this->load->model('data_source/data_sources', 'ds');

    $record_ids = $this->ro->getIDsByDataSourceID($data_source_id);
    if (!$record_ids) {
        return;
    }

    /* TWO-STAGE ENRICH */
    echo '----STAGE 1-----' . NL . NL;
    foreach ($record_ids as $record_id) {
        echo '.';
        try {
            $record = $this->ro->getByID($record_id);
            if (!$record->getRif()) {
                continue;
            }

            $record->addRelationships();
            unset($record);
        } catch (Exception $error) {
            echo "<pre>error in: {$error}" . nl2br($error->getMessage()) . "</pre>" . BR;
        }
    }

    echo '----STAGE 2----' . NL . NL;
    foreach ($record_ids as $record_id) {
        echo '*';
        try {
            $record = $this->ro->getByID($record_id);
            if (!$record->getRif()) {
                continue;
            }

            $record->update_quality_metadata();
            echo "^";
            $record->enrich();

            unset($record);
            gc_collect_cycles();
            clean_cycles();
        } catch (Exception $error) {
            echo "<pre>error in: {$error}" . nl2br($error->getMessage()) . "</pre>" . BR;
        }
    }
}