コード例 #1
0
ファイル: BOLDSysAPI.php プロジェクト: eliagbayani/maps_test
 /**
  * Scans the downloaded XML file record by record, keeps the longest COI-5P
  * nucleotide sequence per taxon, and saves the result as JSON.
  *
  * For every <record> that has sequences, the taxon id is taken from the most
  * specific rank that provides one (species first, kingdom last). The saved
  * structure is array(taxon_id => array("s" => longest sequence, "c" => count)),
  * where "c" accumulates the number of <sequence> elements (of any marker)
  * found in the records that contributed a COI-5P sequence for that taxon.
  *
  * Side effects: echoes progress, writes $this->SAVED_SEQUENCES_FILE through
  * self::save_to_json_file(), and deletes the downloaded file afterwards.
  * If the file cannot be opened, nothing is saved or deleted, so a previously
  * saved sequences file is not overwritten with an empty result.
  *
  * @return void
  */
 function save_dna_sequence_from_big_xml()
 {
     echo "\n\n saving dna sequence from big xml file...\n";
     // from 212.php this file will always be re-created
     require_library('connectors/BoldsImagesAPIv2');
     $func = new BoldsImagesAPIv2();
     $path = $func->download_and_extract_remote_file();
     echo "\n\n {$path}";
     $reader = new \XMLReader();
     if (!$path || !@$reader->open($path)) {
         // Bail out early instead of reading from an unopened reader.
         echo "\n\n cannot open xml file: [{$path}]\n";
         return;
     }
     // Ranks ordered from most to least specific; the first non-empty taxon_id wins.
     $ranks = array("species", "genus", "subfamily", "family", "order", "class", "phylum", "kingdom");
     $taxa_sequences = array();
     while (@$reader->read()) {
         if ($reader->nodeType != \XMLReader::ELEMENT || $reader->name != "record") {
             continue;
         }
         $xml = simplexml_load_string($reader->readOuterXML());
         if ($xml === false || !@$xml->sequences->sequence) {
             // Unparseable record, or a record without any sequence.
             continue;
         }
         $taxon_id = "";
         foreach ($ranks as $rank) {
             $taxon_id = trim((string) @$xml->taxonomy->{$rank}->taxon->taxon_id);
             if ($taxon_id) {
                 break;
             }
         }
         // NOTE(review): a record with no taxon_id at any rank is still stored,
         // under the empty-string key (same as before) - confirm this is wanted.
         $sequence_count = 0;
         $best_sequence = "";
         foreach ($xml->sequences->sequence as $sequence) {
             $sequence_count++;
             if ($sequence->markercode != "COI-5P") {
                 continue;
             }
             // Compare trimmed lengths so surrounding whitespace cannot make a
             // shorter sequence look longer than the current best.
             $nucleotides = trim((string) $sequence->nucleotides);
             if (strlen($best_sequence) < strlen($nucleotides)) {
                 $best_sequence = $nucleotides;
             }
         }
         if (!$best_sequence) {
             continue;
         }
         if (isset($taxa_sequences[$taxon_id])) {
             if (strlen($taxa_sequences[$taxon_id]["s"]) < strlen($best_sequence)) {
                 $taxa_sequences[$taxon_id]["s"] = $best_sequence;
             }
             $taxa_sequences[$taxon_id]["c"] += $sequence_count;
         } else {
             $taxa_sequences[$taxon_id] = array("s" => $best_sequence, "c" => $sequence_count);
         }
     }
     // Release the file handle before the file is deleted.
     $reader->close();
     self::save_to_json_file($taxa_sequences, $this->SAVED_SEQUENCES_FILE);
     unlink($path);
 }
コード例 #2
0
 /**
  * Reads the resource XML and writes one text MediaResource (titled
  * "Associations") per text data object, together with its agent, through
  * $this->archive_builder.
  *
  * For every <taxon> that has at least one dataObject:
  *  - a \eol_schema\Taxon is built and remembered in $this->taxa (first one wins);
  *  - for every dataObject of type Text, its agent is written once per agent
  *    identifier (tracked in $this->resource_agent_ids), and the text itself is
  *    written once per identifier (tracked in $this->media_ids), unless its
  *    source page is in the "problematic" list or its description is empty.
  *
  * Side effects: echoes progress, appends to $this->taxa,
  * $this->resource_agent_ids and $this->media_ids, and removes the downloaded
  * temporary file when it exists.
  *
  * @return void
  */
 private function get_texts()
 {
     require_library('connectors/BoldsImagesAPIv2');
     $path = BoldsImagesAPIv2::download_and_extract_remote_file($this->original_resource);
     /* very long text objects, temporarily ignored */
     $problematic_objects = array("http://www.bioimages.org.uk/html/Betula.htm", "http://www.bioimages.org.uk/html/Broadleaved_trees.htm", "http://www.bioimages.org.uk/html/Fagus.htm", "http://www.bioimages.org.uk/html/Pinopsida.htm", "http://www.bioimages.org.uk/html/Poaceae.htm", "http://www.bioimages.org.uk/html/Quercus.htm", "http://www.bioimages.org.uk/html/Salix.htm", "http://www.bioimages.org.uk/html/Trees.htm");
     $xml = false;
     if ($contents = Functions::lookup_with_cache($path, array('timeout' => 172800, 'download_attempts' => 2, 'delay_in_minutes' => 3))) {
         // May still be false when the downloaded content is not well-formed XML.
         $xml = simplexml_load_string($contents);
     }
     if ($xml === false) {
         echo "\n Down: " . $this->original_resource;
     } else {
         $total = count($xml->taxon);
         $i = 0;
         foreach ($xml->taxon as $t) {
             $i++;
             echo "\n {$i} of {$total}";
             if (count($t->dataObject) == 0) {
                 continue;
             }
             $t_dwc = $t->children("http://rs.tdwg.org/dwc/dwcore/");
             $t_dc = $t->children("http://purl.org/dc/elements/1.1/");
             $taxonID = (string) trim($t_dc->identifier);
             $source = self::clean_str("http://www.bioimages.org.uk/html/" . str_replace(" ", "_", Functions::canonical_form($t_dwc->ScientificName)) . ".htm");
             //---------------------------------
             $taxon = new \eol_schema\Taxon();
             $taxon->taxonID = $taxonID;
             $taxon->scientificName = $t_dwc->ScientificName;
             $taxon->kingdom = $t_dwc->Kingdom;
             $taxon->phylum = $t_dwc->Phylum;
             $taxon->class = $t_dwc->Class;
             $taxon->order = $t_dwc->Order;
             $taxon->family = $t_dwc->Family;
             $taxon->furtherInformationURL = $source;
             echo "\n {$taxon->taxonID} - {$taxon->scientificName} [{$source}]";
             if (isset($this->taxa[$taxonID])) {
                 echo " -- already exists";
             } else {
                 $this->taxa[$taxonID] = $taxon;
             }
             //---------------------------------
             foreach ($t->dataObject as $do) {
                 if ($do->dataType != "http://purl.org/dc/dcmitype/Text") {
                     continue;
                 }
                 $t_dc2 = $do->children("http://purl.org/dc/elements/1.1/");
                 //---------------------------
                 // The agent is registered before the text is checked for
                 // duplicates/emptiness, so it is written even if the text is skipped.
                 $r = new \eol_schema\Agent();
                 $r->term_name = $do->agent;
                 // The role attribute must be inside the braces to be interpolated;
                 // otherwise the literal text "['role']" is hashed and agents that
                 // share a name but differ in role get the same identifier.
                 $r->identifier = md5("{$do->agent}|{$do->agent['role']}");
                 $r->agentRole = $do->agent['role'];
                 $r->term_homepage = "http://www.bioimages.org.uk/index.htm";
                 if (!in_array($r->identifier, $this->resource_agent_ids)) {
                     $this->resource_agent_ids[] = $r->identifier;
                     $this->archive_builder->write_object_to_file($r);
                 }
                 //---------------------------
                 $text_identifier = self::clean_str($t_dc2->identifier);
                 if (in_array($text_identifier, $this->media_ids)) {
                     continue;
                 }
                 // Recorded as seen even if the object is skipped further below.
                 $this->media_ids[] = $text_identifier;
                 $mr = new \eol_schema\MediaResource();
                 $mr->agentID = $r->identifier;
                 $mr->taxonID = $taxonID;
                 $mr->identifier = $text_identifier;
                 $mr->type = "http://purl.org/dc/dcmitype/Text";
                 //$do->dataType;
                 $mr->language = "en";
                 $mr->format = "text/html";
                 //$do->mimeType;
                 $mr->furtherInformationURL = (string) trim($source);
                 if (in_array($mr->furtherInformationURL, $problematic_objects)) {
                     continue;
                 }
                 $mr->CVterm = "http://rs.tdwg.org/ontology/voc/SPMInfoItems#Associations";
                 $mr->Owner = "BioImages";
                 $mr->title = "Associations";
                 $mr->UsageTerms = "http://creativecommons.org/licenses/by-nc-sa/3.0/";
                 // $mr->audience       = 'Everyone';
                 // $mr->accessURI      = $source;
                 // NOTE(review): utf8_encode() is deprecated as of PHP 8.2; kept
                 // here so the produced text stays byte-identical.
                 $description = trim(self::clean_str(utf8_encode((string) $t_dc2->description)));
                 if (!$description) {
                     continue;
                 }
                 $mr->description = $description;
                 $this->archive_builder->write_object_to_file($mr);
             }
         }
     }
     // Only remove (and report removal of) a file that actually exists.
     if ($path && is_file($path)) {
         unlink($path);
         echo "\n temporary XML file removed: [{$path}]\n";
     }
 }