Exemplo n.º 1
0
 /**
  * Convert a PubChem substance XML record to RDF.
  *
  * Reads the substance id and version, the linked compound ids, the
  * source identifier, the synonyms and the comments from the record and adds
  * the corresponding statements to the RDF buffer through parent::addRDF().
  *
  * @param object $xml Wrapper around the parsed record; only GetXMLRoot() is
  *                    called on it and the result is queried with xpath().
  * @return void
  **/
 function parse_substance_record(&$xml)
 {
     $root = $xml->GetXMLRoot();

     // xpath() may return a non-array on failure, and array_shift() needs a real
     // variable (passing a call result by reference raises a notice), so all
     // lookups go through these two small helpers.
     $all_nodes = function ($node, $path) {
         $hits = $node->xpath($path);
         return is_array($hits) ? $hits : array();
     };
     $first_text = function ($node, $path) use ($all_nodes) {
         $hits = $all_nodes($node, $path);
         if (count($hits) == 0) {
             return "";
         }
         // cast so callers work with plain strings rather than XML element objects
         return (string) reset($hits);
     };

     // pubchem identifier and version
     $sid = $first_text($root, '//PC-Substance_sid/PC-ID/PC-ID_id');
     $sid_version = $first_text($root, '//PC-Substance_sid/PC-ID/PC-ID_version');
     if ($sid === "") {
         // without an identifier every statement would hang off the bare namespace prefix
         trigger_error("Substance record has no PC-ID_id - skipping record", E_USER_WARNING);
         return;
     }
     $psid = $this->getPcsNs() . $sid;
     parent::addRDF(parent::describeIndividual($psid, null, $this->getPcsVoc() . "Substance"));
     if ($sid_version !== "") {
         parent::addRDF(parent::triplifyString($psid, $this->getPcsVoc() . "version", parent::safeLiteral($sid_version)));
     }

     // reference to pubchem compounds
     foreach ($all_nodes($root, '//PC-Substance_compound/PC-Compounds/PC-Compound') as $compound) {
         $cid = $first_text($compound, './PC-Compound_id/PC-CompoundType/PC-CompoundType_id/PC-CompoundType_id_cid');
         if ($cid !== "") {
             $pcid = $this->getPccNs() . $cid;
             parent::addRDF(parent::triplify($psid, $this->getPcsVoc() . "compound", $pcid));
         }
     }

     // database cross references (xref)
     // source identifier
     $source_id = $first_text($root, '//PC-Substance_source/PC-Source/PC-Source_db/PC-DBTracking/PC-DBTracking_source-id/Object-id/Object-id_str');
     if ($source_id !== "") {
         parent::addRDF(parent::triplifyString($psid, $this->getPcsVoc() . "source-identifier", parent::safeLiteral($source_id)));
     }

     // synonyms
     foreach ($all_nodes($root, '//PC-Substance_synonyms/PC-Substance_synonyms_E') as $synonym) {
         parent::addRDF(parent::triplifyString($psid, $this->getPcsVoc() . "synonym", parent::safeLiteral((string) $synonym)));
     }

     // comments
     foreach ($all_nodes($root, '//PC-Substance_comment/PC-Substance_comment_E') as $comment) {
         // compare the text content: an element object is never identical (===) to ""
         $comment_text = (string) $comment;
         if ($comment_text !== "") {
             parent::addRDF(parent::triplifyString($psid, "rdfs:comment", parent::safeLiteral($comment_text)));
         }
     }
 }
Exemplo n.º 2
0
 /**
  * Convert the comma-separated gene table supplied by the read file to RDF.
  *
  * The first line is treated as the header and must contain exactly eight
  * comma-separated columns; each following line becomes one
  * "Aging-Related-Gene" individual. The RDF buffer is flushed to the write
  * file after every row.
  *
  * @return false|null false when the header does not have the expected
  *                    number of columns; otherwise nothing is returned.
  */
 function models()
 {
     // organism name (as it appears in column 3) => NCBI taxonomy identifier
     $tax_ids = array("Caenorhabditis elegans" => "6239", "Mus musculus" => "10090", "Saccharomyces cerevisiae" => "4932", "Drosophila melanogaster" => "7227", "Podospora anserina" => "5145", "Mesocricetus auratus" => "10036", "Schizosaccharomyces pombe" => "4896", "Danio rerio" => "7955");
     $h = explode(",", parent::getReadFile()->read());
     $expected_columns = 8;
     if (($n = count($h)) != $expected_columns) {
         trigger_error("Found {$n} columns in gene file - expecting {$expected_columns}!", E_USER_WARNING);
         return false;
     }
     /*
     [0] GenAge ID
     [1] symbol
     [2] name
     [3] organism
     [4] entrez gene id
     [5] avg lifespan change (max obsv)
     [6] lifespan effect
     [7] longevity influence
     */
     while ($l = parent::getReadFile()->read(200000)) {
         if (trim($l) === "") {
             // nothing to convert on a blank line
             continue;
         }
         $data = str_getcsv($l);
         if (($found = count($data)) < $expected_columns) {
             // a short row would otherwise be read past its end below
             trigger_error("Skipping gene row with {$found} columns - expecting {$expected_columns}!", E_USER_WARNING);
             continue;
         }
         $genage = str_pad($data[0], 4, "0", STR_PAD_LEFT);
         $gene_symbol = $data[1];
         $name = $data[2];
         $organism = $data[3];
         $ncbi_gene_id = $data[4];
         $max_percent_obsv_avg_lifespan_change = $data[5];
         $lifespan_effect = $data[6];
         $longevity_influence = $data[7];
         $genage_id = parent::getNamespace() . $genage;
         parent::addRDF(parent::describeIndividual($genage_id, $name, parent::getVoc() . "Aging-Related-Gene") . parent::describeClass(parent::getVoc() . "Aging-Related-Gene", "Aging Related Gene"));
         parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "gene-symbol", parent::safeLiteral($gene_symbol)));
         if (isset($tax_ids[$organism])) {
             parent::addRDF(parent::triplify($genage_id, parent::getVoc() . "taxon", "ncbitaxon:" . $tax_ids[$organism]));
         } else {
             // an unmapped organism would otherwise produce the dangling object "ncbitaxon:"
             trigger_error("No taxonomy identifier known for organism '{$organism}' ({$genage_id}) - taxon omitted", E_USER_WARNING);
         }
         if ($ncbi_gene_id !== "") {
             parent::addRDF(parent::triplify($genage_id, parent::getVoc() . "x-ncbigene", "ncbigene:" . $ncbi_gene_id));
         }
         if ($max_percent_obsv_avg_lifespan_change !== "") {
             parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "maximum-percent-observed-average-lifespan-change", parent::safeLiteral($max_percent_obsv_avg_lifespan_change)));
         }
         if ($lifespan_effect == "Increase and Decrease") {
             // the combined value is split into two separate statements
             parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "lifespan-effect", "increase") . parent::triplifyString($genage_id, parent::getVoc() . "lifespan-effect", "decrease"));
         } else {
             parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "lifespan-effect", strtolower($lifespan_effect)));
         }
         parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "longevity-influence", strtolower($longevity_influence)));
         parent::WriteRDFBufferToWriteFile();
     }
 }
Exemplo n.º 3
0
 /**
  * Convert the tab-separated gene table supplied by the read file to RDF.
  *
  * The first line is treated as the header and must contain exactly 14
  * tab-separated columns. Each following line becomes one "Gene" individual;
  * column 0 is the identifier and column 3 the label, and the pair is also
  * remembered in $this->genes. The RDF buffer is flushed to the write file
  * after every row.
  *
  * @return false|null false when the header does not have the expected
  *                    number of columns; otherwise nothing is returned.
  */
 function genes()
 {
     $h = explode("\t", parent::getReadFile()->read());
     $expected_columns = 14;
     if (($n = count($h)) != $expected_columns) {
         trigger_error("Found {$n} columns in gene file - expecting {$expected_columns}!", E_USER_WARNING);
         return false;
     }
     while ($l = parent::getReadFile()->read(200000)) {
         // drop a line terminator if the reader leaves it in place, so it does
         // not end up inside the last column (chromosome-end)
         $l = rtrim($l, "\r\n");
         if ($l === "") {
             continue;
         }
         // pad short rows so every column index used below exists
         $a = array_pad(explode("\t", $l), $expected_columns, "");
         $id = parent::getNamespace() . $a[0];
         $label = $a[3];
         $this->genes[$a[0]] = $a[3];
         parent::addRDF(
             parent::describeIndividual($id, $label, parent::getVoc() . "Gene")
             . parent::describeClass(parent::getVoc() . "Gene", "PharmGKB Gene")
         );
         // link data
         parent::addRDF(
             parent::triplify($id, "rdfs:seeAlso", "http://pharmgkb.org/gene/" . $a[0])
             . parent::triplify($id, "rdfs:seeAlso", "http://www4.wiwiss.fu-berlin.de/diseasome/resource/genes/" . $a[0])
             . parent::triplify($id, "rdfs:seeAlso", "http://dbpedia.org/resource/" . $a[0])
         );
         if ($a[1]) {
             parent::addRDF(parent::triplify($id, parent::getVoc() . "x-ncbigene", "ncbigene:" . $a[1]));
         }
         if ($a[2]) {
             parent::addRDF(parent::triplify($id, parent::getVoc() . "x-ensembl", "ensembl:" . $a[2]));
         }
         if ($a[3]) {
             parent::addRDF(
                 parent::triplifyString($id, parent::getVoc() . "name", $a[3])
                 . parent::describeProperty(parent::getVoc() . "name", "Relationship between a PharmGKB entity and its name")
             );
         }
         if ($a[4]) {
             parent::addRDF(
                 parent::triplify($id, parent::getVoc() . "symbol", "symbol:" . $a[4])
                 . parent::describeProperty(parent::getVoc() . "symbol", "Relationship between a PharmGKB gene and a gene symbol")
             );
         }
         if ($a[5]) {
             // NOTE(review): the -2 drops the last two characters of the field;
             // confirm against the file format that each list ends in two
             // characters to discard (e.g. a quote plus a separator).
             $b = explode('","', substr($a[5], 1, -2));
             foreach ($b as $alt_name) {
                 parent::addRDF(parent::triplifyString($id, parent::getVoc() . "alternative-name", parent::safeLiteral(trim(stripslashes($alt_name)))));
             }
             parent::addRDF(parent::describeProperty(parent::getVoc() . "alternative-name", "Relationship between a PharmGKB gene and an alternative name"));
         }
         if ($a[6]) {
             // these are not hgnc symbols
             $b = explode('","', substr($a[6], 1, -2));
             foreach ($b as $alt_symbol) {
                 parent::addRDF(parent::triplifyString($id, parent::getVoc() . "alternate-symbol", trim($alt_symbol)));
             }
             // describe the property itself (URI, label) as every other call here does,
             // not the gene
             parent::addRDF(parent::describeProperty(parent::getVoc() . "alternate-symbol", "Relationship between a PharmGKB gene and an alternate gene symbol"));
         }
         if ($a[7]) {
             parent::addRDF(
                 parent::triplifyString($id, parent::getVoc() . "is-vip", $a[7])
                 . parent::describeProperty(parent::getVoc() . "is-vip", "Relationship between a PharmGKB gene and its vip status")
             );
         }
         if ($a[8]) {
             parent::addRDF(
                 parent::triplifyString($id, parent::getVoc() . "has-variant-annotation", $a[8])
                 . parent::describeProperty(parent::getVoc() . "has-variant-annotation", "Relationship between a PharmGKB gene and whether it has a variant annotation")
             );
         }
         if ($a[9]) {
             // comma-separated cross references
             $b = explode(",", $a[9]);
             foreach ($b as $xref) {
                 $xref = trim($xref);
                 if (!$xref) {
                     continue;
                 }
                 $url = false;
                 // presumably MapXrefs fills $url, $ns and $id2 by reference - confirm against its definition
                 $x = $this->MapXrefs($xref, $url, $ns, $id2);
                 $ns = str_replace(' ', '', $ns);
                 if ($url == true) {
                     parent::addRDF(parent::QQuadO_URL($id, parent::getVoc() . "x-{$ns}", $x));
                 } else {
                     parent::addRDF(parent::triplify($id, parent::getVoc() . "x-{$ns}", $x));
                 }
             }
         }
         if ($a[10]) {
             parent::addRDF(parent::triplifyString($id, parent::getVoc() . "cpic-dosing-guideline", $a[10]));
         }
         if ($a[11]) {
             // start/end (columns 12 and 13) are only emitted together with the chromosome
             parent::addRDF(
                 parent::triplifyString($id, parent::getVoc() . "chromosome", $a[11])
                 . parent::describeProperty(parent::getVoc() . "chromosome", "Relationship between a PharmGKB gene and its chromosomal position")
                 . parent::triplifyString($id, parent::getVoc() . "chromosome-start", $a[12])
                 . parent::triplifyString($id, parent::getVoc() . "chromosome-end", $a[13])
             );
         }
         parent::WriteRDFBufferToWriteFile();
     }
 }
Exemplo n.º 4
0
 /**
  * Convert the records supplied by the read file to RDF.
  *
  * Lines are accumulated until a line consisting only of "//" is read; the
  * accumulated text is then split into sections (LOCUS, DEFINITION,
  * ACCESSION, VERSION, KEYWORDS, SEGMENT, FEATURES, SOURCE, CONTIG,
  * REFERENCE), each section is parsed by its helper, and the statements for
  * the record are added to the RDF buffer, which is flushed to the write file
  * once per record. Text that is not followed by a "//" line is never
  * converted.
  *
  * @return void
  */
 function process()
 {
     $gb_record_str = "";
     while ($aLine = $this->getReadFile()->Read(4096)) {
         // a line holding only "//" closes the record collected so far
         if (preg_match("/^\\/\\/\$/", $aLine)) {
             //now remove the header if it is there
             $gb_record_str = $this->removeHeader($gb_record_str);
             $sectionsRaw = $this->parseGenbankRaw($gb_record_str);
             /**
              * SECTIONS being parsed:
              * locus, definition, accession, version, keywords, segment, source, reference, features
              */
             //get locus section(s)
             $locus = $this->retrieveSections("LOCUS", $sectionsRaw);
             $parsed_locus_arr = $this->parseLocus($locus);
             //get the definition section
             $definition = $this->retrieveSections("DEFINITION", $sectionsRaw);
             $parsed_definition_arr = $this->parseDefinition($definition);
             //get the accession
             $accessions = $this->retrieveSections("ACCESSION", $sectionsRaw);
             $parsed_accession_arr = $this->parseAccession($accessions);
             //get the version
             $versions = $this->retrieveSections("VERSION", $sectionsRaw);
             $parsed_version_arr = $this->parseVersion($versions);
             //get the keywords
             $keywords = $this->retrieveSections("KEYWORDS", $sectionsRaw);
             $parsed_keyword_arr = $this->parseKeywords($keywords);
             //may not be any segment section
             // reset first: otherwise a record without this section would
             // re-emit the values parsed for an earlier record
             $parsed_segments_arr = null;
             $segments = $this->retrieveSections("SEGMENT", $sectionsRaw);
             if (!empty($segments)) {
                 $parsed_segments_arr = $this->parseSegment($segments);
             }
             $features = $this->retrieveSections("FEATURES", $sectionsRaw);
             $parsed_features_arr = $this->parseFeatures($features);
             //get the source section
             $source = $this->retrieveSections("SOURCE", $sectionsRaw);
             $parsed_source_arr = $this->parseSource($source);
             //may not be any contig section; reset for the same reason as segments
             $parsed_contig_arr = null;
             $contig = $this->retrieveSections("CONTIG", $sectionsRaw);
             if (!empty($contig)) {
                 $parsed_contig_arr = $this->parseContig($contig);
             }
             //get the reference section
             $references = $this->retrieveSections("REFERENCE", $sectionsRaw);
             $parsed_refs_arr = $this->parseReferences($references);

             // the record resource is named after the gi number from the VERSION section
             $gb_res = "gi:" . $parsed_version_arr['gi'];
             $gb_label = utf8_encode(htmlspecialchars($parsed_definition_arr[0]));
             parent::AddRDF(
                 parent::describeIndividual($gb_res, $gb_label, $this->getVoc() . "genbank-record")
                 . parent::triplifyString($gb_res, $this->getVoc() . 'sequence-length', $parsed_locus_arr[0]['sequence_length'])
                 . parent::triplifyString($gb_res, $this->getVoc() . 'strandedness', $parsed_locus_arr[0]['strandedness'])
                 . parent::triplify($gb_res, "rdf:type", $this->getRes() . $parsed_locus_arr[0]['mol_type'])
                 . parent::triplifyString($gb_res, $this->getVoc() . 'chromosome-shape', $parsed_locus_arr[0]['chromosome_shape'])
                 . parent::triplifyString($gb_res, $this->getVoc() . 'division-name', $parsed_locus_arr[0]['division_name'])
                 . parent::triplifyString($gb_res, $this->getVoc() . 'date-of-entry', $parsed_locus_arr[0]['date'])
                 . parent::triplifyString($gb_res, $this->getVoc() . 'source', utf8_encode($parsed_source_arr[0]))
                 . parent::QQuadO_URL($gb_res, $this->getVoc() . 'fasta-seq', 'https://www.ncbi.nlm.nih.gov/sviewer/viewer.cgi?sendto=on&db=nucest&dopt=fasta&val=' . $parsed_version_arr['gi'])
             );

             foreach ($parsed_features_arr as $aFeature) {
                 //getFeatures
                 $type = $aFeature['type'];
                 $feat_desc = $this->getFeatures($type);
                 $label = preg_replace('/\\s\\s*/', ' ', $feat_desc['definition']);
                 $comment = null;
                 $value = $aFeature['value'];
                 // the text before the first "/" is the location; the remaining
                 // pieces are scanned below for key=value pairs
                 $value_arr = explode("/", $value);
                 $location = preg_replace('/\\n/', '', $value_arr[0]);
                 // one class per feature type, one individual per (type, location, record)
                 $class_id = parent::getVoc() . md5($type);
                 $feat_res = parent::getRes() . md5($type . $location . $gb_res);
                 $feat_label = utf8_encode($type . " " . $location . " for " . $gb_res);
                 if (isset($feat_desc['comment'])) {
                     $comment = $feat_desc['comment'];
                     $comment = preg_replace('/\\s\\s*/', ' ', $comment);
                     $label .= " " . $comment;
                 }
                 parent::AddRDF(
                     parent::describeClass($class_id, $label, parent::getVoc() . "Feature")
                     . parent::describeIndividual($feat_res, $feat_label, $class_id)
                     . parent::triplify($gb_res, $this->getVoc() . "has-feature", $feat_res)
                 );
                 foreach ($value_arr as $aL) {
                     //check if aL has an equals in it
                     $p = "/(\\S+)\\=(.*)/";
                     preg_match($p, $aL, $m);
                     if (count($m)) {
                         if ($m[1] == "db_xref") {
                             parent::AddRDF(parent::triplify($feat_res, "rdfs:seeAlso", str_replace("\"", "", $m[2])));
                         } else {
                             parent::AddRDF(parent::triplifyString($feat_res, $this->getVoc() . $m[1], utf8_encode(str_replace("\"", "", $m[2]))));
                         }
                     }
                 }
             }
             foreach ($parsed_accession_arr[0] as $acc) {
                 parent::AddRDF(parent::triplifyString($gb_res, $this->getVoc() . "accession", $acc));
             }
             if (isset($parsed_version_arr['versioned_accession'])) {
                 parent::AddRDF(parent::triplifyString($gb_res, $this->getVoc() . "versioned-accession", $parsed_version_arr['versioned_accession']));
             }
             if (isset($parsed_contig_arr)) {
                 foreach ($parsed_contig_arr as $aContig) {
                     parent::AddRDF(parent::triplifyString($gb_res, $this->getVoc() . "contig", parent::safeLiteral($aContig)));
                 }
             }
             foreach ($parsed_keyword_arr as $akw) {
                 parent::AddRDF(parent::triplifyString($gb_res, $this->getVoc() . "keyword", $akw));
             }
             if (isset($parsed_segments_arr)) {
                 foreach ($parsed_segments_arr as $aSeg) {
                     parent::AddRDF(
                         parent::triplifyString($gb_res, $this->getVoc() . "segment-number", $aSeg['segment_number'])
                         . parent::triplifyString($gb_res, $this->getVoc() . "total-segments", $aSeg['total_segments'])
                     );
                 }
             }
             foreach ($parsed_refs_arr as $ref_idx => $aRef) {
                 // name the reference after the record and its position instead of
                 // a random number, so identifiers are reproducible and references
                 // of different records cannot collide
                 $ref_res = $this->getRes() . md5($gb_res . "|reference|" . $ref_idx);
                 $ref_label = "reference for " . $gb_res;
                 if (isset($aRef['TITLE'])) {
                     parent::AddRDF(
                         parent::describeIndividual($ref_res, $ref_label, $this->getVoc() . "reference")
                         . parent::triplifyString($ref_res, $this->getVoc() . "title", $aRef['TITLE'])
                     );
                 }
                 if (isset($aRef['PUBMED'])) {
                     parent::AddRDF(parent::triplify($ref_res, $this->getVoc() . "x-pubmed", 'pubmed:' . $aRef['PUBMED']));
                 }
                 if (isset($aRef['AUTHORS'])) {
                     parent::AddRDF(parent::triplifyString($ref_res, $this->getVoc() . "authors", $aRef['AUTHORS']));
                 }
                 parent::AddRDF(parent::triplify($gb_res, $this->getVoc() . "reference", $ref_res));
                 // guarded like the other optional reference fields
                 if (isset($aRef['COORDINATES'])) {
                     parent::AddRDF(parent::triplifyString($ref_res, $this->getVoc() . "coordinates", $aRef['COORDINATES']));
                 }
                 if (isset($aRef['JOURNAL'])) {
                     parent::AddRDF(parent::triplifyString($ref_res, $this->getVoc() . "citation", $aRef['JOURNAL']));
                 }
             }
             // start collecting the next record
             $gb_record_str = "";
             $this->WriteRDFBufferToWriteFile();
             continue;
         }
         // lines holding only a newline are not added to the record text
         if (!preg_match("/^\n\$/", $aLine)) {
             $gb_record_str .= $aLine;
         }
     }
     //while
 }