Exemplo n.º 1
0
 /**
  * Adds an xsd:date literal for $field on $id, assembled from the Year, Month and Day
  * properties of $dateobj.
  *
  * Month and day are left-padded to two digits when they are purely numeric, because the
  * xsd:date lexical form is YYYY-MM-DD; non-numeric or empty components are passed through
  * untouched.
  *
  * @param mixed  $id      subject handed to triplifyString()
  * @param string $field   suffix appended to the vocabulary prefix returned by getVoc()
  * @param mixed  $dateobj object exposing Year, Month and Day properties
  * @return false|null FALSE when $dateobj compares equal to null; no value otherwise
  */
 function addDate($id, $field, $dateobj)
 {
     // Loose comparison kept on purpose: anything PHP treats as equal to null is rejected,
     // exactly as before.
     if ($dateobj == null) {
         return FALSE;
     }
     $year = $dateobj->Year;
     $month = (string) $dateobj->Month;
     $day = (string) $dateobj->Day;
     // "2011-1-5" is not a valid xsd:date; pad single-digit numeric parts to "01", "05", ...
     if (ctype_digit($month)) {
         $month = str_pad($month, 2, "0", STR_PAD_LEFT);
     }
     if (ctype_digit($day)) {
         $day = str_pad($day, 2, "0", STR_PAD_LEFT);
     }
     parent::addRDF(parent::triplifyString($id, parent::getVoc() . $field, "{$year}-{$month}-{$day}", "xsd:date"));
 }
Exemplo n.º 2
0
 /**
  * Describes an individual identified by the MD5 hash of its title, together with the
  * class it belongs to, and returns the individual's URI.
  *
  * @param string $title label of the individual; a falsy title produces no RDF
  * @param string $type  label of the class; spaces become dashes in the class URI
  * @return string|null the individual's URI, or null when $title is falsy
  */
 public function makeDescription($title, $type)
 {
     if ($title) {
         $resource = parent::getRes() . md5($title);
         $class_uri = parent::getVoc() . str_replace(" ", "-", $type);

         $rdf = parent::describeIndividual($resource, $title, $class_uri);
         $rdf .= parent::describeClass($class_uri, $type);
         parent::addRDF($rdf);

         return $resource;
     }

     return null;
 }
Exemplo n.º 3
0
 /**
  * Emits RDF for one MIRIAM registry entry.
  *
  * $item is a nested associative array (presumably decoded from the registry XML; confirm
  * against the caller). Child collections may hold either one value or a list of values,
  * so every section first normalises its input to a list before iterating.
  *
  * Sections handled, in order: entry description and namespace, XML attributes, comment,
  * definition, synonyms, uris, resources, tags, documentations, restrictions, annotation.
  *
  * @param array $item entry data; '@attributes' => 'id', 'name' and 'namespace' are read
  *                    unconditionally, everything else is optional
  * @return void
  */
 function parseItem($item)
 {
     $id = $item['@attributes']['id'];
     $label = $item['name'];
     parent::addRDF(parent::describeIndividual($id, $label, parent::getVoc() . "Entry") . parent::describeClass(parent::getVoc() . "Entry", "MIRIAM database entry") . parent::triplifyString($id, parent::getVoc() . "namespace", $item['namespace']));
     if (isset($item['@attributes'])) {
         foreach ($item['@attributes'] as $k => $v) {
             parent::addRDF(parent::triplifyString($id, parent::getVoc() . $k, $v));
         }
     }
     if (isset($item['comment'])) {
         parent::addRDF(parent::triplifyString($id, parent::getVoc() . "comment", $item['comment']));
     }
     if (isset($item['definition'])) {
         parent::addRDF(parent::triplifyString($id, parent::getVoc() . "definition", $item['definition']));
     }
     if (isset($item['synonyms'])) {
         $mylist = null;
         if (is_array($item['synonyms']['synonym'])) {
             $mylist = $item['synonyms']['synonym'];
         } else {
             $mylist[] = $item['synonyms']['synonym'];
         }
         foreach ($mylist as $myitem) {
             parent::addRDF(parent::triplifyString($id, "skos:altLabel", $myitem));
         }
     }
     if (isset($item['uris'])) {
         // A lone <uri> arrives as a scalar; wrap it so the loop works like the other sections.
         $mylist = $item['uris']['uri'];
         if (!is_array($mylist)) {
             $mylist = array($mylist);
         }
         foreach ($mylist as $uri) {
             parent::addRDF(parent::triplifyString($id, parent::getVoc() . "uri", $uri));
         }
     }
     if (isset($item['resources'])) {
         // A single resource is an associative array carrying 'dataEntry' directly;
         // several resources form a list, where that key is absent at this level.
         $mylist = null;
         if (!isset($item['resources']['resource']['dataEntry'])) {
             $mylist = $item['resources']['resource'];
         } else {
             $mylist[] = $item['resources']['resource'];
         }
         foreach ($mylist as $myitem) {
             $rid = $myitem['@attributes']['id'];
             // Institution/location that are arrays are replaced by an empty string.
             parent::addRDF(parent::describeIndividual($rid, $myitem['dataInfo'], parent::getVoc() . "Resource") . parent::describeClass(parent::getVoc() . "Resource", "MIRIAM Resource") . parent::triplify($rid, parent::getVoc() . "url", $myitem['dataResource']) . parent::triplifyString($rid, parent::getVoc() . "urlTemplate", $myitem['dataEntry']) . parent::triplifyString($rid, parent::getVoc() . "organization", is_array($myitem['dataInstitution']) ? "" : $myitem['dataInstitution']) . parent::triplifyString($rid, parent::getVoc() . "location", is_array($myitem['dataLocation']) ? "" : $myitem['dataLocation']) . parent::triplify($id, parent::getVoc() . "resource", $rid));
         }
     }
     if (isset($item['tags'])) {
         $i = $item['tags']['tag'];
         $mylist = null;
         if (!is_array($i)) {
             $mylist[] = $i;
         } else {
             $mylist = $i;
         }
         foreach ($mylist as $myitem) {
             parent::addRDF(parent::triplifyString($id, parent::getVoc() . "tag", $myitem));
         }
     }
     if (isset($item['documentations'])) {
         $i = $item['documentations']['documentation'];
         $mylist = null;
         if (!is_array($i)) {
             $mylist[] = $i;
         } else {
             $mylist = $i;
         }
         foreach ($mylist as $myitem) {
             $doi_pos = strpos($myitem, "doi:");
             if (strstr($myitem, "pubmed")) {
                 // keep whatever follows the last colon as the PubMed identifier
                 $uri = "pubmed:" . substr($myitem, strrpos($myitem, ":") + 1);
             } elseif ($doi_pos !== false) {
                 // skip past the 4-character "doi:" marker so only the DOI itself is appended
                 $uri = "http://dx.doi.org/" . substr($myitem, $doi_pos + 4);
             } else {
                 $uri = $myitem;
             }
             parent::addRDF(parent::triplify($id, parent::getVoc() . "documentation", $uri));
         }
     }
     if (isset($item['restrictions'])) {
         // Same single-vs-list detection as for resources, keyed on 'statement'.
         $mylist = null;
         if (!isset($item['restrictions']['restriction']['statement'])) {
             $mylist = $item['restrictions']['restriction'];
         } else {
             $mylist[] = $item['restrictions']['restriction'];
         }
         foreach ($mylist as $i => $myitem) {
             $rid = parent::getRes() . str_replace(":", "", $id) . "_" . ($i + 1);
             $a = $myitem['@attributes'];
             $rid_type = parent::getVoc() . 'restriction_type_' . $a['type'];
             $rdf = parent::describeIndividual($rid, $a['desc'], parent::getVoc() . "Restriction") . parent::describeClass(parent::getVoc() . "Restriction", "Resource Restriction") . parent::triplify($rid, "rdf:type", $rid_type) . parent::describeClass($rid_type, $a['desc'], parent::getVoc() . "Restriction") . parent::triplifyString($rid, "dct:description", $myitem['statement']);
             // Only link a page when one is actually given; an empty object is not a valid resource.
             if (isset($myitem['link']) && is_string($myitem['link']) && $myitem['link'] !== '') {
                 $rdf .= parent::triplify($rid, "foaf:page", $myitem['link']);
             }
             $rdf .= parent::triplify($id, parent::getVoc() . "restriction", $rid);
             parent::addRDF($rdf);
         }
     }
     /*
     <annotation>
     	<format name="SBML">
     		<elements>
     			<element>reaction</element>
     			<element>event</element>
     			<element>rule</element>
     			<element>species</element>
     		</elements>
     	</format>
     */
     if (isset($item['annotation'])) {
         $mylist = null;
         if (!isset($item['annotation']['format']['elements'])) {
             $mylist = $item['annotation']['format'];
         } else {
             $mylist[] = $item['annotation']['format'];
         }
         foreach ($mylist as $i => $myitem) {
             $name = $myitem['@attributes']['name'];
             $myid = str_replace("MIR:", parent::getRes(), $id) . "_annotation_" . ($i + 1) . "_" . urlencode($name);
             parent::addRDF(parent::describeIndividual($myid, "{$label} used by {$name}", parent::getVoc() . "ValueSet") . parent::describeClass(parent::getVoc() . "ValueSet", "MIRIAM Value Set") . parent::triplifyString($myid, parent::getVoc() . "used-in", $name) . parent::triplify($myid, parent::getVoc() . "uses", $id));
             $b = $myitem['elements']['element'];
             $mylist2 = null;
             if (!is_array($b)) {
                 $mylist2[] = $b;
             } else {
                 $mylist2 = $b;
             }
             // no key needed here; avoids clobbering the outer loop's $i
             foreach ($mylist2 as $e) {
                 parent::addRDF(parent::triplifyString($myid, parent::getVoc() . "used-for", $e));
             }
         }
     }
 }
Exemplo n.º 4
0
 /**
  * Reads the tab-separated iProClass mapping file line by line and emits one set of
  * cross-reference triples per line, keyed on the UniProt accession in column 0.
  *
  * Output is rotated to a new file "iproclass.<n>.<output_format>" before the first line
  * and then every 1,000,000 lines; the RDF buffer is flushed after every line.
  *
  * Multi-valued columns are split on ";" and every identifier is trimmed, so both
  * "a; b" and "a;b" work and stray whitespace/newlines never end up inside an
  * identifier. Empty identifiers are skipped.
  *
  * @return void
  */
 private function process()
 {
     // One row per source column, in emission order:
     //   array(column index, relation suffix|null, qname prefix|null, seeAlso URL base|null, leading chars to drop)
     // Column 14 (pir-psd) is skipped because that database is no longer maintained.
     $xrefs = array(
         array(1, "x-uniprot", "uniprot:", null, 0),
         array(2, "x-ncbigene", "geneid:", null, 0),
         array(3, "x-refseq", "refseq:", null, 0),
         array(4, "x-gi", "gi:", null, 0),
         array(5, "x-pdb", "pdb:", null, 0),
         array(6, "x-pfam", "pfam:", null, 0),
         // the first three characters of each GO id are dropped before re-prefixing with "go:"
         array(7, "x-go", "go:", null, 3),
         array(8, "x-pirsf", "pirsf:", null, 0),
         array(9, "x-ipi", "ipi:", null, 0),
         // UniRef 100 / 90 / 50 clusters are only linked by URL
         array(10, null, null, "http://uniprot.org/uniref/", 0),
         array(11, null, null, "http://uniprot.org/uniref/", 0),
         array(12, null, null, "http://uniprot.org/uniref/", 0),
         array(13, "x-uniparc", "uniparc:", "http://uniprot.org/uniparc/", 0),
         array(15, "x-taxon", "taxon:", null, 0),
         array(16, "x-omim", "omim:", null, 0),
         array(17, "x-unigene", "unigene:", null, 0),
         array(18, "x-ensembl", "ensembl:", null, 0),
         array(19, "x-pubmed", "pubmed:", null, 0),
         // EMBL/GenBank/DDBJ nucleotide and EMBL protein ids both map to genbank
         array(20, "x-genbank", "genbank:", null, 0),
         array(21, "x-genbank", "genbank:", null, 0),
     );

     $z = 0;
     $y = 1;
     while ($l = $this->getReadFile()->Read(200000)) {
         if ($z++ % 1000000 == 0) {
             echo $z . PHP_EOL;
             $odir = parent::getParameterValue('outdir');
             $ofile = 'iproclass.' . $y++ . "." . parent::getParameterValue('output_format');
             $gz = strstr(parent::getParameterValue('output_format'), "gz") ? true : false;
             if (parent::getWriteFile() != null) {
                 parent::getWriteFile()->close();
                 parent::clear();
             }
             // generate a new file
             parent::setWriteFile($odir . $ofile, $gz);
         }

         $fields = explode("\t", $l);
         $uniprot_acc = trim($fields[0]);
         if ($uniprot_acc === '') {
             // blank line: nothing to describe
             continue;
         }
         $id_res = $this->getNamespace() . $uniprot_acc;
         parent::addRDF(parent::triplify($id_res, $this->getVoc() . "x-uniprot", "uniprot:" . $uniprot_acc));

         foreach ($xrefs as $xref) {
             list($column, $relation, $prefix, $url_base, $strip) = $xref;
             // Short lines simply lack the trailing columns. empty() also skips "0",
             // matching the previous behaviour.
             $value = isset($fields[$column]) ? trim($fields[$column]) : '';
             if (empty($value)) {
                 continue;
             }
             foreach (explode(";", $value) as $xref_id) {
                 $xref_id = trim($xref_id);
                 if ($strip > 0) {
                     $xref_id = (string) substr($xref_id, $strip);
                 }
                 if ($xref_id === '') {
                     continue;
                 }
                 $rdf = '';
                 if ($relation !== null) {
                     $rdf .= parent::triplify($id_res, $this->getVoc() . $relation, $prefix . $xref_id);
                 }
                 if ($url_base !== null) {
                     $rdf .= parent::QQuadO_URL($id_res, "rdfs:seeAlso", $url_base . $xref_id);
                 }
                 parent::addRDF($rdf);
             }
         }

         //write rdf to file
         $this->WriteRDFBufferToWriteFile();
     }
 }
Exemplo n.º 5
0
 /**
  * Converts the tab-separated gene interaction file into RDF, one interaction per line.
  *
  * Lines starting with '#' are skipped; lines that do not have exactly 11 columns are
  * reported and skipped. Columns used: 0 interaction id, 1 interaction type,
  * 2 additional info, 5 first gene, 8 second gene.
  *
  * Three shapes are produced depending on the additional-info column:
  *  - "No_interaction": a non-interaction individual plus an OWL negative property assertion;
  *  - "N/A" or "Genetic_interaction": a plain <type>-Interaction individual;
  *  - anything else: an individual of a subtype named after the additional info.
  *
  * The gene-to-gene predicate is resolved afresh for every line. When the interaction type
  * is not one of the four known ones, a notice is raised and the triples that need that
  * predicate are left out rather than reusing the predicate from an earlier line.
  *
  * @return void
  */
 function gene_interactions()
 {
     // interaction type => vocabulary suffix of the gene-to-gene predicate
     $predicates = array(
         "Genetic" => "genetically-interacts-with",
         "Physical" => "physically-interacts-with",
         "Predicted" => "predicted-to-interact-with",
         "Regulatory" => "regulates",
     );

     while ($l = parent::getReadFile()->Read()) {
         if ($l[0] == '#') {
             continue;
         }
         $data = explode("\t", $l);
         if (count($data) != 11) {
             trigger_error("Found " . count($data) . " columns, expecting 11");
             continue;
         }
         $interaction = $data[0];
         $interaction_type = str_replace("_", "-", $data[1]);
         $interaction_type_label = str_replace("_", " ", $data[1]);
         $int_additional_info = $data[2];
         $gene1 = $data[5];
         $gene2 = $data[8];
         $interaction_id = parent::getNamespace() . $interaction;

         // Reset per line so an unknown type can never inherit the previous line's predicate.
         $int_pred = null;
         if (isset($predicates[$interaction_type])) {
             $int_pred = parent::getVoc() . $predicates[$interaction_type];
         } else {
             trigger_error("Unknown interaction type '" . $interaction_type . "' for " . $interaction . "; predicate triples omitted");
         }

         $gene1_uri = parent::getNamespace() . $gene1;
         $gene2_uri = parent::getNamespace() . $gene2;
         $base_type = parent::getVoc() . $interaction_type . "-Interaction";

         if ($int_additional_info == "No_interaction") {
             $interaction_label = "No " . strtolower($interaction_type) . " interaction between " . $gene1 . " and " . $gene2;
             parent::addRDF(parent::describeIndividual($interaction_id, $interaction_label, parent::getVoc() . $interaction_type . "-Non-Interaction") . parent::describeClass(parent::getVoc() . $interaction_type . "-Non-Interaction", $interaction_type_label . " non-interaction") . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene1_uri) . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene2_uri));
             // A negative property assertion is meaningless without the property it negates.
             if ($int_pred !== null) {
                 $npa_id = parent::getRes() . md5($interaction_id . "negative property assertion");
                 $npa_label = "Negative property assertion stating that " . $gene1 . " and " . $gene2 . " do not have a " . $interaction_type_label . " interaction";
                 parent::addRDF(parent::describeIndividual($npa_id, $npa_label, "owl:NegativeObjectPropertyAssertion") . parent::triplify($npa_id, "owl:sourceIndividual", $gene1_uri) . parent::triplify($npa_id, "owl:targetIndividual", $gene2_uri) . parent::triplify($npa_id, "owl:assertionProperty", $int_pred));
             }
         } elseif ($int_additional_info == "N/A" || $int_additional_info == "Genetic_interaction") {
             $interaction_label = $interaction_type . " interaction between " . $gene1 . " and " . $gene2;
             $rdf = parent::describeIndividual($interaction_id, $interaction_label, $base_type) . parent::describeClass($base_type, $interaction_type_label . " Interaction") . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene1_uri) . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene2_uri);
             if ($int_pred !== null) {
                 $rdf .= parent::triplify($gene1_uri, $int_pred, $gene2_uri);
             }
             parent::addRDF($rdf);
         } else {
             $interaction_label = ($int_additional_info != "" ? $int_additional_info . " " : "") . strtolower($interaction_type) . " interaction between " . $gene1 . " and " . $gene2;
             $type = parent::getVoc() . ($int_additional_info != "" ? $int_additional_info . "-" : "") . $interaction_type . "-Interaction";
             $type_label = ($int_additional_info != "" ? $int_additional_info . " " : "") . $interaction_type_label . " Interaction";
             // The parent class gets the same label as in the branch above, so one class
             // never ends up with two different (and one misspelled) labels.
             $rdf = parent::describeIndividual($interaction_id, $interaction_label, $type) . parent::describeClass($type, $type_label, $base_type) . parent::describeClass($base_type, $interaction_type_label . " Interaction") . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene1_uri) . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene2_uri);
             if ($int_pred !== null) {
                 $rdf .= parent::triplify($gene1_uri, $int_pred, $gene2_uri);
             }
             parent::addRDF($rdf);
         }
         parent::WriteRDFBufferToWriteFile();
     }
 }
Exemplo n.º 6
0
 /**
  * Converts an Affymetrix probeset annotation CSV, read through the parent's read file,
  * into RDF.
  *
  * Layout expected by the code:
  *  - the very first line is discarded;
  *  - lines starting with '#' are "key=value" dataset attributes, attached to the dataset URI;
  *  - the first non-'#' line is the header and must have 41 columns;
  *  - every following line is one probeset with 41 double-quoted, comma-separated fields.
  *
  * Within a field, multiple values are separated by " /// " and the parts of one value by
  * " // "; a field equal to "---" is empty. Columns for which Map() returns a registry
  * name become x-<prefix> cross-references; the remaining columns are handled by header
  * label in the switch below.
  *
  * A wrong column count, or an annotation date that cannot be parsed, raises E_USER_ERROR.
  *
  * @param mixed $file not used by this method; input comes from parent::getReadFile()
  * @return void
  */
 function Parse($file)
 {
     // Source names found in "Transcript Assignments" => prefix used in the emitted qname.
     // Note: 'organelle' resolves to refseq; a later 'organelle' => genbank rule in the
     // previous implementation could never be reached and is therefore not reproduced.
     $prefix_map = array(
         'gb' => 'genbank',
         'gb_htc' => 'genbank',
         'ncbibacterial' => 'gi',
         'ncbi_bacterial' => 'gi',
         'ens' => 'ensembl',
         'ncbi_mito' => 'refseq',
         'ncbi_organelle' => 'refseq',
         'organelle' => 'refseq',
         'affx' => 'affymetrix',
         'unknown' => 'affymetrix',
         'prop' => 'affymetrix',
         'tigr_2004_08' => 'tigr',
         'tigr-plantta' => 'genbank',
         'newrs.gi' => 'gi',
         'newRS.gi' => 'gi',
         'primate_viral' => 'genbank',
         'jgi-bacterial' => 'ncbigene',
         'tb' => 'tuberculist',
         'pa' => 'pseudomonas',
     );
     // Gene Ontology column header => relation suffix
     $go_relations = array(
         'Gene Ontology Biological Process' => 'go-process',
         'Gene Ontology Cellular Component' => 'go-location',
         'Gene Ontology Molecular Function' => 'go-function',
     );

     parent::getReadFile()->read();
     // skip the first comment line
     $line = 1;
     $first = true;
     while ($l = parent::getReadFile()->read(500000)) {
         // keep the counter in step with the input so error messages point at the right line
         $line++;
         if ($l[0] == "#") {
             // dataset attributes
             $a = explode('=', trim($l));
             $r = $this->getVoc() . substr($a[0], 2);
             if (isset($a[1])) {
                 $v = $a[1];
                 if ($r == "affymetrix_vocabulary:genome-version-create_date") {
                     // a day of "00" is replaced by "01"
                     $x = explode("-", $a[1]);
                     if (isset($x[2]) && $x[2] == "00") {
                         $x[2] = "01";
                     }
                     $v = implode("-", $x);
                 }
                 parent::addRDF(parent::triplifyString(parent::getDatasetURI(), $r, $v) . parent::describe($r, "{$r}"));
             }
             continue;
         }
         if ($first == true) {
             $first = false;
             // header
             $header = explode(",", str_replace('"', '', trim($l)));
             $n = count($header);
             if ($n != 41) {
                 trigger_error("Expecting 41 columns, found {$n} in header on line {$line}!", E_USER_ERROR);
                 exit;
             }
             continue;
         }
         // Strip the line terminator first (LF, CRLF or none), then the outer quotes.
         $a = explode('","', substr(rtrim($l, "\r\n"), 1, -1));
         $n = count($a);
         if ($n != 41) {
             trigger_error("Expecting 41 columns, found {$n} on line {$line}!", E_USER_ERROR);
             exit;
         }
         parent::writeRDFBufferToWriteFile();
         $id = $a[0];
         $qname = "affymetrix:{$id}";
         $label = "probeset {$a['0']} on GeneChip {$a['1']} ({$a['2']})";
         parent::addRDF(parent::describeIndividual($qname, $label, $this->getVoc() . "Probeset") . parent::describeClass($this->getVoc() . "Probeset", "Affymetrix probeset"));
         trigger_error($id, E_USER_NOTICE);
         // now process the entries
         foreach ($a as $k => $v) {
             if (trim($v) == '---') {
                 continue;
             }
             // multi-valued entries are separated by " /// "
             $b = explode(" /// ", $v);
             $r = $this->Map($k);
             if (isset($r)) {
                 foreach ($b as $c) {
                     $d = explode(" // ", $c);
                     if ($r == 'symbol') {
                         $d[0] = str_replace(" ", "-", $d[0]);
                     }
                     $s = $this->getRegistry()->getPreferredPrefix($r);
                     if ($s == "ec") {
                         // keep only what follows the colon when there is one
                         $e = explode(":", $d[0]);
                         if (isset($e[1])) {
                             $d[0] = $e[1];
                         }
                     }
                     $this->addRDF(parent::triplify($qname, $this->getVoc() . "x-{$s}", "{$s}:" . $d[0]) . parent::describeProperty($this->getVoc() . "x-{$s}", "a relation to {$s}"));
                 }
             } else {
                 // we handle manually
                 $rel = null;
                 $label = $header[$k];
                 switch ($label) {
                     case 'GeneChip Array':
                         $array_id = parent::getRes() . str_replace(" ", "-", $v);
                         parent::addRDF(parent::triplify($qname, $this->getVoc() . "genechip-array", $array_id) . parent::describeIndividual($array_id, "Affymetrix {$v} GeneChip array", $this->getVoc() . "Genechip-Array") . parent::describeClass($this->getVoc() . "Genechip-Array", "Affymetrix GeneChip array"));
                         break;
                     case 'Gene Ontology Biological Process':
                     case 'Gene Ontology Cellular Component':
                     case 'Gene Ontology Molecular Function':
                         $rel = $go_relations[$label];
                         $prefix = "go";
                         foreach ($b as $c) {
                             $d = explode(" // ", $c);
                             parent::addRDF($this->triplify($qname, $this->getVoc() . $rel, "{$prefix}:" . $d[0]) . $this->describeProperty($this->getVoc() . $rel, "{$rel}"));
                         }
                         break;
                     case 'Transcript Assignments':
                         foreach ($b as $c) {
                             $d = explode(" // ", $c);
                             $transcript_id = $d[0];
                             // an assignment without a source name cannot form a qname; treat as empty
                             $prefix = isset($d[2]) ? $d[2] : '---';
                             if ($prefix == '---' || $transcript_id == '---') {
                                 continue;
                             }
                             if (isset($prefix_map[$prefix])) {
                                 $prefix = $prefix_map[$prefix];
                             } elseif ($prefix == 'gi|53267') {
                                 $prefix = 'gi';
                                 $transcript_id = '53267';
                             } elseif ($prefix == 'broad-tcup') {
                                 // prefix is kept; only the part of the id before the first dash is used
                                 $e = explode("-", $transcript_id);
                                 $transcript_id = $e[0];
                             }
                             parent::addRDF(parent::triplify($qname, $this->getVoc() . "transcript-assignment", "{$prefix}:{$transcript_id}") . parent::describeProperty($this->getVoc() . "transcript-assignment", "transcript assignment"));
                         }
                         break;
                     case 'Annotation Transcript Cluster':
                         // intentionally not converted
                         break;
                     case 'Annotation Date':
                         // e.g. "Jun 9, 2011"
                         $rel = "annotation-date";
                         if (preg_match("/^([A-Za-z]+) ([0-9]+), ([0-9]{4})\$/", $v, $m) === 1) {
                             // Read the groups by index instead of list()-assigning the match
                             // array onto itself, which PHP documents as undefined behaviour.
                             $month = $this->getMonth($m[1]);
                             $day = $m[2];
                             $year = $m[3];
                             if (!$day || $day == "0") {
                                 $day = "01";
                             }
                             $date = $year . "-" . $month . "-" . str_pad($day, 2, "0", STR_PAD_LEFT) . "T00:00:00Z";
                             parent::addRDF(parent::triplifyString($qname, $this->getVoc() . $rel, $date, "xsd:dateTime") . parent::describeProperty($this->getVoc() . $rel, "{$rel}"));
                         } else {
                             trigger_error("could not match date from {$v}", E_USER_ERROR);
                         }
                         break;
                     case 'Species Scientific Name':
                         // intentionally not converted
                         break;
                     case 'Transcript ID(Array Design)':
                         $rel = 'transcript';
                         // fall through: emitted like any other literal column
                     case 'Sequence type':
                     default:
                         if ($rel === null) {
                             // relation name derived from the header, e.g. "Sequence type" => "sequence-type"
                             $rel = str_replace(" ", "-", strtolower($label));
                         }
                         foreach ($b as $c) {
                             parent::addRDF(parent::triplifyString($qname, $this->getVoc() . $rel, stripslashes($c)) . parent::describeProperty($this->getVoc() . $rel, "{$rel}"));
                         }
                         break;
                 }
             }
         }
         $this->WriteRDFBufferToWriteFile();
     }
 }
Exemplo n.º 7
0
 /**
  * Convert the rows of a tab-delimited NDC product table into RDF.
  *
  * The first line of the stream is treated as a header and discarded. Every
  * following line must split into exactly 18 tab-separated columns; other
  * lines are reported with trigger_error() and skipped. The RDF buffer is
  * flushed with parent::WriteRDFBufferToWriteFile() after each accepted row.
  *
  * Columns read by this method (0-based):
  *   0  identifier appended to the dataset namespace to form the product URI
  *   1  product id                2  product type
  *   3  proprietary name          4  trade name suffix
  *   5  non-proprietary names (";" separated)
  *   6  dosage forms ("," separated)
  *   7  routes ("; " separated)
  *   8  start marketing date      9  end marketing date
  *      (sliced as 4-2-2 characters, presumably YYYYMMDD - confirm against the data)
  *   10 marketing category        11 application number
  *   12 labeller name
  *   13 substances, 14 strengths, 15 units (parallel ";" separated lists)
  *   16 pharmacological classes ("," separated)
  *
  * @param resource $fpin readable stream handle positioned at the header line
  */
 function product($fpin)
 {
     // discard the header line
     fgets($fpin);
     while (($l = fgets($fpin, 100000)) !== false) {
         $a = explode("\t", $l);
         if (count($a) != 18) {
             trigger_error("Expected 18 coloumns, instead found" . count($a));
             continue;
         }
         $product_id = parent::getNamespace() . $a[0];
         $product_label = $a[3];
         $product_type_label = ucfirst(strtolower($a[2]));
         // The type URI must be derived from the product *type*, not from the
         // product's own name, otherwise every product gets a private type node.
         $product_type = parent::getVoc() . str_replace(" ", "-", $product_type_label);
         parent::addRDF(parent::describeIndividual($product_id, $product_label, parent::getVoc() . "Product") . parent::describeClass(parent::getVoc() . "Product", "NDC Product") . parent::triplify($product_id, parent::getVoc() . "product-type", $product_type) . parent::describeIndividual($product_type, $product_type_label, parent::getVoc() . "Product-Type") . parent::describeClass(parent::getVoc() . "Product-Type", "Product Type") . parent::triplifyString($product_id, parent::getVoc() . "product-id", $a[1]) . parent::triplifyString($product_id, parent::getVoc() . "proprietary-name", $a[3]) . parent::triplifyString($product_id, parent::getVoc() . "trade-name-suffix", $a[4]));
         if ($a[5]) {
             // multi-valued: one literal per non-proprietary name
             $b = explode(";", $a[5]);
             foreach ($b as $c) {
                 parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "non-proprietary-name", trim($c)));
             }
         }
         if ($a[6]) {
             // multi-valued: each dosage form becomes its own individual
             $b = explode(",", $a[6]);
             foreach ($b as $c) {
                 $dosageform = strtolower($c);
                 $dosageform_id = parent::getVoc() . str_replace(" ", "-", ucfirst(strtolower($c)));
                 parent::addRDF(parent::describeIndividual($dosageform_id, $dosageform, parent::getVoc() . "Dosage-Form") . parent::describeClass(parent::getVoc() . "Dosage-Form", "NDC Dosage Form") . parent::triplify($product_id, parent::getVoc() . "dosage-form", $dosageform_id));
             }
         }
         if ($a[7]) {
             // multi-valued: each route becomes its own individual
             $b = explode("; ", $a[7]);
             foreach ($b as $c) {
                 $route = strtolower(trim($c));
                 $route_id = parent::getVoc() . str_replace(" ", "-", ucfirst(strtolower($c)));
                 parent::addRDF(parent::describeIndividual($route_id, $route, parent::getVoc() . "Route") . parent::describeClass(parent::getVoc() . "Route", "NDC Drug Route") . parent::triplify($product_id, parent::getVoc() . "route", $route_id));
             }
         }
         if ($a[8]) {
             // substr() takes the subject string first; slice into yyyy-mm-dd
             $date = substr($a[8], 0, 4) . "-" . substr($a[8], 4, 2) . "-" . substr($a[8], 6, 2);
             parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "start-marketing-date", $date));
         }
         if ($a[9]) {
             $date = substr($a[9], 0, 4) . "-" . substr($a[9], 4, 2) . "-" . substr($a[9], 6, 2);
             parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "end-marketing-date", $date));
         }
         if ($a[10]) {
             parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "marketing-category", $a[10]));
         }
         if ($a[11]) {
             parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "application-number", $a[11]));
         }
         // create a labeller node, identified by the md5 of the labeller name
         if ($a[12]) {
             $labeller_id = parent::getRes() . md5($a[12]);
             $label = addslashes($a[12]);
             parent::addRDF(parent::describeIndividual($labeller_id, $label, parent::getVoc() . "Labeller") . parent::describeClass(parent::getVoc() . "Labeller", "NDC Labeller") . parent::triplify($product_id, parent::getVoc() . "labeller", $labeller_id));
         }
         // columns 13-15 are parallel lists: substance, strength and unit share an index
         if ($a[13]) {
             $substances = explode(";", $a[13]);
             $strengths = explode(";", $a[14]);
             $units = explode(";", $a[15]);
             foreach ($substances as $i => $substance) {
                 // list the active ingredient
                 $ingredient_label = strtolower($substance);
                 // the strength and unit lists may be shorter than the substance list
                 $strength = isset($strengths[$i]) ? $strengths[$i] : '';
                 $unit = isset($units[$i]) ? $units[$i] : '';
                 $ingredient_id = parent::getRes() . md5($ingredient_label);
                 parent::addRDF(parent::describeIndividual($ingredient_id, $ingredient_label, parent::getVoc() . "Ingredient") . parent::describeClass(parent::getVoc() . "Ingredient", "NDC Ingredient") . parent::triplify($product_id, parent::getVoc() . "ingredient", $ingredient_id));
                 // describe the substance composition (amount + unit + ingredient)
                 $substance_label = "{$strength} {$unit} {$ingredient_label}";
                 $substance_id = parent::getRes() . md5($substance_label);
                 parent::addRDF(parent::describeIndividual($substance_id, $substance_label, parent::getVoc() . "Substance") . parent::triplifyString($substance_id, parent::getVoc() . "amount", $strength) . parent::describeClass(parent::getVoc() . "Substance", "NDC Substance"));
                 $unit_id = parent::getVoc() . md5($unit);
                 parent::addRDF(parent::describeIndividual($unit_id, $unit, parent::getVoc() . "Unit") . parent::describeClass(parent::getVoc() . "Unit", "NDC Unit") . parent::triplify($substance_id, parent::getVoc() . "amount_unit", $unit_id) . parent::triplify($product_id, parent::getVoc() . "has-part", $substance_id));
             }
         }
         if ($a[16]) {
             // multi-valued: each pharmacological class becomes its own individual
             $b = explode(",", $a[16]);
             foreach ($b as $c) {
                 $cat_id = parent::getVoc() . md5($c);
                 parent::addRDF(parent::describeIndividual($cat_id, $c, parent::getVoc() . "Pharmacological-Class") . parent::describeClass(parent::getVoc() . "Pharmacological-Class", "NDC Pharmacological Class") . parent::triplify($product_id, parent::getVoc() . "pharmacological-class", $cat_id));
             }
         }
         parent::WriteRDFBufferToWriteFile();
     }
 }
Exemplo n.º 8
0
 /**
  * Convert one decoded OMIM entry into RDF and add it to the RDF buffer.
  *
  * The entry is read from $obj["omim"]["entryList"][0]["entry"]. The method
  * emits, when the corresponding keys are present: titles, text sections
  * (plus "refers-to" links for six-digit numbers in braces), allelic
  * variants, the clinical synopsis with its coded phenotypes, the gene map,
  * phenotype maps, PubMed references and external database links.
  *
  * Nothing is returned; all output goes through parent::addRDF().
  *
  * @param array  $obj  decoded OMIM response holding a single entry
  * @param string $type entry type label; used (spaces and slashes replaced
  *                     by "-", first letter upper-cased) to build the class
  *                     URI the entry is typed with
  */
 function ParseEntry($obj, $type)
 {
     $o = $obj["omim"]["entryList"][0]["entry"];
     $omim_id = $o['mimNumber'];
     $omim_uri = parent::getNamespace() . $o['mimNumber'];
     if (isset($o['version'])) {
         parent::setDatasetVersion($o['version']);
     }
     // add the links
     parent::addRDF($this->QQuadO_URL($omim_uri, "rdfs:seeAlso", "http://omim.org/entry/" . $omim_id));
     parent::addRDF($this->QQuadO_URL($omim_uri, "owl:sameAs", "http://identifiers.org/omim/" . $omim_id));
     // parse titles
     $titles = $o['titles'];
     $type_uri = parent::getVoc() . str_replace(array(" ", "/"), "-", ucfirst($type));
     parent::addRDF(parent::describeIndividual($omim_uri, $titles['preferredTitle'], $type_uri) . parent::describeClass($type_uri, $type));
     if (isset($titles['preferredTitle'])) {
         parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "preferred-title", $titles['preferredTitle']));
     }
     if (isset($titles['alternativeTitles'])) {
         // alternative titles are ";;" separated
         $b = explode(";;", $titles['alternativeTitles']);
         foreach ($b as $title) {
             parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "alternative-title", trim($title)));
         }
     }
     // parse text sections
     if (isset($o['textSectionList'])) {
         foreach ($o['textSectionList'] as $i => $section) {
             if ($section['textSection']['textSectionTitle'] == "Description") {
                 parent::addRDF(parent::triplifyString($omim_uri, "dc:description", $section['textSection']['textSectionContent']));
             } else {
                 // any other section title becomes a predicate in the dataset vocabulary
                 $p = str_replace(" ", "-", strtolower($section['textSection']['textSectionTitle']));
                 parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "{$p}", $section['textSection']['textSectionContent']));
             }
             // parse the omim references: six-digit numbers wrapped in braces
             preg_match_all("/\\{([0-9]{6})\\}/", $section['textSection']['textSectionContent'], $m);
             if (isset($m[1][0])) {
                 foreach ($m[1] as $oid) {
                     parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "refers-to", "omim:{$oid}"));
                 }
             }
         }
     }
     // allelic variants
     if (isset($o['allelicVariantList'])) {
         foreach ($o['allelicVariantList'] as $i => $v) {
             $v = $v['allelicVariant'];
             $uri = parent::getRes() . "{$omim_id}" . "_allele_" . $i;
             $label = str_replace("\n", " ", $v['name']);
             parent::addRDF(parent::describeIndividual($uri, $label, parent::getVoc() . "Allelic-Variant") . parent::describeClass(parent::getVoc() . "Allelic-Variant", "Allelic Variant"));
             if (isset($v['alternativeNames'])) {
                 $names = explode(";;", $v['alternativeNames']);
                 foreach ($names as $name) {
                     $name = str_replace("\n", " ", $name);
                     parent::addRDF(parent::triplifyString($uri, parent::getVoc() . "alternative-names", $name));
                 }
             }
             if (isset($v['text'])) {
                 parent::addRDF(parent::triplifyString($uri, "dc:description", $v['text']));
             }
             if (isset($v['mutations'])) {
                 parent::addRDF(parent::triplifyString($uri, parent::getVoc() . "mutation", $v['mutations']));
             }
             if (isset($v['dbSnps'])) {
                 $snps = explode(",", $v['dbSnps']);
                 foreach ($snps as $snp) {
                     parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-dbsnp", "dbsnp:" . $snp));
                 }
             }
             parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "variant", $uri));
         }
     }
     // clinical synopsis
     if (isset($o['clinicalSynopsis'])) {
         $cs = $o['clinicalSynopsis'];
         $cs_uri = parent::getRes() . "" . $omim_id . "_cs";
         parent::addRDF(parent::describeIndividual($cs_uri, "Clinical synopsis for omim {$omim_id}", parent::getVoc() . "Clinical-Synopsis") . parent::describeClass(parent::getVoc() . "Clinical-Synopsis", "Clinical Synopsis") . parent::triplify($omim_uri, parent::getVoc() . "clinical-synopsis", $cs_uri));
         foreach ($cs as $k => $v) {
             if (!strstr($k, "Exists")) {
                 // ignore the boolean assertion.
                 // @todo ignore provenance for now
                 if (in_array($k, array('contributors', 'creationDate', 'editHistory', 'epochCreated', 'dateCreated', 'epochUpdated', 'dateUpdated'))) {
                     continue;
                 }
                 // normalise scalar values to a one-element map keyed by the field name
                 if (!is_array($v)) {
                     $v = array($k => $v);
                 }
                 foreach ($v as $k1 => $v1) {
                     $phenotypes = explode(";", $v1);
                     foreach ($phenotypes as $coded_phenotype) {
                         // parse out the codes
                         $coded_phenotype = trim($coded_phenotype);
                         if (!$coded_phenotype) {
                             continue;
                         }
                         // the label is the text with the brace-delimited code block removed
                         $phenotype = preg_replace("/\\{.*\\}/", "", $coded_phenotype);
                         $phenotype_id = parent::getRes() . "" . md5(strtolower($phenotype));
                         $entity_id = parent::getRes() . "" . $k1;
                         parent::addRDF(parent::describeIndividual($phenotype_id, $phenotype, parent::getVoc() . 'Characteristic') . parent::describeClass(parent::getVoc() . 'Characteristic', 'Characteristic') . parent::triplify($cs_uri, parent::getVoc() . "feature", $phenotype_id) . parent::describeIndividual($entity_id, $k1, parent::getVoc() . "Entity") . parent::describeClass(parent::getVoc() . "Entity", "Entity") . parent::triplify($phenotype_id, parent::getVoc() . "characteristic-of", $entity_id));
                         // parse out the vocab references
                         preg_match_all("/\\{([0-9A-Za-z \\:\\-\\.]+)\\}|;/", $coded_phenotype, $codes);
                         //preg_match_all("/((UMLS|HPO HP|SNOMEDCT|ICD10CM|ICD9CM|EOM ID)\:[A-Z0-9]+)/",$coded_phenotype,$m);
                         if (isset($codes[1][0])) {
                             foreach ($codes[1] as $entry) {
                                 $entries = explode(" ", trim($entry));
                                 foreach ($entries as $e) {
                                     // "HPO" / "EOM" are bare words preceding the actual code token
                                     if ($e == "HPO" || $e == "EOM") {
                                         continue;
                                     }
                                     $this->getRegistry()->parseQName($e, $ns, $id);
                                     if (!isset($ns) || $ns == '') {
                                         // no prefix: treat as an omim number, dropping anything after a "."
                                         $b = explode(".", $id);
                                         $ns = "omim";
                                         $id = $b[0];
                                     } else {
                                         $ns = str_replace(array("hpo", "id", "icd10cm", "icd9cm", "snomedct"), array("hp", "eom", "icd10", "icd9", "snomed"), $ns);
                                     }
                                     parent::addRDF(parent::triplify($phenotype_id, parent::getVoc() . "x-{$ns}", "{$ns}:{$id}"));
                                 }
                             }
                         }
                     }
                 }
             }
         }
     }
     // genemap
     if (isset($o['geneMap'])) {
         $map = $o['geneMap'];
         if (isset($map['chromosome'])) {
             parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "chromosome", (string) $map['chromosome']));
         }
         if (isset($map['cytoLocation'])) {
             parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "cytolocation", (string) $map['cytoLocation']));
         }
         if (isset($map['geneSymbols'])) {
             $b = preg_split("/[,;\\. ]+/", $map['geneSymbols']);
             foreach ($b as $symbol) {
                 parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "gene-symbol", "symbol:" . trim($symbol)));
             }
         }
         if (isset($map['geneName'])) {
             $b = explode(",", $map['geneName']);
             foreach ($b as $name) {
                 parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "gene-name", trim($name)));
             }
         }
         if (isset($map['mappingMethod'])) {
             $b = explode(",", $map['mappingMethod']);
             foreach ($b as $c) {
                 $mapping_method = trim($c);
                 $method_uri = $this->get_method_type($mapping_method);
                 // only methods for which get_method_type() did not return false are emitted
                 if ($method_uri !== false) {
                     parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "mapping-method", $method_uri));
                 }
             }
         }
         if (isset($map['mouseGeneSymbol'])) {
             $b = explode(",", $map['mouseGeneSymbol']);
             foreach ($b as $c) {
                 parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "mouse-gene-symbol", "symbol:" . strtoupper($c)));
             }
         }
         if (isset($map['mouseMgiID'])) {
             $b = explode(",", $map['mouseMgiID']);
             foreach ($b as $c) {
                 parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-mgi", $c));
             }
         }
         if (isset($map['geneInheritance']) && $map['geneInheritance'] != '') {
             parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "gene-inheritance", $map['geneInheritance']));
         }
     }
     // phenotype maps
     if (isset($o['phenotypeMapList'])) {
         foreach ($o['phenotypeMapList'] as $i => $phenotypeMap) {
             $phenotypeMap = $phenotypeMap['phenotypeMap'];
             $pm_uri = parent::getRes() . $omim_id . "_pm_" . ($i + 1);
             parent::addRDF(parent::describeIndividual($pm_uri, "phenotype mapping for {$omim_id}", parent::getVoc() . "Phenotype-Map") . parent::describeClass(parent::getVoc() . "Phenotype-Map", "OMIM Phenotype-Map") . parent::triplify($omim_uri, parent::getVoc() . "phenotype-map", $pm_uri));
             foreach (array_keys($phenotypeMap) as $k) {
                 if (in_array($k, array("mimNumber", "phenotypeMimNumber", "phenotypicSeriesMimNumber"))) {
                     // values under these keys are linked as omim resources
                     parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . $k, "omim:" . $phenotypeMap[$k]));
                 } elseif ($k == "geneSymbols") {
                     $l = explode(", ", $phenotypeMap[$k]);
                     foreach ($l as $gene) {
                         parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . "gene-symbol", "hgnc.symbol:" . $gene));
                     }
                 } elseif ($k == "phenotypeMappingKey") {
                     $l = $this->get_phenotype_mapping_method_type($phenotypeMap[$k]);
                     parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . "mapping-method", $l));
                 } else {
                     // everything else is kept as a plain literal under its own key
                     parent::addRDF(parent::triplifyString($pm_uri, parent::getVoc() . $k, $phenotypeMap[$k]));
                 }
             }
         }
     }
     // references
     if (isset($o['referenceList'])) {
         foreach ($o['referenceList'] as $i => $r) {
             $r = $r['reference'];
             if (isset($r['pubmedID'])) {
                 $pubmed_uri = "pubmed:" . $r['pubmedID'];
                 parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "article", $pubmed_uri));
                 // fall back to a generic label when the reference carries no title;
                 // reading $r['title'] directly would hit an undefined index
                 $title = 'article';
                 if (isset($r['title'])) {
                     $title = $r['title'];
                 }
                 parent::addRDF(parent::describe($pubmed_uri, addslashes($title)));
                 if (isset($r['articleUrl'])) {
                     parent::addRDF($this->QQuadO_URL($pubmed_uri, "rdfs:seeAlso", htmlentities($r['articleUrl'])));
                 }
             }
         }
     }
     // external ids
     if (isset($o['externalLinks'])) {
         foreach ($o['externalLinks'] as $k => $id) {
             if ($id === false) {
                 continue;
             }
             // $ns stays empty for link kinds that are deliberately not converted
             $ns = '';
             switch ($k) {
                 case 'approvedGeneSymbols':
                     $ns = 'symbol';
                     break;
                 case 'geneIDs':
                     $ns = 'ncbigene';
                     break;
                 case 'ncbiReferenceSequences':
                     $ns = 'gi';
                     break;
                 case 'genbankNucleotideSequences':
                     $ns = 'gi';
                     break;
                 case 'proteinSequences':
                     $ns = 'gi';
                     break;
                 case 'uniGenes':
                     $ns = 'unigene';
                     break;
                 case 'ensemblIDs':
                     $ns = 'ensembl';
                     break;
                 case 'swissProtIDs':
                     $ns = 'uniprot';
                     break;
                 case 'mgiIDs':
                     $ns = 'mgi';
                     // keep the segment after the first colon; leave the value
                     // untouched when it contains no colon at all
                     $b = explode(":", $id);
                     if (isset($b[1])) {
                         $id = $b[1];
                     }
                     break;
                 case 'flybaseIDs':
                     $ns = 'flybase';
                     break;
                 case 'zfinIDs':
                     $ns = 'zfin';
                     break;
                 case 'hprdIDs':
                     $ns = 'hprd';
                     break;
                 case 'orphanetDiseases':
                     $ns = 'orphanet';
                     break;
                 case 'refSeqAccessionIDs':
                     $ns = 'refseq';
                     break;
                 case 'ordrDiseases':
                     $ns = 'ordr';
                     // keep only the part before the first ";;"
                     $b = explode(";;", $id);
                     $id = $b[0];
                     break;
                 case 'snomedctIDs':
                     $ns = 'snomed';
                     break;
                 case 'icd10cmIDs':
                     $ns = 'icd10';
                     break;
                 case 'icd9cmIDs':
                     $ns = 'icd9';
                     break;
                 case 'umlsIDs':
                     $ns = 'umls';
                     break;
                 case 'wormbaseIDs':
                     $ns = 'wormbase';
                     break;
                 case 'diseaseOntologyIDs':
                     $ns = 'do';
                     break;
                 // specifically ignoring the following link kinds
                 case 'geneTests':
                 case 'cmgGene':
                 case 'geneticAllianceIDs':
                 case 'nextGxDx':
                 case 'nbkIDs':
                     // e.g. NBK1207;;Alport Syndrome and Thin Basement Membrane Nephropathy
                 case 'newbornScreeningUrls':
                 case 'decipherUrls':
                 case 'geneReviewShortNames':
                 case 'locusSpecificDBs':
                 case 'geneticsHomeReferenceIDs':
                 case 'omiaIDs':
                 case 'coriellDiseases':
                 case 'clinicalDiseaseIDs':
                 case 'possumSyndromes':
                 case 'keggPathways':
                 case 'gtr':
                 case 'gwasCatalog':
                 case 'mgiHumanDisease':
                 case 'wormbaseDO':
                 case 'dermAtlas':
                     // true/false
                     break;
                 default:
                     echo "unhandled external link {$k} {$id}" . PHP_EOL;
             }
             $ids = explode(",", $id);
             foreach ($ids as $id) {
                 if ($ns) {
                     if (strstr($id, ";;") === FALSE) {
                         parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-{$ns}", $ns . ':' . $id));
                     } else {
                         $b = explode(";;", $id);
                         // multiple ids//names: parts containing a lowercase
                         // letter are skipped, the remaining parts are linked as ids
                         foreach ($b as $c) {
                             preg_match("/([a-z])/", $c, $m);
                             if (!isset($m[1])) {
                                 parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-{$ns}", $ns . ':' . $c));
                             }
                         }
                     }
                 }
             }
         }
     }
     //external links
 }
Exemplo n.º 9
0
 function Parse()
 {
     $l = parent::getReadFile()->read(100000);
     $header = explode("\t", trim(substr($l, 1)));
     if (($c = count($header)) != 54) {
         trigger_erorr("Expecting 54 columns, found {$c}!");
         return FALSE;
     }
     // check # of columns
     while ($l = parent::getReadFile()->read(500000)) {
         $a = explode("\t", trim($l));
         // irefindex identifiers
         $rigid = "irefindex." . $a[34];
         # checksum for interaction
         $rogida = "irefindex." . $a[32];
         # checksum for A
         $rogidb = "irefindex." . $a[33];
         # checksum for B
         $irigid = "irefindex.irigid:" . $a[44];
         # integer id for interaction
         $irogida = "irefindex.irogid:" . $a[42];
         # integer id for A
         $irogidb = "irefindex.irogid:" . $a[43];
         # integer id for B
         $crigid = "irefindex.crigid:" . $a[47];
         # checksum for canonical interaction
         $icrigid = "irefindex.icrigid:" . $a[50];
         # integer id for canonical interaction
         $crogida = "irefindex.crogid:" . $a[45];
         # checksum for A's canonical group
         $crogidb = "irefindex.crogid:" . $a[46];
         # checksum for B's canonical group
         $icrogida = "irefindex.icrogid:" . $a[48];
         # integer for A's canonical group
         $icrogidb = "irefindex.icrogid:" . $a[49];
         # integer for B's canonical group
         // 13 contains the original identifier, the rigid, and the edgetype
         $ids = explode("|", $a[13]);
         if (count($ids) != 3) {
             trigger_error("Expecting 3 entries in column 14");
             print_r($ids);
             exit;
         }
         parent::getRegistry()->parseQName($ids[0], $ns, $id);
         if ($id == '-') {
             // this happens with hprd
             $iid = "hprd:" . substr($ids[1], 6);
         } else {
             $iid = $ns . ":" . $id;
         }
         // get the type
         if ($a[52] == "X") {
             $label = "{$a['0']} - {$a['1']} Interaction";
             $type = "Pairwise-Interaction";
         } else {
             if ($a[52] == "C") {
                 $label = $a[53] . " component complex";
                 #num of participants
                 $type = "Multimeric-Complex";
             } else {
                 if ($a[52] == "Y") {
                     $label = "{$a['0']} homomeric complex";
                     $type = "Homopolymeric-Complex";
                 }
             }
         }
         parent::addRDF(parent::describeIndividual($iid, $label, parent::getVoc() . $type) . parent::describeClass(parent::getVoc() . $type, str_replace("-", " ", $type)));
         // interaction type[52] by method[6]
         unset($method);
         if ($a[6] != '-') {
             $data = $this->ParseStringArray($a[6]);
             $method = trim($data["label"]);
             $qname = trim($data["ns"]) . ":" . trim($data["id"]);
             if ($qname) {
                 parent::addRDF(parent::triplify($iid, parent::getVoc() . "method", $qname) . parent::describeClass($qname, $data['label']));
             }
         }
         parent::addRDF(parent::triplify($iid, "rdfs:seeAlso", "http://wodaklab.org/iRefWeb/interaction/show/" . $a[50]));
         // set the interactors
         for ($i = 0; $i <= 1; $i++) {
             $p = 'a';
             if ($i == 1) {
                 $p = 'b';
             }
             $data = $this->ParseStringArray($a[$i]);
             $interactor = trim($data["ns"]) . ":" . trim($data["id"]);
             parent::addRDF(parent::triplify($iid, parent::getVoc() . "interactor_{$p}", $interactor));
             // biological role
             $role = $a[16 + $i];
             if ($role != '-') {
                 $data = $this->ParseStringArray($role);
                 $qname = trim($data["ns"]) . ":" . trim($data["id"]);
                 if ($qname != "mi:0000") {
                     parent::addRDF(parent::triplify($iid, parent::getVoc() . "interactor_{$p}" . "_biological_role", $qname) . parent::describeClass($qname, $data['label']));
                 }
             }
             // experimental role
             $role = $a[18 + $i];
             if ($role != '-') {
                 $data = $this->ParseStringArray($role);
                 $qname = trim($data["ns"]) . ":" . trim($data["id"]);
                 if ($qname != "mi:0000") {
                     parent::addRDF(parent::triplify($iid, parent::getVoc() . "interactor_{$p}" . "_experimental_role", $qname) . parent::describeClass($qname, $data['label']));
                 }
             }
             // interactor type
             $type = $a[20 + $i];
             if ($type != '-') {
                 $data = $this->ParseStringArray($type);
                 $qname = trim($data["ns"]) . ":" . trim($data["id"]);
                 parent::addRDF(parent::triplify($interactor, "rdf:type", $qname) . parent::describeClass($qname, $data['label']));
             }
         }
         // add the alternatives through the taxon + seq redundant group
         for ($i = 2; $i <= 3; $i++) {
             $taxid = '';
             $rogid = "irefindex." . $a[32 + ($i - 2)];
             parent::addRDF(parent::describeIndividual($rogid, "", parent::getVoc() . "Taxon-Sequence-Identical-Group") . parent::describeClass(parent::getVoc() . "Taxon-Sequence-Identical-Group", "Taxon + Sequence Identical Group"));
             $tax = $a[9 + ($i - 2)];
             if ($tax && $tax != '-' && $tax != '-1') {
                 $data = $this->ParseStringArray($tax);
                 $taxid = trim($data["ns"]) . ":" . trim($data["id"]);
                 parent::addRDF(parent::triplify($rogid, parent::getVoc() . "x-taxonomy", $taxid));
             }
             $list = explode("|", $a[3 + ($i - 2)]);
             foreach ($list as $item) {
                 $data = $this->ParseStringArray($item);
                 $ns = trim($data["ns"]);
                 $id = trim($data["id"]);
                 $qname = $ns . ":" . $id;
                 if ($ns && $ns != 'rogid' && $ns != 'irogid' and $id != '-') {
                     parent::addRDF(parent::triplify($rogid, parent::getVoc() . "has-member", $qname));
                     if ($taxid && $taxid != '-' && $taxid != '-1') {
                         parent::addRDF(parent::triplify($qname, parent::getVoc() . "x-taxonomy", $taxid));
                     }
                 }
             }
         }
         // publications
         $list = explode("|", $a[8]);
         foreach ($list as $item) {
             if ($item == '-' && $item != 'pubmed:0') {
                 continue;
             }
             $data = $this->ParseStringArray($item);
             $qname = trim($data["ns"]) . ":" . trim($data["id"]);
             parent::addRDF(parent::triplify($iid, parent::getVoc() . "article", $qname));
         }
         // MI interaction type
         if ($a[11] != '-' && $a[11] != 'NA') {
             $data = $this->ParseStringArray($a[11]);
             $qname = trim($data["ns"]) . ":" . trim($data["id"]);
             parent::addRDF(parent::triplify($iid, "rdf:type", $qname));
             if (!isset($defined[$qname])) {
                 $defined[$qname] = '';
                 parent::addRDF(parent::triplifyString($qname, "rdfs:label", $data['label']));
             }
         }
         // source
         if ($a[12] != '-') {
             $data = $this->ParseStringArray($a[12]);
             $qname = trim($data["ns"]) . ":" . trim($data["id"]);
             parent::addRDF(parent::triplify($iid, parent::getVoc() . "source", $qname));
         }
         // confidence
         $list = explode("|", $a[14]);
         foreach ($list as $item) {
             $data = $this->ParseStringArray($item);
             $ns = trim($data["ns"]);
             $id = trim($data["id"]);
             if ($ns == 'lpr') {
                 //  lowest number of distinct interactions that any one article reported
                 parent::addRDF(parent::triplifyString($iid, parent::getVoc() . "minimum-number-interactions-reported", $id));
             } else {
                 if ($ns == "hpr") {
                     //  higher number of distinct interactions that any one article reports
                     parent::addRDF(parent::triplifyString($iid, parent::getVoc() . "maximum-number-interactions-reported", $id));
                 } else {
                     if ($ns = 'hp') {
                         //  total number of unique PMIDs used to support the interaction
                         parent::addRDF(parent::triplifyString($iid, parent::getVoc() . "number-supporting-articles", $id));
                     }
                 }
             }
         }
         // expansion method
         if ($a[15]) {
             $id = parent::getRes() . md5($a[15]);
             parent::addRDF(parent::describeIndividual($id, $a[15], parent::getVoc() . "Expansion-Method") . parent::describeClass(parent::getVoc() . "Expansion-Method", "Expansion Method") . parent::triplify($iid, parent::getVoc() . "expansion-method", $id));
         }
         // host organism
         if ($a[28] != '-') {
             $data = $this->ParseStringArray($a[28]);
             $qname = trim($data["ns"]) . ":" . trim($data["id"]);
             parent::addRDF(parent::triplify($iid, parent::getVoc() . "host-organism", $qname));
         }
         // @todo add to record
         // created 2010/05/18
         $date = str_replace("/", "-", $a[30]) . "T00:00:00Z";
         parent::addRDF(parent::triplifyString($iid, "dc:created", $date, "xsd:dateTime"));
         // taxon-sequence identical interaction group
         parent::addRDF(parent::triplify($iid, parent::getVoc() . "taxon-sequence-identical-interaction", $rigid) . parent::triplify($rigid, "rdf:type", parent::getVoc() . "Taxon-Sequence-Identical-Interaction") . parent::describeClass(parent::getVoc() . "Taxon-Sequence-Identical-Interaction", "Taxon + Sequence Identical Interaction") . parent::triplify($rigid, parent::getVoc() . "irigid", $irigid) . parent::triplify($rigid, parent::getVoc() . "interactor-a", $rogida) . parent::triplify($rogida, parent::getVoc() . "irogid", $irogida) . parent::triplify($rigid, parent::getVoc() . "interactor-b", $rogidb) . parent::triplify($rogidb, parent::getVoc() . "irogid", $irogidb) . parent::triplify($rogida, parent::getVoc() . "canonical-group", $crogida) . parent::triplify($rogidb, parent::getVoc() . "canonical-group", $crogidb) . parent::triplify($rigid, parent::getVoc() . "taxon-sequence-similar-interaction", $crigid) . parent::triplify($crigid, "rdf:type", parent::getVoc() . "Taxon-Sequence-Canonical-Interaction") . parent::describeClass(parent::getVoc() . "Taxon-Sequence-Canonical-Interaction", "Taxon + Sequence Canonical Interaction") . parent::triplify($crigid, parent::getVoc() . "icrigid", $icrigid) . parent::triplify($crigid, parent::getVoc() . "interactor-a-canonical-group", $crogida) . parent::triplify($crogida, "rdf:type", parent::getVoc() . "Taxon-Sequence-Similar-Group") . parent::triplify($crogida, parent::getVoc() . "icrogid", $icrogida) . parent::triplify($crigid, parent::getVoc() . "interactor-b-canonical-group", $crogidb) . parent::triplify($crogidb, "rdf:type", parent::getVoc() . "Taxon-Sequence-Similar-Group") . parent::triplify($crogidb, parent::getVoc() . "icrogid", $icrogidb) . parent::describeClass(parent::getVoc() . "Taxon-Sequence-Similar-Group", "Taxon + Sequence Similar Group"));
         parent::writeRDFBufferToWriteFile();
     }
 }
Exemplo n.º 10
0
 /**
  * Emits RDF for every `$item_name` element found under `$x->{$list_name}`.
  *
  * For each matching element two kinds of resources are written:
  *  - one per iterated element, identified by the md5 of its text, typed with
  *    the capitalised `$item_name` and carrying the element's attributes;
  *  - one for the element as a whole, identified by the md5 of its XML, which
  *    is described once for each of its child elements.
  * Both are linked from `$id` through `$predicate`.
  *
  * @param object $x              XML node holding the list (passed by reference)
  * @param string $id             subject the generated resources are attached to
  * @param string $list_name      name of the list property on `$x`
  * @param string $item_name      name of the item property inside each list entry
  * @param string $predicate      predicate linking `$id` to each generated resource
  * @param mixed  $list_item_name accepted for interface compatibility; not used here
  */
 function AddCategory(&$x, $id, $list_name, $item_name, $predicate, $list_item_name = null)
 {
     if (!isset($x->{$list_name})) {
         return;
     }
     foreach ($x->{$list_name} as $entry) {
         if (!isset($entry->{$item_name}) || $entry->{$item_name} == '') {
             continue;
         }
         $node = $entry->{$item_name};
         $node_attributes = $node->attributes();
         // One resource per iterated element, keyed by the md5 of its text.
         foreach ($node as $value) {
             $value_uri = parent::getvoc() . md5($value);
             $value_class = parent::getVoc() . ucfirst($item_name);
             $this->addRDF(
                 $this->describeIndividual($value_uri, "" . $value, $value_class)
                 . $this->describeClass($value_class, ucfirst("" . $item_name))
                 . $this->triplify($id, $predicate, $value_uri)
             );
             foreach ($node_attributes as $attr_name => $attr_value) {
                 parent::addRDF($this->triplifyString($value_uri, parent::getVoc() . $attr_name, "" . $attr_value));
             }
         }
         // One resource for the whole element, keyed by the md5 of its XML.
         $node_uri = parent::getvoc() . md5($node->asXML());
         foreach ($node->children() as $child_name => $child) {
             $child_label = $child_name == "name" ? $child : $predicate;
             $child_class = parent::getVoc() . ucfirst($child_name);
             $this->addRDF(
                 $this->describeIndividual($node_uri, $child_label, $child_class)
                 . $this->describeClass($child_class, ucfirst("" . $child))
                 . $this->triplifyString($node_uri, parent::getVoc() . $child_name, $child)
                 . $this->triplify($id, $predicate, $node_uri)
             );
         }
     }
 }
Exemplo n.º 11
0
 /**
  * Converts the citation records of the current read file into RDF.
  *
  * Each line is split on "\t|\t" after stripping the trailing "\t|\n"
  * terminator. Columns used:
  *   [0] citation id, [1] citation key (also used as the label),
  *   [2] PubMed id ("0" means none), [4] URL, [5] free text,
  *   [6] space-separated list of taxon ids citing this record.
  * Lines lacking columns 1 or 2 are skipped. The RDF buffer is flushed
  * after every record.
  */
 private function citations()
 {
     while ($l = $this->getReadFile()->read(2000000)) {
         $a = explode("\t|\t", rtrim($l, "\t|\n"));
         if (!isset($a[1]) or !isset($a[2])) {
             continue;
         }
         $c = parent::getRes() . "citation-id-" . $a[0];
         $seealso = isset($a[4]) ? trim($a[4]) : "";
         if ($seealso) {
             // repair the two malformed URL prefixes handled here, then make sure a scheme is present
             $seealso = str_replace(array("lx: DOI ", "http;//"), array("http://dx.doi.org/", "http://"), $seealso);
             if (strlen($seealso) > 2 and !strstr($seealso, "http")) {
                 $seealso = "http://" . $seealso;
             }
             $seealso = parent::triplify($c, "rdfs:seeAlso", $seealso);
         }
         parent::addRDF(parent::describeIndividual($c, $a[1], $this->getVoc() . "Citation") . parent::describeClass($this->getVoc() . "Citation", "Citation") . parent::triplifyString($c, parent::getVoc() . "citation-key", $a[1]) . ($a[2] == "0" ? "" : parent::triplify($c, parent::getVoc() . "x-pubmed", "pubmed:" . $a[2])) . $seealso . ((isset($a[5]) and $a[5]) ? parent::triplifyString($c, parent::getVoc() . "text", str_replace("\"", "", $a[5])) : ""));
         if (isset($a[6])) {
             foreach (explode(" ", trim($a[6])) as $taxid) {
                 // explode() never returns an empty array, so blank tokens
                 // (empty column, repeated spaces) must be filtered explicitly
                 // or they would yield a bare "taxonomy:" subject.
                 if ($taxid === '') {
                     continue;
                 }
                 parent::addRDF(parent::triplify("taxonomy:{$taxid}", $this->getVoc() . "citation", $c));
             }
         }
         $this->writeRDFBufferToWriteFile();
     }
 }
Exemplo n.º 12
0
 /**
  * Converts the disorder-gene associations of an XML file into RDF.
  *
  * For every Disorder in each parsed DisorderList, and every
  * DisorderGeneAssociation beneath it, this writes:
  *  - the gene (label, symbol, all synonyms, all external references);
  *  - the association itself, typed by its association type and linked to
  *    its status, the disorder and the gene.
  * The RDF buffer is flushed once per disorder.
  *
  * @param string $file path handed to CXML
  */
 function genes($file)
 {
     $xml = new CXML($file);
     while ($xml->parse("DisorderList") == TRUE) {
         $x = $xml->GetXMLRoot();
         foreach ($x->Disorder as $d) {
             $orphanet_id = parent::getNamespace() . (string) $d->OrphaNumber;
             $disorder_name = (string) $d->Name;
             foreach ($d->DisorderGeneAssociationList->DisorderGeneAssociation as $dga) {
                 // gene
                 $gene = $dga->Gene;
                 $gene_id = parent::getNamespace() . (string) $gene->OrphaNumber;
                 $gene_label = (string) $gene->Name;
                 $gene_symbol = (string) $gene->Symbol;
                 parent::addRDF(parent::describeIndividual($gene_id, $gene_label, parent::getVoc() . "Gene") . parent::describeClass(parent::getVoc() . "Gene", "orphanet gene") . parent::triplifyString($gene_id, parent::getVoc() . "symbol", $gene_symbol));
                 // Iterate the children of each list container: reading
                 // ->Synonym / ->ExternalReference off the container only
                 // yields the first child and silently drops the rest.
                 foreach ($gene->SynonymList as $synonym_list) {
                     foreach ($synonym_list->Synonym as $synonym_node) {
                         parent::addRDF(parent::triplifyString($gene_id, parent::getVoc() . "synonym", (string) $synonym_node));
                     }
                 }
                 foreach ($gene->ExternalReferenceList as $reference_list) {
                     foreach ($reference_list->ExternalReference as $er) {
                         $db = parent::getRegistry()->getPreferredPrefix((string) $er->Source);
                         $id = (string) $er->Reference;
                         $xref = "{$db}:{$id}";
                         parent::addRDF(parent::triplify($gene_id, parent::getVoc() . "x-{$db}", $xref));
                     }
                 }
                 // association: identified by the disorder number plus a hash of its XML
                 $dga_id = parent::getRes() . (string) $d->OrphaNumber . "_" . md5($dga->asXML());
                 $ga = $dga->DisorderGeneAssociationType;
                 $ga_id = parent::getNamespace() . (string) $ga->attributes()->id;
                 $ga_label = (string) $ga->Name;
                 $status = $dga->DisorderGeneAssociationStatus;
                 $s_id = parent::getNamespace() . (string) $status->attributes()->id;
                 $s_label = (string) $status->Name;
                 parent::addRDF(parent::describeIndividual($dga_id, "{$ga_label} {$gene_label} in {$disorder_name} ({$s_label})", $ga_id) . parent::describeClass($ga_id, $ga_label, parent::getVoc() . "Disorder-Gene-Association") . parent::triplify($dga_id, parent::getVoc() . "status", $s_id) . parent::describeClass($s_id, $s_label, parent::getVoc() . "Disorder-Gene-Association-Status") . parent::triplify($dga_id, parent::getVoc() . "disorder", $orphanet_id) . parent::describeIndividual($orphanet_id, $disorder_name, parent::getVoc() . "Disorder") . parent::triplify($dga_id, parent::getVoc() . "gene", $gene_id));
             }
             parent::writeRDFBufferToWriteFile();
         }
     }
     unset($xml);
 }
Exemplo n.º 13
0
 /**
  * Converts the side-effect frequency table of the current read file into RDF.
  *
  * Every line must hold exactly ten tab-separated columns ("%" characters are
  * removed before splitting); otherwise an E_USER_ERROR is raised and false is
  * returned. Rows whose concept type is "LLT" are skipped. For every other row
  * a Drug-Effect-Frequency resource is written, together with a frequency
  * resource that is typed Exact-, Qualitative- or Range-Frequency.
  *
  * @return false|null false on a malformed line, otherwise nothing
  */
 function freq()
 {
     $expected_columns = 10;
     parent::setCheckpoint('file');
     while ($line = parent::getReadFile()->read()) {
         $fields = explode("\t", str_replace("%", "", $line));
         $found = count($fields);
         if ($found != $expected_columns) {
             trigger_error("Expecting {$expected_columns}, but found " . $found . " instead... skipping file!", E_USER_ERROR);
             return false;
         }
         // columns 1 (stereo id) and 2 (cui) are not used
         $placebo = $fields[3];
         $freq = $fields[4];
         $freq_lower = $fields[5];
         $freq_upper = $fields[6];
         $concept_type = $fields[7];
         $meddra_concept_id = $fields[8];
         if ($concept_type == "LLT") {
             continue;
         }
         $meddra_concept_label = trim($fields[9]);

         // the record id is a hash of the complete input line
         $record = "stitch_resource:" . md5("se_freq" . $line);
         $drug = "stitch:{$fields[0]}";
         $record_label = "{$meddra_concept_label} frequency for {$drug}";
         $record_class = parent::getVoc() . "Drug-Effect-Frequency";
         parent::addRDF(
             parent::describeIndividual($record, $record_label, $record_class)
             . parent::describeClass(parent::getVoc() . "Drug-Effect-Frequency", "SIDER Drug-Effect and Frequency")
             . parent::triplify($record, parent::getVoc() . "drug", $drug)
             . parent::triplify($record, parent::getVoc() . "effect", "umls:" . $meddra_concept_id)
         );
         if ($placebo) {
             parent::addRDF(parent::triplifyString($record, parent::getVoc() . "placebo", "true", "xsd:boolean"));
         }

         // classify the frequency: numeric vs. textual, then range overrides both
         $is_numeric = is_numeric($freq);
         $frequency_label = $is_numeric ? $freq . "%" : $freq;
         $frequency_class_label = $is_numeric ? "Exact-Frequency" : "Qualitative-Frequency";
         if ($freq_lower != $freq_upper) {
             $frequency_label .= "({$freq_lower}-{$freq_upper})";
             $frequency_class_label = "Range-Frequency";
         }
         $frequency_class = parent::getVoc() . $frequency_class_label;

         $frequency = $record . md5($freq_lower . $freq_upper . $meddra_concept_id);
         parent::addRDF(
             parent::triplify($record, parent::getVoc() . "frequency", $frequency)
             . parent::describeIndividual($frequency, $frequency_label, $frequency_class)
             . parent::describeClass($frequency_class, $frequency_class_label)
         );
         // numeric frequencies are stored as a fraction, textual ones verbatim
         $frequency_value = $is_numeric ? $freq / 100 : $freq;
         parent::addRDF(parent::triplifyString($frequency, parent::getVoc() . "frequency-value", $frequency_value));
         parent::addRDF(
             parent::triplifyString($frequency, parent::getVoc() . "lower-frequency", sprintf("%.3f", $freq_lower))
             . parent::triplifyString($frequency, parent::getVoc() . "upper-frequency", sprintf("%.3f", $freq_upper))
         );
         parent::setCheckpoint('record');
     }
     parent::setCheckpoint('file');
 }
Exemplo n.º 14
0
 /**
  * Converts the SNP records of one XML file into RDF.
  *
  * For each child of the document root this writes the SNP itself (typed by
  * its snpClass, with molecule type and taxon), its validation flags, its
  * HGVS names and, when the Assembly element is flagged as the reference,
  * the build/contig information and every function set found under MapLoc.
  *
  * @param string $file path handed to CXML
  * @return false|null false when the document has no usable root
  */
 function parse($file)
 {
     // Function-set attribute => array(predicate suffix, qname prefix).
     // A null prefix means the value is written as a string literal;
     // otherwise it is written as a resource "<prefix><value>".
     $fxnset_attributes = array(
         "geneId" => array("gene", "ncbigene:"),
         "symbol" => array("gene-symbol", null),
         "mrnaAcc" => array("mrna", "refseq:"),
         "protAcc" => array("protein", "refseq:"),
         "fxnClass" => array("fxn-class", null),
         "allele" => array("allele", null),
         "residue" => array("residue", null),
         "readingFrame" => array("reading-frame", null),
         "aaPosition" => array("position", null),
     );

     $xml = new CXML($file);
     $xml->parse();
     $entry = $xml->getXMLRoot();
     if (!isset($entry) or !$entry) {
         return false;
     }
     foreach ($entry->children() as $o) {
         $rsid = "rs" . $o->attributes()->rsId;
         $id = parent::getNamespace() . $rsid;
         $type = parent::getVoc() . ucfirst(str_replace(" ", "-", (string) $o->attributes()->snpClass));
         $moltype = parent::getVoc() . (string) $o->attributes()->molType;
         // attributes
         parent::addRDF(parent::describeIndividual($id, $rsid, $type) . parent::describeClass($type, ucfirst("" . $o->attributes()->snpClass)) . parent::triplify($id, parent::getVoc() . "mol-type", $moltype) . parent::describeClass($moltype, (string) $o->attributes()->molType, parent::getVoc() . "Moltype") . parent::describeClass(parent::getVoc() . "Moltype", "Moltype") . parent::triplify($id, parent::getVoc() . "taxid", "taxonomy:" . (string) $o->attributes()->taxId));
         $genotype = (string) $o->attributes()->genoType;
         if ($genotype) {
             // Emit the raw attribute value with the real XSD datatype:
             // "xsd:bool" does not exist, and a vocabulary-prefixed value
             // is not a valid boolean lexical form.
             parent::addRDF(parent::triplifyString($id, parent::getVoc() . "genotype", $genotype, "xsd:boolean"));
         }
         // validation
         $a = $o->Validation;
         parent::addRDF(parent::triplifyString($id, parent::getVoc() . "validation-by-cluster", (string) $a->attributes()->byCluster) . parent::triplifyString($id, parent::getVoc() . "validation-by-frequency", (string) $a->attributes()->byFrequency) . parent::triplifyString($id, parent::getVoc() . "validation-by-2hit2allele", (string) $a->attributes()->by2Hit2Allele) . parent::triplifyString($id, parent::getVoc() . "validation-by-1000G", (string) $a->attributes()->by1000G));
         // hgvs names
         foreach ($o->hgvs as $name) {
             parent::addRDF(parent::triplifyString($id, parent::getVoc() . "hgvs-name", (string) $name));
         }
         // assembly: only the one flagged as reference is converted
         $assembly = $o->Assembly;
         if (!$assembly or $assembly->attributes()->reference != "true") {
             continue;
         }
         parent::addRDF(parent::triplifyString($id, parent::getVoc() . "dbsnp-build", (string) $assembly->attributes()->dbSnpBuild) . parent::triplifyString($id, parent::getVoc() . "genome-build", (string) $assembly->attributes()->genomeBuild));
         $component = $assembly->Component;
         if (!$component) {
             continue;
         }
         parent::addRDF(parent::triplify($id, parent::getVoc() . "contig-accession", "genbank:" . (string) $component->attributes()->accession) . parent::triplify($id, parent::getVoc() . "contig-gi", "gi:" . (string) $component->attributes()->gi) . parent::triplifyString($id, parent::getVoc() . "chromosome", (string) $component->attributes()->chromosome));
         $maploc = $component->MapLoc;
         if (!$maploc) {
             continue;
         }
         foreach ($maploc->children() as $fxnset) {
             // each function set is identified by a hash of its XML
             $fxnset_id = parent::getRes() . md5($fxnset->asXML());
             parent::addRDF(parent::triplify($id, parent::getVoc() . "maps-to", $fxnset_id) . parent::triplify($fxnset_id, "rdf:type", parent::getVoc() . "Fxnset") . parent::describeClass(parent::getVoc() . "Fxnset", "Fxnset"));
             foreach ($fxnset_attributes as $attribute => $mapping) {
                 if (!isset($fxnset->attributes()->{$attribute})) {
                     continue;
                 }
                 list($predicate, $prefix) = $mapping;
                 $value = (string) $fxnset->attributes()->{$attribute};
                 if ($prefix === null) {
                     parent::addRDF(parent::triplifyString($fxnset_id, parent::getVoc() . $predicate, $value));
                 } else {
                     parent::addRDF(parent::triplify($fxnset_id, parent::getVoc() . $predicate, $prefix . $value));
                 }
             }
         }
     }
     unset($xml);
 }
Exemplo n.º 15
0
 /**
  * Downloads (when needed), converts and describes the requested BioPAX sources.
  *
  * The 'files' parameter is either 'all' (every entry of the parameter list
  * except the first) or a comma-separated list of source names. For each
  * source the gzipped OWL file is fetched when it is missing locally or when
  * 'download' is 'true', read into memory, handed to BioPAX2Bio2RDF and
  * written to "<outdir><source>.<output_format>". A dataset description
  * covering the source files and the generated output is written last.
  *
  * Aborts with E_USER_ERROR when a source cannot be downloaded or opened.
  */
 function Run()
 {
     // get the work
     if ($this->GetParameterValue('files') == 'all') {
         $sources = explode("|", parent::getParameterList('files'));
         array_shift($sources);
     } else {
         // comma separated list
         $sources = explode(",", parent::getParameterValue('files'));
     }
     // source name => remote file name
     $download_files = array(
         "homo-sapiens" => "Pathway%20Commons%202%20homo%20sapiens.BIOPAX.owl.gz",
         "hprd" => "Pathway%20Commons%202%20HPRD.BIOPAX.owl.gz",
         "humancyc" => "Pathway%20Commons%202%20HumanCyc.BIOPAX.owl.gz",
         "nci-nature" => "Pathway%20Commons%202%20NCI_Nature.BIOPAX.owl.gz",
         "panther-pathway" => "Pathway%20Commons%202%20PANTHER%20Pathway.BIOPAX.owl.gz",
         "phosphositeplus" => "Pathway%20Commons%202%20PhosphoSitePlus.BIOPAX.owl.gz",
         "reactome" => "Pathway%20Commons%202%20Reactome.BIOPAX.owl.gz",
     );
     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }
     // these do not change between sources, so read them once
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     $suffix = parent::getParameterValue('output_format');
     $gz = strstr($suffix, "gz") !== false;
     // "H" (zero-padded hour) in UTC, so the trailing "Z" is truthful and
     // the value is a well-formed dateTime ("G" drops the leading zero).
     $timestamp_format = "Y-m-d\\TH:i:s\\Z";

     $dataset_description = '';
     // iterate over the requested data
     foreach ($sources as $source) {
         echo "processing {$source}... ";
         // set the remote and input files
         $zfile = $source . ".owl.gz";
         $rfile = $rdir . $download_files[$source];
         $lfile = $ldir . $zfile;
         // download if the file doesn't exist locally or we are told to
         if (!file_exists($lfile) || $this->GetParameterValue('download') == 'true') {
             echo "downloading... ";
             $contents = file_get_contents($rfile);
             if ($contents === false) {
                 // do not cache an empty file that would mask the failure on later runs
                 trigger_error("Unable to download {$rfile}", E_USER_ERROR);
                 exit;
             }
             file_put_contents($lfile, $contents);
         }
         // decompress the file into a buffer
         echo 'extracting... ';
         if (($fpin = gzopen($lfile, "r")) === FALSE) {
             trigger_error("Unable to open {$lfile}", E_USER_ERROR);
             exit;
         }
         $data = '';
         while (!gzeof($fpin)) {
             $data .= gzgets($fpin, 4096);
         }
         gzclose($fpin);
         // set the output file
         $outfile = $source . '.' . $suffix;
         parent::setWriteFile($odir . $outfile, $gz);
         // send for parsing
         $p = new BioPAX2Bio2RDF($this);
         $p->SetBuffer($data)
             ->SetBioPAXVersion(3)
             ->SetBaseNamespace("http://purl.org/pc2/3/")
             ->SetBio2RDFNamespace("http://bio2rdf.org/pathwaycommons:")
             ->SetDatasetURI(parent::getDatasetURI());
         $rdf = $p->Parse();
         parent::addRDF($rdf);
         // write to output
         parent::writeRDFBufferToWriteFile();
         parent::getWriteFile()->Close();
         echo "done!" . PHP_EOL;
         // generate dataset description
         echo "Generating dataset description for {$zfile}... ";
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("Pathway Commons")
             ->setRetrievedDate(gmdate($timestamp_format, filemtime($lfile)))
             ->setFormat("rdf/xml")
             ->setPublisher("http://www.pathwaycommons.org/")
             ->setHomepage("http://www.pathwaycommons.org/")
             ->setRights("use")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://www.pathwaycommons.org/pc2/home.html#data_sources")
             ->setDataset("http://identifiers.org/pathwaycommons/");
         $dataset_description .= $source_file->toRDF();
         echo "done!" . PHP_EOL;
     }
     echo "Generating dataset description for Bio2RDF Pathways Commons dataset... ";
     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = gmdate($timestamp_format);
     // the output description references the last processed source file
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/pathwaycommons/pathwaycommons.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr($suffix, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description .= $output_file->toRDF();
     // write dataset description to file
     parent::setGraphURI($graph_uri);
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Exemplo n.º 16
0
 /**
  * Converts one KGML pathway file into RDF.
  *
  * First pass: every <entry> becomes an Ortholog-Group resource whose members
  * are the space-separated names of the entry. Second pass: every <relation>
  * becomes a Pathway-Relation between two entries (with its subtypes) and
  * every <reaction> becomes a Reaction linked to its child compounds.
  *
  * @param string $lfile path of the KGML file
  */
 function parseKGML($lfile)
 {
     $pathway = simplexml_load_file($lfile);
     if ($pathway === false) {
         echo "Error in parsing {$lfile}" . PHP_EOL;
         return;
     }
     $pathway_id = str_replace("path", "kegg", $pathway['name']);
     // prefix shared by every entry, relation and reaction of this pathway
     $base_id = str_replace("kegg", "kegg_resource", $pathway_id) . ".";
     parent::addRDF(parent::describeIndividual($pathway_id, $pathway['title'], parent::getVoc() . "Pathway") . parent::triplify($pathway_id, "rdfs:seeAlso", $pathway['link']) . parent::triplify($pathway_id, "foaf:depiction", $pathway['image']));

     // entry id => entry name, used to label relations in the second pass
     $entries = array();
     foreach ($pathway->children() as $type => $item) {
         if ($type != "entry") {
             continue;
         }
         $eid = $base_id . $item['id'];
         $entries["" . $item['id']] = "" . $item['name'];
         parent::addRDF(parent::describeIndividual($eid, $item['name'], parent::getVoc() . "Ortholog-Group") . parent::describeClass(parent::getVoc() . "Ortholog-Group", "KEGG Ortholog Group"));
         foreach (explode(" ", $item['name']) as $mid) {
             if ($item['type'] == 'path') {
                 // str_replace takes (search, replace, subject); the subject is
                 // the member id, whose ":" is turned into "_"
                 $mid = str_replace(":", "_", $mid);
             } else {
                 // drop the database prefix, keeping what follows the first ":"
                 $mid = substr($mid, strpos($mid, ":") + 1);
             }
             parent::addRDF(parent::triplify($eid, parent::getVoc() . "member", "kegg:" . $mid));
         }
     }

     // iterate over the relations, reactions
     foreach ($pathway->children() as $type => $item) {
         if ($type == "relation") {
             /*
                 <relation entry1="70" entry2="73" type="ECrel">
                     <subtype name="compound" value="86"/>
                 </relation>
                 <relation entry1="26" entry2="25" type="PPrel">
                     <subtype name="compound" value="17"/>
                     <subtype name="activation" value="--&gt;"/>
                 </relation>
             */
             $id1 = "" . $item['entry1'];
             $id2 = "" . $item['entry2'];
             $relation_id = $base_id . $id1 . "." . $id2 . "." . $type;
             // a relation may reference an entry that was not declared
             $name1 = isset($entries[$id1]) ? $entries[$id1] : '';
             $name2 = isset($entries[$id2]) ? $entries[$id2] : '';
             $label = $type . " relation between " . $name1 . " and " . $name2;
             parent::addRDF(parent::describeIndividual($relation_id, $label, parent::getVoc() . "Pathway-Relation") . parent::describeClass(parent::getVoc() . "Pathway-Relation", "KEGG Pathway Relation") . parent::triplify($relation_id, parent::getVoc() . "source", $base_id . $id1) . parent::triplify($relation_id, parent::getVoc() . "target", $base_id . $id2) . parent::triplify($relation_id, parent::getVoc() . "pathway", $pathway_id) . parent::triplifyString($relation_id, parent::getVoc() . "type", $item['type']));
             foreach ($item->children() as $subtype) {
                 parent::addRDF(parent::triplifyString($relation_id, parent::getVoc() . "subtype", '' . $subtype['name']));
             }
         } elseif ($type == "reaction") {
             /*
                 <reaction id="133" name="rn:R09085" type="irreversible">
                     <substrate id="86" name="cpd:C00267"/>
                     <product id="90" name="cpd:C00668"/>
                 </reaction>
             */
             $reaction_id = $base_id . substr($item['name'], strpos($item['name'], ":") + 1);
             $reaction_type = parent::getVoc() . ucfirst($item['type']) . "-Reaction";
             parent::addRDF(parent::describeIndividual($reaction_id, $item['name'], parent::getVoc() . "Reaction") . parent::describeClass(parent::getVoc() . "Reaction", "KEGG Reaction") . parent::triplify($reaction_id, "rdf:type", $reaction_type));
             // the child element name (e.g. substrate, product) is used as the predicate
             foreach ($item->children() as $k => $v) {
                 $cid = str_replace("cpd:", "kegg:", $v['name']);
                 parent::addRDF(parent::triplify($reaction_id, parent::getVoc() . $k, $cid));
             }
         }
     }
     return;
 }
Exemplo n.º 17
0
 /**
  * Converts the model-organism gene table of the current read file into RDF.
  *
  * The first line is the header and must have eight comma-separated columns:
  *   [0] GenAge ID, [1] symbol, [2] name, [3] organism, [4] entrez gene id,
  *   [5] avg lifespan change (max obsv), [6] lifespan effect,
  *   [7] longevity influence
  * Each following CSV row becomes an Aging-Related-Gene resource. Rows with
  * too few columns are skipped, and the taxon link is omitted (with a
  * warning) for organisms missing from the local taxon map.
  *
  * @return false|null false when the header does not have eight columns
  */
 function models()
 {
     $tax_ids = array("Caenorhabditis elegans" => "6239", "Mus musculus" => "10090", "Saccharomyces cerevisiae" => "4932", "Drosophila melanogaster" => "7227", "Podospora anserina" => "5145", "Mesocricetus auratus" => "10036", "Schizosaccharomyces pombe" => "4896", "Danio rerio" => "7955");
     $h = explode(",", parent::getReadFile()->read());
     $expected_columns = 8;
     if (($n = count($h)) != $expected_columns) {
         trigger_error("Found {$n} columns in gene file - expecting {$expected_columns}!", E_USER_WARNING);
         return false;
     }
     while ($l = parent::getReadFile()->read(200000)) {
         $data = str_getcsv($l);
         if (($n = count($data)) < $expected_columns) {
             // blank or truncated row: indexing it would emit bogus triples
             trigger_error("Found {$n} columns in gene record - expecting {$expected_columns}! Skipping record.", E_USER_WARNING);
             continue;
         }
         $genage = str_pad($data[0], 4, "0", STR_PAD_LEFT);
         $gene_symbol = $data[1];
         $name = $data[2];
         $organism = $data[3];
         $ncbi_gene_id = $data[4];
         $max_percent_obsv_avg_lifespan_change = $data[5];
         $lifespan_effect = $data[6];
         $longevity_influence = $data[7];
         $genage_id = parent::getNamespace() . $genage;
         parent::addRDF(parent::describeIndividual($genage_id, $name, parent::getVoc() . "Aging-Related-Gene") . parent::describeClass(parent::getVoc() . "Aging-Related-Gene", "Aging Related Gene"));
         parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "gene-symbol", parent::safeLiteral($gene_symbol)));
         if (isset($tax_ids[$organism])) {
             parent::addRDF(parent::triplify($genage_id, parent::getVoc() . "taxon", "ncbitaxon:" . $tax_ids[$organism]));
         } else {
             // avoid writing a dangling "ncbitaxon:" object for unmapped organisms
             trigger_error("No taxon id known for organism '{$organism}' - taxon not written for {$genage_id}", E_USER_WARNING);
         }
         if ($ncbi_gene_id !== "") {
             parent::addRDF(parent::triplify($genage_id, parent::getVoc() . "x-ncbigene", "ncbigene:" . $ncbi_gene_id));
         }
         if ($max_percent_obsv_avg_lifespan_change !== "") {
             parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "maximum-percent-observed-average-lifespan-change", parent::safeLiteral($max_percent_obsv_avg_lifespan_change)));
         }
         if ($lifespan_effect == "Increase and Decrease") {
             // the combined value is split into two separate statements
             parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "lifespan-effect", "increase") . parent::triplifyString($genage_id, parent::getVoc() . "lifespan-effect", "decrease"));
         } else {
             parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "lifespan-effect", strtolower($lifespan_effect)));
         }
         parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "longevity-influence", strtolower($longevity_influence)));
         parent::WriteRDFBufferToWriteFile();
     }
 }
Exemplo n.º 18
0
 /**
  * Converts the genotype / non-associated-disease table of the current read
  * file into RDF.
  *
  * Each line must have eight tab-separated columns; others are reported and
  * skipped. Columns used: [0] genotype string, [2] "|"-separated alleles
  * (lower-cased), [5] comma-separated PubMed ids, [7] comma-separated disease
  * ids. One negated Allele-Disease-Non-Association resource is written per
  * (allele, disease) pair, and the RDF buffer is flushed after every line.
  */
 function MGI_Geno_NotDisease()
 {
     while ($l = $this->getReadFile()->read(248000)) {
         $a = explode("\t", $l);
         if (count($a) != 8) {
             trigger_error("Incorrect number of columns", E_USER_WARNING);
             continue;
         }
         $genotype = $a[0];
         $alleles = explode("|", strtolower($a[2]));
         // The disease list is the last column, so it may still carry the
         // line terminator; trim it so the final id is clean.
         $diseases = explode(",", trim($a[7]));
         foreach ($diseases as $d) {
             $d = trim($d);
             if ($d === '') {
                 // an empty column would otherwise yield a bare "omim:" object
                 continue;
             }
             $disease = "omim:{$d}";
             foreach ($alleles as $allele) {
                 if ($allele === '') {
                     continue;
                 }
                 // the association id is a hash of the allele/disease pair
                 $id = parent::getRes() . md5($allele . $disease);
                 $label = "{$allele} {$disease} absent association";
                 parent::addRDF(parent::describeIndividual($id, $label, $this->getVoc() . "Allele-Disease-Non-Association") . parent::describeClass($this->getVoc() . "Allele-Disease-Non-Association", "MGI Allele-Disease Non-Association") . parent::triplify($id, $this->getVoc() . "allele", $allele) . parent::triplifyString($id, $this->getVoc() . "genotype-string", $genotype) . parent::triplify($id, $this->getVoc() . "disease", $disease) . parent::triplifyString($id, $this->getVoc() . "is-negated", "true"));
                 if ($a[5]) {
                     $pmids = explode(",", $a[5]);
                     foreach ($pmids as $pmid) {
                         parent::addRDF(parent::triplify($id, $this->getVoc() . "x-pubmed", "pubmed:" . $pmid));
                     }
                 }
             }
         }
         $this->writeRDFBufferToWriteFile();
     }
 }
Exemplo n.º 19
0
 /**
  * Convert the tab-separated drug-drug interaction file to RDF.
  *
  * The first line of the file is read and discarded. Every following line gets
  * a sequential "twosides:N" identifier and is turned into a Drug-Drug-Association
  * linking two pubchemcompound resources (columns 0 and 1, named by columns 2
  * and 3) to a umls event (column 4, named by column 5) with the p-value taken
  * from column 7. Each chemical and event is described only the first time it
  * is seen. The buffer is flushed every 10000 rows and once more at the end.
  */
 function twosides()
 {
     // resources that have already been described
     $described = array();
     $this->GetReadFile()->Read();
     for ($row = 1; $line = $this->GetReadFile()->Read(); $row++) {
         $cols = explode("\t", $line);
         if ($row % 10000 == 0) {
             $this->WriteRDFBufferToWriteFile();
         }
         $assoc = "twosides:{$row}";
         $drug1 = "pubchemcompound:" . intval(sprintf("%d", substr($cols[0], 4)));
         $drug1_name = $cols[2];
         $drug2 = "pubchemcompound:" . intval(sprintf("%d", substr($cols[1], 4)));
         $drug2_name = $cols[3];
         $event = "umls:" . $cols[4];
         $event_name = strtolower($cols[5]);
         $assoc_label = "DDI between {$drug1_name} and {$drug2_name} leading to {$event_name}";

         // resource, label, class name, class label -- in the order they must be emitted
         $entities = array(
             array($drug1, $drug1_name, "Chemical", "PharmGKB Chemical"),
             array($drug2, $drug2_name, "Chemical", "PharmGKB Chemical"),
             array($event, $event_name, "Event", "PharmGKB side effect event"),
         );
         foreach ($entities as $entity) {
             list($uri, $name, $class, $class_label) = $entity;
             if (isset($described[$uri])) {
                 continue;
             }
             parent::addRDF(parent::describeIndividual($uri, $name, parent::getVoc() . $class) . parent::describeClass(parent::getVoc() . $class, $class_label));
             $described[$uri] = '';
         }

         parent::addRDF(implode("", array(
             parent::describeIndividual($assoc, $assoc_label, parent::getVoc() . "Drug-Drug-Association"),
             parent::describeClass(parent::getVoc() . "Drug-Drug-Association", "PharmGKB Twosides Drug-Drug Association"),
             parent::triplify($assoc, parent::getVoc() . "chemical", $drug1),
             parent::triplify($assoc, parent::getVoc() . "chemical", $drug2),
             parent::triplify($assoc, parent::getVoc() . "event", $event),
             parent::triplifyString($assoc, parent::getVoc() . "p-value", $cols[7]),
         )));
     }
     parent::writeRDFBufferToWriteFile();
 }
Exemplo n.º 20
0
 /**
  * Convert the dietary-restriction gene expression CSV file to RDF.
  *
  * The header line must have 8 comma-separated columns. Each data row is
  * parsed as CSV into: MGI symbol, MGI description, NCBI gene id, total number
  * of datasets, number over-expressed, number under-expressed, p-value and
  * expression direction. Every row yields an expression individual plus an
  * evidence resource, and the buffer is flushed after each row.
  *
  * @return false|null FALSE when the header does not have the expected number
  *                     of columns; no value otherwise
  */
 function gene_expression()
 {
     $expected_columns = 8;
     $h = explode(",", parent::getReadFile()->read());
     if (($n = count($h)) != $expected_columns) {
         trigger_error("Found {$n} columns in gene file - expecting {$expected_columns}!", E_USER_WARNING);
         return false;
     }
     while ($l = parent::getReadFile()->read(200000)) {
         $data = str_getcsv($l);
         // Blank or truncated rows would otherwise be read through undefined
         // offsets and end up as resources with meaningless md5 identifiers.
         if (($n = count($data)) < $expected_columns) {
             trigger_error("Found {$n} columns in gene row - expecting {$expected_columns}; row skipped", E_USER_WARNING);
             continue;
         }
         list($mgi_symbol, $mgi_description, $geneid, $total_datasets, $total_ovexp, $total_underexp, $p_value, $expression) = $data;

         // both identifiers are derived from the same concatenation of row values
         $key = $geneid . $total_datasets . $total_ovexp . $total_underexp . $p_value . $expression;
         $id = parent::getRes() . md5($key);
         $evidence_id = parent::getRes() . md5($key . "_evidence");
         $gene = "ncbigene:" . $geneid;

         $label = "Dietary restriction induced " . $expression . "-expression of " . $mgi_symbol . " based on microarray results from " . $total_datasets . " datasets, with p-value " . $p_value;
         $type_label = "Gene " . ucfirst($expression) . " Expression";
         $type = parent::getVoc() . str_replace(" ", "-", $type_label);

         parent::addRDF(
             parent::describeIndividual($id, $label, $type)
             . parent::describeClass($type, $type_label)
             . parent::triplify($id, parent::getVoc() . "gene", $gene)
             . parent::triplifyString($gene, parent::getVoc() . "mgi-gene-symbol", $mgi_symbol)
             . parent::triplifyString($gene, parent::getVoc() . "mgi-gene-description", $mgi_description)
             . parent::triplify($id, parent::getVoc() . "evidence", $evidence_id)
             . parent::triplifyString($id, parent::getVoc() . "perturbation-context", "dietary restriction")
             . parent::triplifyString($evidence_id, parent::getVoc() . "total-number-datasets", $total_datasets)
             . parent::triplifyString($evidence_id, parent::getVoc() . "total-number-datasets-overexpressed", $total_ovexp)
             . parent::triplifyString($evidence_id, parent::getVoc() . "total-number-datasets-underexpressed", $total_underexp)
             . parent::triplifyString($evidence_id, parent::getVoc() . "p-value", $p_value)
         );
         parent::writeRDFBufferToWriteFile();
     }
 }
Exemplo n.º 21
0
 /**
  * Convert the tab-separated gene2accession table to RDF.
  *
  * The first line of the file is discarded and lines starting with "#" are
  * skipped. Rows are expected to have 16 columns. When $this->taxids is set,
  * only rows whose taxonomy id (column 0) is a key of it are converted. If both
  * genomic positions (columns 9 and 10) are present, a faldo region with
  * begin/end positions is emitted for the gene; every other non "-" column is
  * attached to the gene as a cross-reference or a literal according to the
  * column table. The buffer is flushed after each converted row.
  */
 private function gene2accession()
 {
     $this->getReadFile()->read(200000);
     // column index => relation name and, for cross-references, the target namespace
     $columns = array(
         0 => array('rel' => "x-taxonomy", 'ns' => "taxonomy"),
         1 => array('rel' => "ncbigene", 'ns' => "ncbigene"),
         2 => array('rel' => "status"),
         3 => array('rel' => "rna-nucleotide-accession.version", 'ns' => "genbank"),
         4 => array('rel' => "rna-nucleotide-gi", 'ns' => "gi"),
         5 => array('rel' => "protein-accession.version", 'ns' => "genbank"),
         6 => array('rel' => "protein-gi", 'ns' => "gi"),
         7 => array('rel' => "genomic-nucleotide-accession.version", 'ns' => "genbank"),
         8 => array('rel' => "genomic-nucleotide-gi", 'ns' => "gi"),
         9 => array('rel' => "genomic-start-position"),
         10 => array('rel' => "genomic-end-position"),
         11 => array('rel' => "orientation"),
         12 => array('rel' => "assembly"),
         13 => array('rel' => "mature-peptide-accession.version", 'ns' => "genbank"),
         14 => array('rel' => "mature-peptide-gi", 'ns' => "gi"),
         15 => array('rel' => "symbol"),
     );
     // the gene id itself and the columns already covered by the faldo region
     $not_exported = array(1, 9, 10, 11);
     // strand symbol => faldo position type; anything else is a plain stranded position
     $strand_types = array("+" => "faldo:ForwardStrandPosition", "-" => "faldo:ReverseStrandPosition");

     $z = 1;
     while ($line = $this->getReadFile()->read(200000)) {
         if ($line[0] == "#") {
             continue;
         }
         if ($z++ % 10000 == 0) {
             echo $z . PHP_EOL;
             parent::clear();
         }
         $a = explode("\t", rtrim($line));
         if (count($a) != 16) {
             trigger_error("gene2accession: expecting 16 columns, found " . count($a) . " instead", E_USER_ERROR);
         }
         $taxid = $a[0];
         if (isset($this->taxids) && !isset($this->taxids[$taxid])) {
             continue;
         }
         $id = parent::getNamespace() . $a[1];
         // a status other than "-" switches genbank accessions to the refseq namespace
         $refseq = $a[2] != '-';

         if ($a[9] != '-' && $a[10] != '-') {
             $region = parent::getRes() . $a[7] . "/" . $a[9] . "-" . $a[10];
             $start_pos = parent::getRes() . $a[7] . "/" . $a[9];
             $stop_pos = parent::getRes() . $a[7] . "/" . $a[10];
             $orientation = isset($strand_types[$a[11]]) ? $strand_types[$a[11]] : "faldo:StrandedPosition";
             parent::addRDF(implode("", array(
                 parent::describeIndividual($region, "location of ncbigene:{$a[1]} on {$a[7]}", "faldo:Region"),
                 parent::describeIndividual($start_pos, "start of ncbigene:{$a[1]} on {$a[7]}", "faldo:ExactPosition"),
                 parent::describeIndividual($stop_pos, "stop position of ncbigene:{$a[1]} on {$a[7]}", "faldo:ExactPosition"),
                 parent::triplify($id, "faldo:location", $region),
                 parent::triplify($region, "faldo:begin", $start_pos),
                 parent::triplify($start_pos, "rdf:type", $orientation),
                 parent::triplifyString($start_pos, "faldo:position", $a[9], "xsd:integer"),
                 parent::triplify($start_pos, "faldo:reference", "refseq:" . $a[7]),
                 parent::triplify($region, "faldo:end", $stop_pos),
                 parent::triplify($stop_pos, "rdf:type", $orientation),
                 parent::triplifyString($stop_pos, "faldo:position", $a[10], "xsd:integer"),
                 parent::triplify($stop_pos, "faldo:reference", "refseq:" . $a[7]),
             )));
         }

         foreach ($columns as $i => $column) {
             if ($a[$i] == "-" || in_array($i, $not_exported, true)) {
                 continue;
             }
             if (!isset($column['ns'])) {
                 parent::addRDF(parent::triplifyString($id, parent::getVoc() . $column['rel'], $a[$i]));
                 continue;
             }
             $ns = ($column['ns'] == 'genbank' && $refseq) ? 'refseq' : $column['ns'];
             parent::addRDF(parent::triplify($id, parent::getVoc() . $column['rel'], "{$ns}:" . $a[$i]));
         }
         parent::writeRDFBufferToWriteFile();
     }
 }
Exemplo n.º 22
0
 /**
  * Convert a tab-separated gene ontology annotation file to RDF.
  *
  * Lines starting with "!" are skipped; every other line must have 17 columns.
  * For each row the annotated entity (resolved from columns 0 and 1) is
  * described, linked to the GO term of column 4 through a relation derived
  * from the aspect (negated when the qualifier is "NOT"), and an annotation
  * resource carrying evidence, category, assigner, date and PubMed references
  * is written. Rows whose entity cannot be resolved are printed and skipped.
  *
  * @param mixed $file value concatenated into the identifier of every
  *                    annotation resource, together with a running counter
  * @return false|null FALSE as soon as a row without 17 columns is met;
  *                     no value otherwise
  */
 function process($file)
 {
     // aspect => relation to use instead when the annotation is qualified with NOT
     $negated_relations = array(
         'process' => 'not-in-process',
         'function' => 'not-has-function',
         'component' => 'not-in-component',
     );
     $z = 1;
     while ($line = parent::getReadFile()->read(100000)) {
         if ($z % 100000 == 0) {
             parent::clear();
         }
         if ($line[0] == "!") {
             continue;
         }
         $cols = explode("\t", $line);
         if (count($cols) != 17) {
             trigger_error("Expected 17 columns, but found " . count($cols), E_USER_ERROR);
             return false;
         }

         $db = $cols[0];
         $id = $cols[1];
         $symbol = $cols[2];
         $qualifier = $cols[3];
         // drop the first three characters of the GO identifier column
         $goid = substr($cols[4], 3);
         $refs = $this->getDbReferences($cols[5]);
         $eco = $this->getEvidenceCodeLabelArr($cols[6]);
         $aspect = $this->getAspect($cols[8]);
         $label = $cols[9];
         $synonyms = explode("|", $cols[10]);
         $taxid = $cols[12];
         $date = $this->parseDate($cols[13]);
         $assignedBy = $cols[14];

         // resource of the annotated entity
         $eid = $this->getdbURI($db, $id);
         if (!$eid) {
             print_r($cols);
             continue;
         }

         parent::addRDF(implode("", array(
             parent::describeIndividual($eid, $label, parent::getVoc() . "GO-Annotation"),
             parent::describeClass(parent::getVoc() . "GO-Annotation", "GO Annotation"),
             parent::triplifyString($eid, parent::getVoc() . "symbol", $symbol),
         )));
         parent::addRDF(parent::triplify($eid, parent::getVoc() . "x-taxonomy", $taxid));
         foreach ($synonyms as $synonym) {
             if (empty($synonym)) {
                 continue;
             }
             parent::addRDF(parent::triplifyString($eid, parent::getVoc() . "synonym", $synonym));
         }

         // relation between the entity and the GO term
         $rel = $aspect;
         if ($qualifier == 'NOT') {
             foreach ($negated_relations as $name => $negated) {
                 if ($aspect == $name) {
                     $rel = $negated;
                 }
             }
         }
         $go_term = "go:" . $goid;
         parent::addRDF(parent::describeObjectProperty(parent::getVoc() . $rel, str_replace("-", " ", $rel)) . parent::triplify($eid, parent::getVoc() . $rel, $go_term));

         // annotation resource: one per converted row, numbered by the running counter
         $type = key($eco);
         $aid = parent::getRes() . $file . "_" . $z++;
         parent::addRDF(parent::describeObjectProperty(parent::getVoc() . "go-annotation", "GO annotation") . parent::triplify($eid, parent::getVoc() . "go-annotation", $aid));
         $cat = parent::getRes() . md5($aspect);
         parent::addRDF(implode("", array(
             parent::describeIndividual($aid, "{$id}-go:{$goid} association", parent::getVoc() . "GO-Annotation"),
             parent::triplify($aid, parent::getVoc() . "target", $eid),
             parent::triplify($aid, parent::getVoc() . "go-term", $go_term),
             parent::triplify($aid, parent::getVoc() . "evidence", "eco:" . $eco[$type][1]),
             parent::triplify($aid, parent::getVoc() . "go-category", $cat),
             parent::describeClass($cat, $aspect),
             parent::triplifyString($aid, parent::getVoc() . "assigned-by", $assignedBy),
         )));
         if ($date != '') {
             parent::addRDF(parent::triplifyString($aid, parent::getVoc() . "entry-date", $date . "T00:00:00Z", "xsd:dateTime"));
         }
         foreach ($refs as $ref) {
             $parts = explode(":", $ref);
             if ($parts[0] != 'PMID') {
                 continue;
             }
             parent::addRDF(parent::triplify($aid, parent::getVoc() . "article", "pubmed:" . $parts[1]));
         }
         //write RDF to file
         parent::writeRDFBufferToWriteFile();
     }
 }
Exemplo n.º 23
0
 /**
  * Convert a pubchem substance XML record to RDF.
  *
  * Emits the substance individual with its version, the links to its
  * compounds, the source identifier, the synonyms and the non-empty comments.
  * A record without a substance id is reported with a warning and skipped.
  *
  * @param object $xml wrapper whose GetXMLRoot() returns the record root; the
  *                    root (and its nodes) are queried through xpath()
  */
 function parse_substance_record(&$xml)
 {
     $root = $xml->GetXMLRoot();
     // Text of the first node matching $path below $node, or "" when nothing
     // matches. array_shift() takes its argument by reference, so the xpath()
     // result has to live in a variable first; a non-array result (failed
     // query) is treated like "no match".
     $first = function ($node, $path) {
         $hits = $node->xpath($path);
         if (!is_array($hits) || count($hits) == 0) {
             return "";
         }
         return (string) array_shift($hits);
     };
     // pubchem identifier and version
     $sid = $first($root, '//PC-Substance_sid/PC-ID/PC-ID_id');
     if ($sid === "") {
         // without an id every triple would be attached to the bare namespace
         trigger_error("Substance record without PC-ID_id - record skipped", E_USER_WARNING);
         return;
     }
     $sid_version = $first($root, '//PC-Substance_sid/PC-ID/PC-ID_version');
     $psid = $this->getPcsNs() . $sid;
     parent::addRDF(parent::describeIndividual($psid, null, $this->getPcsVoc() . "Substance"));
     parent::addRDF(parent::triplifyString($psid, $this->getPcsVoc() . "version", parent::safeLiteral($sid_version)));
     // reference to pubchem compounds
     $pc_compounds = $root->xpath('//PC-Substance_compound/PC-Compounds/PC-Compound') ?: array();
     foreach ($pc_compounds as $compound) {
         $cid = $first($compound, './PC-Compound_id/PC-CompoundType/PC-CompoundType_id/PC-CompoundType_id_cid');
         if ($cid != "") {
             $pcid = $this->getPccNs() . $cid;
             parent::addRDF(parent::triplify($psid, $this->getPcsVoc() . "compound", $pcid));
         }
     }
     // source identifier
     $source_id = $first($root, '//PC-Substance_source/PC-Source/PC-Source_db/PC-DBTracking/PC-DBTracking_source-id/Object-id/Object-id_str');
     parent::addRDF(parent::triplifyString($psid, $this->getPcsVoc() . "source-identifier", parent::safeLiteral($source_id)));
     // synonyms
     $synonyms = $root->xpath('//PC-Substance_synonyms/PC-Substance_synonyms_E') ?: array();
     foreach ($synonyms as $synonym) {
         parent::addRDF(parent::triplifyString($psid, $this->getPcsVoc() . "synonym", parent::safeLiteral((string) $synonym)));
     }
     // comments
     $comments = $root->xpath('//PC-Substance_comment/PC-Substance_comment_E') ?: array();
     foreach ($comments as $comment) {
         // compare the node's text, not the node object: an object is never
         // identical to "", so the original test let empty comments through
         $text = (string) $comment;
         if ($text !== "") {
             parent::addRDF(parent::triplifyString($psid, "rdfs:comment", parent::safeLiteral($text)));
         }
     }
 }
Exemplo n.º 24
0
 /**
  * Add an RDF representation of one descriptor record to the model.
  *
  * The record is described as a "Descriptor" individual identified by its UI
  * and labelled with its first MH value. Every field of the record is then
  * exported according to its code: most fields become escaped string literals,
  * DA/DX/MR become xsd:date literals, AN is split on ";", AQ is split on
  * spaces into qualifier resources, MN becomes a tree entry (with a subclass
  * link to its parent tree number) and ST a semantic type resource. A field
  * code missing from the descriptor data element map raises an error. The RDF
  * buffer is flushed after every field and once more at the end.
  *
  * @param array $desc_record_arr associative array with the contents of one
  *        descriptor record: field code => list of values
  */
 private function makeDescriptorRecord($desc_record_arr)
 {
     //get the UI of the descriptor record
     $dr_ui = $desc_record_arr["UI"][0];
     $dr_res = $this->getNamespace() . $dr_ui;
     $dr_label = $desc_record_arr['MH'][0];
     parent::AddRDF(parent::describeIndividual($dr_res, $dr_label, $this->getVoc() . "Descriptor", $dr_label) . parent::describeClass($this->getVoc() . "Descriptor", "MeSH Descriptor"));
     //field code => name appended to the vocabulary prefix to build the predicate
     $qde = $this->getDescriptorDataElements();

     //fields exported as an escaped string literal: field code => property description
     $literal_fields = array(
         'CATSH' => "Relationship between a descriptor and its cataloging subheadings list name",
         'CX' => "Relationship between a descriptor and xrefs",
         'DC' => "Relationship between a descriptor and its descriptor class",
         'DE' => "Relationship between a descriptor record and its entry version",
         'DS' => "Relationship between a descriptor record and its sort version",
         'EC' => "Relationship between a descriptor and its entry combination",
         'PRINT ENTRY' => "Relationship between a descriptor and its print entry term",
         'ENTRY' => "Relationship between a descriptor and its entry term",
         'FX' => "Relationship between a descriptor and its forward cross reference",
         'GM' => "Relationship between a descriptor and its grateful med note",
         'HN' => "Relationship between a descriptor record and its history note",
         'MED' => "Relationship between a descriptor and its backfile postings",
         'M94' => "Relationship between a descriptor and its backfile postings",
         'M90' => "Relationship between a descriptor and its backfile postings",
         'M85' => "Relationship between a descriptor record and its backfile postings",
         'M80' => "Relationship between a descriptor record and its backfile postings",
         'M75' => "Relationship between a descriptor record and its backfile postings",
         'M66' => "Relationship between a descriptor record and its backfile postings",
         'MH_TH' => "Relationship between a descriptor record and its MeSH Heading thesaurus id",
         'MH' => "Relationship between a descriptor record and its MeSH Heading",
         'MS' => "Relationship between a descriptor record and its MeSH scope note",
         'N1' => "Relationship between a descriptor record and its CAS 1 name",
         'OL' => "Relationship between a descriptor record and its online note",
         'PA' => "Relationship between a descriptor record and its pharmacological action",
         'PI' => "Relationship between a descriptor record and its previous indexing",
         'PM' => "Relationship between a descriptor record and its public mesh note",
         'PX' => "Relationship between a descriptor record and its pre explosion",
         'RECTYPE' => "Relationship between a descriptor record and its record type",
         'RH' => "Relationship between a descriptor record and its running head, in relation to mesh tree structures",
         'RN' => "Relationship between a descriptor record and its CAS registry",
         'RR' => "Relationship between a descriptor record and its registry number",
     );
     //fields exported as an xsd:date literal: field code => property description
     $date_fields = array(
         'DA' => "Relationship between a descriptor and its date of entry",
         'DX' => "Relationship between a descriptor and its date of major descriptor established",
         'MR' => "Relationship between a descriptor record and its major revision date",
     );

     //iterate over the properties
     foreach ($desc_record_arr as $k => $v) {
         if (!array_key_exists($k, $qde)) {
             trigger_error("Please add key to descriptor record map: " . $k . PHP_EOL, E_USER_ERROR);
         } elseif (isset($literal_fields[$k])) {
             // The property described is always the one that was just used;
             // the CX branch used to describe the CATSH property instead.
             $prop = $this->getVoc() . $qde[$k];
             foreach ($v as $vv) {
                 parent::AddRDF(parent::triplifyString($dr_res, $prop, utf8_encode(htmlspecialchars($vv))) . parent::describeProperty($prop, $literal_fields[$k]));
             }
         } elseif (isset($date_fields[$k])) {
             $prop = $this->getVoc() . $qde[$k];
             foreach ($v as $vv) {
                 parent::AddRDF(parent::triplifyString($dr_res, $prop, $this->formatDate($vv), "xsd:date") . parent::describeProperty($prop, $date_fields[$k]));
             }
         } elseif ($k == "AN") {
             //annotations: several of them may be packed in one value, separated by semicolons
             $prop = $this->getVoc() . $qde["AN"];
             foreach ($v as $vv) {
                 foreach (explode(";", $vv) as $anAn) {
                     parent::AddRDF(parent::triplifyString($dr_res, $prop, $anAn) . parent::describeProperty($prop, "Relationship between a descriptor and its annotation"));
                 }
             }
         } elseif ($k == "AQ") {
             //allowable topical qualifiers: space separated list of qualifier codes
             $prop = $this->getVoc() . $qde['AQ'];
             $aq_class = $this->getVoc() . "allowable-topical-qualifier";
             foreach ($v as $vv) {
                 foreach (explode(" ", $vv) as $aq) {
                     if ($aq === '') {
                         // consecutive spaces would otherwise yield a resource without identifier
                         continue;
                     }
                     $aq_res = $this->getRes() . $aq;
                     parent::AddRDF(parent::triplify($aq_res, "rdf:type", $aq_class) . parent::describeClass($aq_class, "allowable topical qualifier: " . $qde['AQ']));
                     parent::AddRDF(parent::triplify($dr_res, $prop, $aq_res) . parent::describeProperty($prop, "Relationship between a descriptor and its allowable topical qualifiers"));
                 }
             }
         } elseif ($k == "MN") {
             //tree numbers: each one is a tree entry, sub class of the entry one level up
             $prop = $this->getVoc() . $qde['MN'];
             foreach ($v as $vv) {
                 $vid = parent::getNamespace() . $vv;
                 $vlabel = utf8_encode(htmlspecialchars($vv));
                 parent::AddRDF(parent::describeIndividual($vid, $dr_label, parent::getVoc() . "Tree-Entry", $vlabel) . parent::triplify($dr_res, $prop, $vid) . parent::describeProperty($prop, "Relationship between a descriptor record and its MeSH Tree Number"));
                 // the parent tree number is everything before the last dot
                 if (FALSE !== ($pos = strrpos($vv, "."))) {
                     $pid = parent::getNamespace() . substr($vv, 0, $pos);
                     parent::addRDF(parent::triplify($vid, "rdfs:subClassOf", $pid));
                 }
             }
         } elseif ($k == "ST") {
             //semantic types
             $prop = $this->getVoc() . $qde['ST'];
             foreach ($v as $vv) {
                 $vid = parent::getNamespace() . $vv;
                 $vlabel = utf8_encode(htmlspecialchars($vv));
                 parent::AddRDF(parent::describeIndividual($vid, $vlabel, parent::getVoc() . "Semantic-Type", $vlabel) . parent::triplify($dr_res, $prop, $vid) . parent::describeProperty($prop, "Relationship between a descriptor record and its semantic type"));
             }
         }
         // field codes known to the map but not listed above (such as UI) produce no triples
         $this->WriteRDFBufferToWriteFile();
     }
     //foreach
     $this->WriteRDFBufferToWriteFile();
 }
Exemplo n.º 25
0
 /**
  * Convert the tab-separated CTD gene file to RDF.
  *
  * Lines starting with "#" are skipped. Column 0 is the gene symbol, column 1
  * the label and column 2 the NCBI gene id; columns 3 to 7 hold "|"-separated
  * alternative gene ids, synonyms, BioGRID, PharmGKB and UniProt references.
  * The buffer is flushed after each converted row.
  *
  * @return bool FALSE when the first data row does not have exactly 8 fields,
  *              TRUE once the whole file has been read
  */
 function CTD_Genes()
 {
     // column index => relation and, for cross-references, the target namespace
     $ids = array(3 => array('rel' => "alternative-ncbigene-id", 'ns' => "ncbigene"), 4 => array('rel' => 'synonym'), 5 => array('rel' => 'x-biogrid', 'ns' => 'biogrid'), 6 => array('rel' => 'x-pharmgkb', 'ns' => 'pharmgkb'), 7 => array('rel' => 'x-uniprot', 'ns' => 'uniprot'));
     $first = true;
     while ($l = $this->GetReadFile()->Read()) {
         if ($l[0] == '#') {
             continue;
         }
         // Strip the line terminator (if the reader returns one) so that it does
         // not become part of the last value of the last column.
         $a = explode("\t", rtrim($l, "\r\n"));
         $found = count($a);
         // check number of columns
         if ($first) {
             if ($found != 8) {
                 trigger_error("CTD_genes function expects 8 fields, found {$found}!" . PHP_EOL, E_USER_WARNING);
                 return FALSE;
             }
             $first = false;
         } elseif ($found < 8) {
             // blank or truncated rows cannot be indexed safely
             trigger_error("CTD_genes function expects 8 fields, found {$found} - row skipped" . PHP_EOL, E_USER_WARNING);
             continue;
         }
         $symbol = str_replace(array("\\/"), array('|'), $a[0]);
         $label = str_replace("\\+/", '+', $a[1]);
         $geneid = "ncbigene:" . $a[2];
         $this->addRDF(parent::describeIndividual($geneid, $label, $this->getVoc() . "Gene") . parent::triplifyString($geneid, $this->getVoc() . "gene-symbol", $symbol) . parent::describeClass($this->getVoc() . "Gene", "CTD Gene"));
         foreach ($ids as $i => $v) {
             if (!trim($a[$i])) {
                 continue;
             }
             foreach (explode("|", $a[$i]) as $value) {
                 if ($value === '') {
                     // "a||b" style lists would otherwise create an empty reference
                     continue;
                 }
                 if (isset($v['ns'])) {
                     parent::addRDF(parent::triplify($geneid, parent::getVoc() . $v['rel'], $v['ns'] . ":" . $value));
                 } else {
                     parent::addRDF(parent::triplifyString($geneid, parent::getVoc() . $v['rel'], $value));
                 }
             }
         }
         parent::WriteRDFBufferToWriteFile();
     }
     return TRUE;
 }
Exemplo n.º 26
0
 /**
  * Download the requested BioModels BioPAX files, convert each one to RDF and
  * write the converted data plus a dataset description file.
  *
  * The 'files' parameter selects the models:
  *  - "all" / "curated": the identifier list is fetched from the BioModels SOAP
  *    service and cached as JSON in the input directory;
  *  - "N-M": a numeric range of model numbers;
  *  - anything else: a comma-separated list of model numbers.
  * Model numbers are left-padded to 10 digits and prefixed with "BIOMD".
  *
  * @return false|null FALSE when the identifier list cannot be obtained; nothing otherwise.
  */
 function Run()
 {
     // directory shortcuts
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     // get the work specified
     $list = trim(parent::getParameterValue('files'));
     // keyword => cache file and the web service operation that fills it
     $listings = array(
         'all' => array('file' => "all_models.json", 'operation' => 'getAllModelsId'),
         'curated' => array('file' => "curated_models.json", 'operation' => 'getAllCuratedModelsId'),
     );
     $entries = array();
     if (isset($listings[$list])) {
         $file = $ldir . $listings[$list]['file'];
         $operation = $listings[$list]['operation'];
         if (!file_exists($file)) {
             try {
                 $x = @new SoapClient("http://www.ebi.ac.uk/biomodels-main/services/BioModelsWebServices?wsdl");
             } catch (Exception $e) {
                 // without a client there is nothing to call; stop instead of
                 // dereferencing an undefined variable
                 trigger_error("Unable to connect to the BioModels web service: " . $e->getMessage() . PHP_EOL, E_USER_WARNING);
                 return FALSE;
             }
             $entries = $x->{$operation}();
             file_put_contents($file, json_encode($entries));
         } else {
             $entries = json_decode(file_get_contents($file));
             if ($entries === null) {
                 trigger_error("Unable to read the cached model list in {$file}" . PHP_EOL, E_USER_WARNING);
                 return FALSE;
             }
         }
     } elseif (($pos = strpos($list, "-")) !== FALSE) {
         // a hyphenated range was provided
         $start_range = (int) substr($list, 0, $pos);
         $end_range = (int) substr($list, $pos + 1);
         for ($n = $start_range; $n <= $end_range; $n++) {
             $entries[] = "BIOMD" . str_pad((string) $n, 10, "0", STR_PAD_LEFT);
         }
     } else {
         // comma separated list; tolerate blanks around and between the numbers
         foreach (explode(",", $list) as $number) {
             $number = trim($number);
             if ($number === '') {
                 continue;
             }
             $entries[] = "BIOMD" . str_pad($number, 10, "0", STR_PAD_LEFT);
         }
     }
     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }
     // set the write file
     $suffix = parent::getParameterValue('output_format');
     $outfile = 'biomodels' . '.' . $suffix;
     $gz = false;
     if (strstr(parent::getParameterValue('output_format'), "gz")) {
         $gz = true;
     }
     $dataset_description = '';
     parent::setWriteFile($odir . $outfile, $gz);
     // iterate over the entries
     $i = 0;
     $total = count($entries);
     foreach ($entries as $id) {
         echo "processing " . ++$i . " of {$total} - biomodel# " . $id;
         $download_file = $ldir . $id . ".owl.gz";
         $url = parent::getParameterValue('download_url') . "publ/{$id}/{$id}-biopax3.owl";
         // download if the file doesn't exist or we are told to
         if (!file_exists($download_file) || $this->GetParameterValue('download') == 'true') {
             // download
             echo " - downloading";
             $ret = utils::downloadsingle($url, 'compress.zlib://' . $download_file, true);
             if ($ret === false) {
                 echo "\nTrying non-curated model";
                 $url = parent::getParametervalue('download_url') . "uncura_publ/{$id}/{$id}-biopax3.owl";
                 $ret = utils::downloadsingle($url, 'compress.zlib://' . $download_file, true);
                 if ($ret === false) {
                     continue;
                 }
             }
             echo " - downloaded";
         }
         // load entry, parse and write to file
         echo " - parsing... ";
         $buf = file_get_contents("compress.zlib://" . $download_file);
         if ($buf === false) {
             // do not hand a failed read to the converter
             echo "unable to read {$download_file}, skipping" . PHP_EOL;
             continue;
         }
         $converter = new BioPAX2Bio2RDF($this);
         $converter->SetBuffer($buf)->SetBioPAXVersion(3)->SetBaseNamespace("http://identifiers.org/biomodels.db/{$id}/")->SetBio2RDFNamespace("http://bio2rdf.org/biomodels:" . $id . "_")->SetDatasetURI($this->GetDatasetURI());
         $rdf = $converter->Parse();
         parent::addRDF($rdf);
         parent::writeRDFBufferToWriteFile();
         // generate dataset description; gmdate() + 'H' because the literal 'Z'
         // suffix denotes UTC and the hour must be two digits
         $source_file = (new DataResource($this))->setURI($url)->setTitle("EBI BioModels Database - BioModel # {$id}")->setRetrievedDate(gmdate("Y-m-d\\TH:i:s\\Z", filemtime($download_file)))->setFormat("rdf/xml")->setPublisher("http://www.ebi.ac.uk/")->setHomepage("http://www.ebi.ac.uk/biomodels-main/")->setRights("use-share-modify")->setLicense("http://www.ebi.ac.uk/biomodels-main/termsofuse")->setDataset("http://identifiers.org/biomodels.db/");
         $dataset_description .= $source_file->toRDF();
         echo "done!" . PHP_EOL;
     }
     //foreach
     parent::getWriteFile()->close();
     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = gmdate("Y-m-d\\TH:i:s\\Z");
     $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})");
     // $source_file only exists when at least one model was converted
     if (isset($source_file)) {
         $output_file = $output_file->setSource($source_file->getURI());
     }
     $output_file = $output_file->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/biomodels/biomodels.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr(parent::getParameterValue('output_format'), "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description .= $output_file->toRDF();
     //write dataset description to file
     parent::setGraphURI($graph_uri);
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Exemplo n.º 27
0
 /**
  * Convert a parsed InterPro XML document into RDF.
  *
  * First records every <dbinfo> of the release against the dataset URI (and takes
  * the dataset version from the INTERPRO entry), then emits, for each <interpro>
  * entry: its description, PubMed citations, abstract, example entries,
  * relations to other entries, secondary accessions, database cross-references
  * and taxon distribution.
  *
  * When the "id_list" parameter is non-empty only the listed entry ids are processed.
  *
  * The RDF buffer is flushed at the start of each entry, so the triples of the
  * last entry are still in the buffer when this method returns.
  *
  * @param SimpleXMLElement $xml root element of the InterPro XML document
  */
 function Parse($xml)
 {
     // element holding <rel_ref> children => relation name in the vocabulary
     // NOTE(review): the sibling containers are named *_list (parent_list, sec_list,
     // member_list); confirm against the InterPro schema that child relations really
     // live under <child> and not <child_list>.
     $relations = array('parent_list' => "parent", 'child' => "child", 'contains' => "contains", 'found_in' => "found-in");
     // elements holding <db_xref> children that become x-<db> cross-references
     $xref_lists = array('member_list', 'external_doc_list', 'structure_db_links');

     // state the dataset info
     foreach ($xml->release->dbinfo as $o) {
         $db = $o->attributes()->dbname . " v" . $o->attributes()->version . " (" . $o->attributes()->entry_count . " entries) [" . $o->attributes()->file_date . "]";
         parent::addRDF(parent::triplifyString(parent::getDatasetURI(), parent::getVoc() . "contains", $db));
         if ((string) $o->attributes()->dbname === "INTERPRO") {
             parent::setDatasetVersion($o->attributes()->version);
         }
     }
     // get a potential id list
     if (parent::getParameterValue("id_list") != '') {
         $id_list = array_map('trim', explode(",", parent::getParameterValue("id_list")));
     }
     // now iterate over the entries
     foreach ($xml->interpro as $o) {
         parent::writeRDFBufferToWriteFile();
         // compare plain strings, not a SimpleXMLElement, against the id list
         $interpro_id = (string) $o->attributes()->id;
         if (isset($id_list) && !in_array($interpro_id, $id_list, true)) {
             continue;
         }
         echo "Processing {$interpro_id}" . PHP_EOL;
         $name = $o->name;
         $short_name = $o->attributes()->short_name;
         $type = $o->attributes()->type;
         $s = parent::getNamespace() . $interpro_id;
         parent::addRDF(parent::describeIndividual($s, "{$name} ({$short_name}) {$type}", parent::getVoc() . $type));

         // get the pubs: <cite> placeholders and the PubMed links that replace them
         $pubs = array('pid' => array(), 'pmid' => array());
         if (isset($o->pub_list->publication)) {
             foreach ($o->pub_list->publication as $p) {
                 $pid = (string) $p->attributes()->id;
                 if (isset($p->db_xref) && $p->db_xref->attributes()->db == "PUBMED") {
                     $pmid = (string) $p->db_xref->attributes()->dbkey;
                     $pubs['pid'][] = '<cite idref="' . $pid . '"/>';
                     $pubs['pmid'][] = '<a href="http://www.ncbi.nlm.nih.gov/pubmed/' . $pmid . '">pubmed:' . $pmid . '</a>';
                     parent::addRDF(parent::triplify($s, parent::getVoc() . "x-pubmed", "pubmed:{$pmid}"));
                 }
             }
         }
         $abstract = isset($o->abstract->p) ? (string) $o->abstract->p->asXML() : '';
         $abstract = str_replace($pubs['pid'], $pubs['pmid'], $abstract);
         parent::addRDF(parent::triplifyString($s, "dc:description", $abstract));

         if (isset($o->example_list)) {
             foreach ($o->example_list->example as $example) {
                 $db = (string) $example->db_xref->attributes()->db;
                 $id = (string) $example->db_xref->attributes()->dbkey;
                 parent::addRDF(parent::triplify($s, parent::getVoc() . "example-entry", "{$db}:{$id}"));
             }
         }

         // relations to other InterPro entries
         foreach ($relations as $element => $relation) {
             if (!isset($o->{$element}->rel_ref)) {
                 continue;
             }
             foreach ($o->{$element}->rel_ref as $ref) {
                 $id = (string) $ref->attributes()->ipr_ref;
                 parent::addRDF(parent::triplify($s, parent::getVoc() . $relation, "interpro:{$id}"));
             }
         }

         // secondary accessions: iterate the same node that was tested, and keep
         // the loop variable distinct from the subject URI $s
         if (isset($o->sec_list->sec_ac)) {
             foreach ($o->sec_list->sec_ac as $sec) {
                 $id = (string) $sec->attributes()->acc;
                 parent::addRDF(parent::triplify($s, parent::getVoc() . "secondary-accession", "interpro:{$id}"));
             }
         }

         // xrefs
         foreach ($xref_lists as $element) {
             if (!isset($o->{$element}->db_xref)) {
                 continue;
             }
             foreach ($o->{$element}->db_xref as $dbxref) {
                 $db = (string) $dbxref->attributes()->db;
                 $id = (string) $dbxref->attributes()->dbkey;
                 parent::addRDF(parent::triplify($s, parent::getVoc() . "x-" . strtolower($db), "{$db}:{$id}"));
             }
         }

         // taxon distribution
         if (isset($o->taxonomy_distribution->taxon_data)) {
             foreach ($o->taxonomy_distribution->taxon_data as $t) {
                 $organism = (string) $t->attributes()->name;
                 $number = (string) $t->attributes()->proteins_count;
                 parent::addRDF(parent::triplifyString($s, parent::getVoc() . "taxon-distribution", "{$organism} ({$number})"));
             }
         }
     }
 }
Exemplo n.º 28
0
 /**
  * Emit one statement, given as a parsed-triple array, through the RDF buffer.
  *
  * Subject, predicate and (for resource objects) object are run through
  * parseURI(). For a term with a recognised prefix that is not one of the
  * registry's default URI schemes an owl:sameAs link between the original URI
  * and its Bio2RDF URI is added; a term without a recognised prefix is counted
  * per base URI in $this->unmapped_uri. Blank nodes are rewritten to
  * "http://bio2rdf.org/<prefix>_resource:<label>" first.
  *
  * @param array  $a      triple with keys s, s_type, p, o, o_type, o_datatype, o_lang
  * @param string $prefix dataset prefix used when rewriting blank nodes
  */
 public function TriplifyMap($a, $prefix)
 {
     $defaults = parent::getRegistry()->getDefaultURISchemes();
     $bio2rdf_priority = false;
     $mapping = true;
     $bnode_base = 'http://bio2rdf.org/' . $prefix . '_resource:';

     // collect the terms to resolve, in emission order: subject, predicate, object
     $terms = array();
     $terms['s'] = $a['s_type'] == 'bnode' ? $bnode_base . substr($a['s'], 2) : $a['s'];
     $terms['p'] = $a['p'];
     $object_is_resource = $a['o_type'] == 'uri' || $a['o_type'] == 'bnode';
     if ($object_is_resource) {
         $terms['o'] = $a['o_type'] == 'bnode' ? $bnode_base . substr($a['o'], 2) : $a['o'];
     }

     $resolved = array();
     foreach ($terms as $role => $raw) {
         $parsed = $this->parseURI($raw);
         $resolved[$role] = $parsed['uri'];

         if (!isset($parsed['prefix'])) {
             // add to the registry of uris not found
             $base = $parsed['base_uri'];
             if (isset($this->unmapped_uri[$base])) {
                 $this->unmapped_uri[$base]++;
             } else {
                 $this->unmapped_uri[$base] = 1;
             }
             continue;
         }
         if (in_array($parsed['prefix'], $defaults)) {
             continue;
         }
         if ($bio2rdf_priority) {
             // use the Bio2RDF URI in the statement and link back to the original
             $resolved[$role] = $parsed['bio2rdf_uri'];
             if ($mapping) {
                 parent::addRDF(parent::triplify($resolved[$role], 'owl:sameAs', $parsed['uri']));
             }
         } elseif ($mapping) {
             // keep the original URI and link it to its Bio2RDF counterpart
             parent::addRDF(parent::triplify($parsed['uri'], 'owl:sameAs', $parsed['bio2rdf_uri']));
         }
     }

     // add the triple
     if ($object_is_resource) {
         parent::addRDF(parent::triplify($resolved['s'], $resolved['p'], $resolved['o']));
         return;
     }
     $datatype = $a['o_datatype'] == '' ? null : $a['o_datatype'];
     $lang = $a['o_lang'] == '' ? null : $a['o_lang'];
     parent::addRDF(parent::triplifyString($resolved['s'], $resolved['p'], $a['o'], $datatype, $lang));
 }