/**
 * Record a date for a resource as an xsd:date typed literal.
 *
 * The Year, Month and Day properties of $dateobj are joined with hyphens,
 * exactly as they are read (no padding or validation is applied here).
 *
 * @param mixed $id      subject of the generated statement
 * @param mixed $field   local name appended to the vocabulary namespace to form the predicate
 * @param mixed $dateobj object exposing Year, Month and Day, or null
 *
 * @return mixed FALSE when $dateobj loosely equals null; otherwise nothing is returned
 */
function addDate($id, $field, $dateobj)
{
    if ($dateobj == null) {
        return false;
    }

    // Read the three components first, then join them as Year-Month-Day.
    $components = array($dateobj->Year, $dateobj->Month, $dateobj->Day);
    $literal = implode("-", $components);

    $predicate = parent::getVoc() . $field;
    $statement = parent::triplifyString($id, $predicate, $literal, "xsd:date");
    parent::addRDF($statement);
}
/**
 * Describe a titled individual and its class, returning the individual's URI.
 *
 * The individual's URI is the resource namespace followed by the md5 hash of
 * the title; the class URI is the vocabulary namespace followed by $type with
 * spaces replaced by hyphens.
 *
 * @param mixed $title label of the individual; a falsy value means "nothing to describe"
 * @param mixed $type  human-readable class label
 *
 * @return mixed the generated URI, or null when $title is falsy
 */
public function makeDescription($title, $type)
{
    if ($title) {
        $individual = parent::getRes() . md5($title);
        $class = parent::getVoc() . str_replace(" ", "-", $type);

        $rdf = parent::describeIndividual($individual, $title, $class);
        $rdf .= parent::describeClass($class, $type);
        parent::addRDF($rdf);

        return $individual;
    }

    return null;
}
/**
 * Convert one decoded registry entry into RDF and buffer it through addRDF().
 *
 * Sections handled, each only when present in $item: entry attributes,
 * comment, definition, synonyms, uris, resources, tags, documentations,
 * restrictions and annotation formats. Sections that may hold either a single
 * value or a list of values are normalised to a list before iteration.
 *
 * Changes relative to the previous implementation:
 *  - DOI documentation links no longer keep the "doi:" marker inside the
 *    resolver URL, and a string that mentions "doi" without a "doi:" marker
 *    is used as-is instead of being blindly prefixed with the resolver.
 *  - foaf:page is emitted for a restriction only when a link is present
 *    (previously an empty object was passed to triplify()).
 *  - a single, non-array 'uri' value is accepted.
 *
 * @param array $item nested array for one entry; ['@attributes']['id'],
 *                    'name' and 'namespace' are read unconditionally
 */
function parseItem($item)
{
    $id = $item['@attributes']['id'];
    $label = $item['name'];

    parent::addRDF(
        parent::describeIndividual($id, $label, parent::getVoc() . "Entry")
        . parent::describeClass(parent::getVoc() . "Entry", "MIRIAM database entry")
        . parent::triplifyString($id, parent::getVoc() . "namespace", $item['namespace'])
    );

    // Every attribute of the entry becomes a literal-valued property.
    if (isset($item['@attributes'])) {
        foreach ($item['@attributes'] as $k => $v) {
            parent::addRDF(parent::triplifyString($id, parent::getVoc() . $k, $v));
        }
    }

    // Simple literal fields whose predicate name equals the field name.
    foreach (array('comment', 'definition') as $field) {
        if (isset($item[$field])) {
            parent::addRDF(parent::triplifyString($id, parent::getVoc() . $field, $item[$field]));
        }
    }

    if (isset($item['synonyms'])) {
        $synonyms = $item['synonyms']['synonym'];
        if (!is_array($synonyms)) {
            $synonyms = array($synonyms);
        }
        foreach ($synonyms as $synonym) {
            parent::addRDF(parent::triplifyString($id, "skos:altLabel", $synonym));
        }
    }

    if (isset($item['uris'])) {
        $uris = $item['uris']['uri'];
        if (!is_array($uris)) {
            $uris = array($uris);
        }
        foreach ($uris as $uri) {
            parent::addRDF(parent::triplifyString($id, parent::getVoc() . "uri", $uri));
        }
    }

    if (isset($item['resources'])) {
        $resources = $item['resources']['resource'];
        // A lone resource is recognised by its 'dataEntry' key sitting directly on the node.
        if (isset($resources['dataEntry'])) {
            $resources = array($resources);
        }
        foreach ($resources as $resource) {
            $rid = $resource['@attributes']['id'];
            parent::addRDF(
                parent::describeIndividual($rid, $resource['dataInfo'], parent::getVoc() . "Resource")
                . parent::describeClass(parent::getVoc() . "Resource", "MIRIAM Resource")
                . parent::triplify($rid, parent::getVoc() . "url", $resource['dataResource'])
                . parent::triplifyString($rid, parent::getVoc() . "urlTemplate", $resource['dataEntry'])
                // array-valued institution/location fields are written as empty strings
                . parent::triplifyString($rid, parent::getVoc() . "organization", is_array($resource['dataInstitution']) ? "" : $resource['dataInstitution'])
                . parent::triplifyString($rid, parent::getVoc() . "location", is_array($resource['dataLocation']) ? "" : $resource['dataLocation'])
                . parent::triplify($id, parent::getVoc() . "resource", $rid)
            );
        }
    }

    if (isset($item['tags'])) {
        $tags = $item['tags']['tag'];
        if (!is_array($tags)) {
            $tags = array($tags);
        }
        foreach ($tags as $tag) {
            parent::addRDF(parent::triplifyString($id, parent::getVoc() . "tag", $tag));
        }
    }

    if (isset($item['documentations'])) {
        $documents = $item['documentations']['documentation'];
        if (!is_array($documents)) {
            $documents = array($documents);
        }
        foreach ($documents as $document) {
            if (strstr($document, "pubmed")) {
                // keep whatever follows the last colon as the PubMed identifier
                $uri = "pubmed:" . substr($document, strrpos($document, ":") + 1);
            } elseif (strstr($document, "doi")) {
                $marker = strpos($document, "doi:");
                if ($marker === false) {
                    // mentions "doi" but carries no "doi:" marker: use unchanged
                    $uri = $document;
                } else {
                    // drop the 4-character "doi:" marker so only the DOI follows the resolver
                    $uri = "http://dx.doi.org/" . substr($document, $marker + 4);
                }
            } else {
                $uri = $document;
            }
            parent::addRDF(parent::triplify($id, parent::getVoc() . "documentation", $uri));
        }
    }

    if (isset($item['restrictions'])) {
        $restrictions = $item['restrictions']['restriction'];
        // A lone restriction is recognised by its 'statement' key sitting directly on the node.
        if (isset($restrictions['statement'])) {
            $restrictions = array($restrictions);
        }
        foreach ($restrictions as $i => $restriction) {
            $rid = parent::getRes() . str_replace(":", "", $id) . "_" . ($i + 1);
            $a = $restriction['@attributes'];
            $rid_type = parent::getVoc() . 'restriction_type_' . $a['type'];

            $rdf = parent::describeIndividual($rid, $a['desc'], parent::getVoc() . "Restriction")
                . parent::describeClass(parent::getVoc() . "Restriction", "Resource Restriction")
                . parent::triplify($rid, "rdf:type", $rid_type)
                . parent::describeClass($rid_type, $a['desc'], parent::getVoc() . "Restriction")
                . parent::triplifyString($rid, "dct:description", $restriction['statement']);
            // Only link a page when one was actually supplied.
            if (isset($restriction['link']) && $restriction['link'] !== "") {
                $rdf .= parent::triplify($rid, "foaf:page", $restriction['link']);
            }
            $rdf .= parent::triplify($id, parent::getVoc() . "restriction", $rid);
            parent::addRDF($rdf);
        }
    }

    /*
     * Expected shape of the annotation section:
     * <annotation>
     *   <format name="SBML">
     *     <elements>
     *       <element>reaction</element>
     *       <element>event</element>
     *       <element>rule</element>
     *       <element>species</element>
     *     </elements>
     *   </format>
     */
    if (isset($item['annotation'])) {
        $formats = $item['annotation']['format'];
        // A lone format is recognised by its 'elements' key sitting directly on the node.
        if (isset($formats['elements'])) {
            $formats = array($formats);
        }
        foreach ($formats as $i => $format) {
            $name = $format['@attributes']['name'];
            $myid = str_replace("MIR:", parent::getRes(), $id) . "_annotation_" . ($i + 1) . "_" . urlencode($name);
            parent::addRDF(
                parent::describeIndividual($myid, "{$label} used by {$name}", parent::getVoc() . "ValueSet")
                . parent::describeClass(parent::getVoc() . "ValueSet", "MIRIAM Value Set")
                . parent::triplifyString($myid, parent::getVoc() . "used-in", $name)
                . parent::triplify($myid, parent::getVoc() . "uses", $id)
            );

            $elements = $format['elements']['element'];
            if (!is_array($elements)) {
                $elements = array($elements);
            }
            foreach ($elements as $element) {
                parent::addRDF(parent::triplifyString($myid, parent::getVoc() . "used-for", $element));
            }
        }
    }
}
/**
 * Read the tab-separated mapping file and emit cross-reference statements
 * for every row, rotating to a new output file every 1,000,000 rows.
 *
 * Column 0 is used as the entry identifier. The remaining columns are driven
 * by the $columns table below, which preserves the original emission order.
 * Each table entry may define:
 *   - 'rel' + 'prefix': emit <entry> voc:rel prefix:value
 *   - 'url'           : emit <entry> rdfs:seeAlso <url + value>
 *   - 'strip'         : number of leading characters removed from each value
 *   - 'sep'           : value separator (defaults to "; ")
 *
 * Changes relative to the previous implementation:
 *  - missing columns are detected with isset() instead of being hidden by
 *    the @ error-suppression operator;
 *  - line terminators are removed before splitting, so a short row no longer
 *    leaks a newline into its last identifier;
 *  - each value is trimmed and empty values are skipped, which removes the
 *    leading blank that column 21 (split on ";" rather than "; ") produced.
 */
private function process()
{
    $columns = array(
        1  => array('rel' => "x-uniprot", 'prefix' => "uniprot:"),
        2  => array('rel' => "x-ncbigene", 'prefix' => "geneid:"),
        3  => array('rel' => "x-refseq", 'prefix' => "refseq:"),
        4  => array('rel' => "x-gi", 'prefix' => "gi:"),
        5  => array('rel' => "x-pdb", 'prefix' => "pdb:"),
        6  => array('rel' => "x-pfam", 'prefix' => "pfam:"),
        7  => array('rel' => "x-go", 'prefix' => "go:", 'strip' => 3),
        8  => array('rel' => "x-pirsf", 'prefix' => "pirsf:"),
        9  => array('rel' => "x-ipi", 'prefix' => "ipi:"),
        10 => array('url' => "http://uniprot.org/uniref/"),
        11 => array('url' => "http://uniprot.org/uniref/"),
        12 => array('url' => "http://uniprot.org/uniref/"),
        13 => array('rel' => "x-uniparc", 'prefix' => "uniparc:", 'url' => "http://uniprot.org/uniparc/"),
        // column 14 (pir-psd) is skipped because the database is no longer maintained
        15 => array('rel' => "x-taxon", 'prefix' => "taxon:"),
        16 => array('rel' => "x-omim", 'prefix' => "omim:"),
        17 => array('rel' => "x-unigene", 'prefix' => "unigene:"),
        18 => array('rel' => "x-ensembl", 'prefix' => "ensembl:"),
        19 => array('rel' => "x-pubmed", 'prefix' => "pubmed:"),
        20 => array('rel' => "x-genbank", 'prefix' => "genbank:"),
        21 => array('rel' => "x-genbank", 'prefix' => "genbank:", 'sep' => ";"),
    );

    $z = 0; // rows read so far
    $y = 1; // sequence number of the next output file

    while ($l = $this->getReadFile()->Read(200000)) {
        if ($z++ % 1000000 == 0) {
            echo $z . PHP_EOL;
            $odir = parent::getParameterValue('outdir');
            $ofile = 'iproclass.' . $y++ . "." . parent::getParameterValue('output_format');
            $gz = strstr(parent::getParameterValue('output_format'), "gz") !== false;

            if (parent::getWriteFile() != null) {
                parent::getWriteFile()->close();
                parent::clear();
            }
            // generate a new file
            parent::setWriteFile($odir . $ofile, $gz);
        }

        $fields = explode("\t", rtrim($l, "\r\n"));

        $uniprot_acc = $fields[0];
        $id_res = $this->getNamespace() . $uniprot_acc;

        parent::addRDF(parent::triplify($id_res, $this->getVoc() . "x-uniprot", "uniprot:" . $uniprot_acc));

        foreach ($columns as $index => $spec) {
            $value = isset($fields[$index]) ? trim($fields[$index]) : '';
            if (empty($value)) {
                continue;
            }

            $separator = isset($spec['sep']) ? $spec['sep'] : "; ";
            foreach (explode($separator, $value) as $xref) {
                $xref = trim($xref);
                if ($xref === '') {
                    continue;
                }
                if (isset($spec['strip'])) {
                    // e.g. drop the leading three characters of each GO value
                    $xref = substr($xref, $spec['strip']);
                }

                $rdf = '';
                if (isset($spec['rel'])) {
                    $rdf .= parent::triplify($id_res, $this->getVoc() . $spec['rel'], $spec['prefix'] . $xref);
                }
                if (isset($spec['url'])) {
                    $rdf .= parent::QQuadO_URL($id_res, "rdfs:seeAlso", $spec['url'] . $xref);
                }
                parent::addRDF($rdf);
            }
        }

        // write rdf to file
        $this->WriteRDFBufferToWriteFile();
    }
}
/**
 * Convert the gene interaction file (11 tab-separated columns per row,
 * '#'-prefixed comment lines ignored) into RDF.
 *
 * Columns used: 0 interaction id, 1 interaction type, 2 additional info,
 * 5 first gene, 8 second gene. Depending on the additional info a row is
 * written as a non-interaction (plus an OWL negative property assertion),
 * a plain interaction, or a sub-typed interaction.
 *
 * Changes relative to the previous implementation:
 *  - the interaction predicate is resolved afresh for every row; a row with
 *    an unrecognised type no longer reuses the predicate of the previous row
 *    (or an undefined variable). Such rows raise a notice and are written
 *    without the predicate-dependent statements.
 *  - the base class label in the sub-typed branch read "Interation" and used
 *    the hyphenated type; it now matches the label written by the plain
 *    interaction branch for the same class.
 */
function gene_interactions()
{
    // interaction type => local name of the gene-to-gene predicate
    $predicates = array(
        "Genetic" => "genetically-interacts-with",
        "Physical" => "physically-interacts-with",
        "Predicted" => "predicted-to-interact-with",
        "Regulatory" => "regulates",
    );

    while ($l = parent::getReadFile()->Read()) {
        if ($l[0] == '#') {
            continue;
        }

        $data = explode("\t", $l);
        if (count($data) != 11) {
            trigger_error("Found " . count($data) . " columns, expecting 11");
            continue;
        }

        $interaction = $data[0];
        $interaction_type = str_replace("_", "-", $data[1]);
        $interaction_type_label = str_replace("_", " ", $data[1]);
        $int_additional_info = $data[2];
        $gene1 = $data[5];
        $gene2 = $data[8];

        $interaction_id = parent::getNamespace() . $interaction;
        $gene1_uri = parent::getNamespace() . $gene1;
        $gene2_uri = parent::getNamespace() . $gene2;

        $int_pred = null;
        if (isset($predicates[$interaction_type])) {
            $int_pred = parent::getVoc() . $predicates[$interaction_type];
        } else {
            trigger_error("Unknown interaction type '" . $interaction_type . "' for interaction " . $interaction);
        }

        // statements shared by every branch
        $involves = parent::triplify($interaction_id, parent::getVoc() . "involves", $gene1_uri)
            . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene2_uri);
        $base_class = parent::getVoc() . $interaction_type . "-Interaction";

        if ($int_additional_info == "No_interaction") {
            $interaction_label = "No " . strtolower($interaction_type) . " interaction between " . $gene1 . " and " . $gene2;
            $non_interaction_class = parent::getVoc() . $interaction_type . "-Non-Interaction";
            parent::addRDF(
                parent::describeIndividual($interaction_id, $interaction_label, $non_interaction_class)
                . parent::describeClass($non_interaction_class, $interaction_type_label . " non-interaction")
                . $involves
            );

            // The negative assertion is meaningless without a property, so skip it for unknown types.
            if ($int_pred !== null) {
                $npa_id = parent::getRes() . md5($interaction_id . "negative property assertion");
                $npa_label = "Negative property assertion stating that " . $gene1 . " and " . $gene2 . " do not have a " . $interaction_type_label . " interaction";
                parent::addRDF(
                    parent::describeIndividual($npa_id, $npa_label, "owl:NegativeObjectPropertyAssertion")
                    . parent::triplify($npa_id, "owl:sourceIndividual", $gene1_uri)
                    . parent::triplify($npa_id, "owl:targetIndividual", $gene2_uri)
                    . parent::triplify($npa_id, "owl:assertionProperty", $int_pred)
                );
            }
        } elseif ($int_additional_info == "N/A" || $int_additional_info == "Genetic_interaction") {
            $interaction_label = $interaction_type . " interaction between " . $gene1 . " and " . $gene2;
            $rdf = parent::describeIndividual($interaction_id, $interaction_label, $base_class)
                . parent::describeClass($base_class, $interaction_type_label . " Interaction")
                . $involves;
            if ($int_pred !== null) {
                $rdf .= parent::triplify($gene1_uri, $int_pred, $gene2_uri);
            }
            parent::addRDF($rdf);
        } else {
            // Any other additional info becomes a sub-type of the base interaction class.
            $qualifier = $int_additional_info != "" ? $int_additional_info : null;
            $interaction_label = ($qualifier !== null ? $qualifier . " " : "") . strtolower($interaction_type) . " interaction between " . $gene1 . " and " . $gene2;
            $type = parent::getVoc() . ($qualifier !== null ? $qualifier . "-" : "") . $interaction_type . "-Interaction";
            $type_label = ($qualifier !== null ? $qualifier . " " : "") . $interaction_type_label . " Interaction";

            $rdf = parent::describeIndividual($interaction_id, $interaction_label, $type)
                . parent::describeClass($type, $type_label, $base_class)
                . parent::describeClass($base_class, $interaction_type_label . " Interaction")
                . $involves;
            if ($int_pred !== null) {
                $rdf .= parent::triplify($gene1_uri, $int_pred, $gene2_uri);
            }
            parent::addRDF($rdf);
        }

        parent::WriteRDFBufferToWriteFile();
    }
}
/**
 * Parse an annotation CSV from the current read file and emit RDF for every
 * probeset row.
 *
 * Layout handled, in order: one leading line that is discarded, lines that
 * start with '#' holding "key=value" dataset attributes, one header row, and
 * data rows. Header and data rows must both have 41 columns, otherwise an
 * E_USER_ERROR is raised and the script exits.
 *
 * For each data column, $this->Map($k) decides whether the column is a
 * registry cross-reference; columns it does not map are handled by the
 * switch on the header label below.
 *
 * Changes relative to the previous implementation:
 *  - the line counter is advanced for every line read, so error messages
 *    report the real line instead of always "line 1";
 *  - the annotation date is unpacked into fresh variables; the former
 *    list($m, ...) = $m modified the array while it was being unpacked,
 *    which the PHP manual documents as undefined behaviour;
 *  - the deeply nested prefix translation is a lookup table. Its last
 *    'organelle' branch could never be reached (an earlier branch already
 *    maps 'organelle' to 'refseq'), so the table keeps the effective result;
 *  - the creation-date day component is only inspected when it exists.
 *
 * @param mixed $file not used by this method; kept for interface compatibility
 */
function Parse($file)
{
    // header label => relation local name for the three Gene Ontology columns
    $go_relations = array(
        'Gene Ontology Biological Process' => 'go-process',
        'Gene Ontology Cellular Component' => 'go-location',
        'Gene Ontology Molecular Function' => 'go-function',
    );

    // transcript assignment source tag => prefix used in the emitted identifier
    $source_prefixes = array(
        'gb' => 'genbank',
        'gb_htc' => 'genbank',
        'ncbibacterial' => 'gi',
        'ncbi_bacterial' => 'gi',
        'ens' => 'ensembl',
        'ncbi_mito' => 'refseq',
        'ncbi_organelle' => 'refseq',
        'organelle' => 'refseq',
        'affx' => 'affymetrix',
        'unknown' => 'affymetrix',
        'prop' => 'affymetrix',
        'tigr_2004_08' => 'tigr',
        'tigr-plantta' => 'genbank',
        'newrs.gi' => 'gi',
        'newRS.gi' => 'gi',
        'primate_viral' => 'genbank',
        'jgi-bacterial' => 'ncbigene',
        'tb' => 'tuberculist',
        'pa' => 'pseudomonas',
    );

    parent::getReadFile()->read(); // skip the first comment line
    $line = 1;
    $first = true;

    while ($l = parent::getReadFile()->read(500000)) {
        $line++;

        if ($l[0] == "#") {
            // dataset attributes
            $a = explode('=', trim($l));
            $r = $this->getVoc() . substr($a[0], 2);
            if (isset($a[1])) {
                $v = $a[1];
                if ($r == "affymetrix_vocabulary:genome-version-create_date") {
                    // a day of "00" is replaced by "01"
                    $x = explode("-", $a[1]);
                    if (isset($x[2]) && $x[2] == "00") {
                        $x[2] = "01";
                    }
                    $v = implode("-", $x);
                }
                parent::addRDF(
                    parent::triplifyString(parent::getDatasetURI(), $r, $v)
                    . parent::describe($r, "{$r}")
                );
            }
            continue;
        }

        if ($first == true) {
            // header
            $first = false;
            $header = explode(",", str_replace('"', '', trim($l)));
            $n = count($header);
            if ($n != 41) {
                trigger_error("Expecting 41 columns, found {$n} in header on line {$line}!", E_USER_ERROR);
                exit;
            }
            continue;
        }

        // strip the opening quote and the closing quote + line terminator, then split on ","
        $a = explode('","', substr($l, 1, -2));
        $n = count($a);
        if ($n != 41) {
            trigger_error("Expecting 41 columns, found {$n} on line {$line}!", E_USER_ERROR);
            exit;
        }

        parent::writeRDFBufferToWriteFile();

        $id = $a[0];
        $qname = "affymetrix:{$id}";
        $label = "probeset {$a['0']} on GeneChip {$a['1']} ({$a['2']})";
        parent::addRDF(
            parent::describeIndividual($qname, $label, $this->getVoc() . "Probeset")
            . parent::describeClass($this->getVoc() . "Probeset", "Affymetrix probeset")
        );
        trigger_error($id, E_USER_NOTICE);

        // now process the entries
        foreach ($a as $k => $v) {
            if (trim($v) == '---') {
                continue;
            }

            // multi-valued entries are separated by " /// "
            $values = explode(" /// ", $v);

            $r = $this->Map($k);
            if (isset($r)) {
                foreach ($values as $c) {
                    $d = explode(" // ", $c);
                    if ($r == 'symbol') {
                        $d[0] = str_replace(" ", "-", $d[0]);
                    }
                    $s = $this->getRegistry()->getPreferredPrefix($r);
                    if ($s == "ec") {
                        // keep the part after the first colon
                        $e = explode(":", $d[0]);
                        $d[0] = $e[1];
                    }
                    $this->addRDF(
                        parent::triplify($qname, $this->getVoc() . "x-{$s}", "{$s}:" . $d[0])
                        . parent::describeProperty($this->getVoc() . "x-{$s}", "a relation to {$s}")
                    );
                }
                continue;
            }

            // columns without a registry mapping are handled manually
            $column = $header[$k];
            switch ($column) {
                case 'GeneChip Array':
                    $array_id = parent::getRes() . str_replace(" ", "-", $v);
                    parent::addRDF(
                        parent::triplify($qname, $this->getVoc() . "genechip-array", $array_id)
                        . parent::describeIndividual($array_id, "Affymetrix {$v} GeneChip array", $this->getVoc() . "Genechip-Array")
                        . parent::describeClass($this->getVoc() . "Genechip-Array", "Affymetrix GeneChip array")
                    );
                    break;

                case 'Gene Ontology Biological Process':
                case 'Gene Ontology Cellular Component':
                case 'Gene Ontology Molecular Function':
                    $rel = $go_relations[$column];
                    $prefix = "go";
                    foreach ($values as $c) {
                        $d = explode(" // ", $c);
                        parent::addRDF(
                            $this->triplify($qname, $this->getVoc() . $rel, "{$prefix}:" . $d[0])
                            . $this->describeProperty($this->getVoc() . $rel, "{$rel}")
                        );
                    }
                    break;

                case 'Transcript Assignments':
                    foreach ($values as $c) {
                        $d = explode(" // ", $c);
                        $transcript = $d[0];
                        $prefix = isset($d[2]) ? $d[2] : '';
                        if ($prefix == '---' || $transcript == '---') {
                            continue;
                        }

                        if (isset($source_prefixes[$prefix])) {
                            $prefix = $source_prefixes[$prefix];
                        } elseif ($prefix == 'gi|53267') {
                            $prefix = 'gi';
                            $transcript = '53267';
                        } elseif ($prefix == 'broad-tcup') {
                            // prefix is kept; only the part before the first '-' of the id is used
                            $e = explode("-", $transcript);
                            $transcript = $e[0];
                        }

                        parent::addRDF(
                            parent::triplify($qname, $this->getVoc() . "transcript-assignment", "{$prefix}:{$transcript}")
                            . parent::describeProperty($this->getVoc() . "transcript-assignment", "transcript assignment")
                        );
                    }
                    break;

                case 'Annotation Transcript Cluster':
                case 'Species Scientific Name':
                    // intentionally not converted
                    break;

                case 'Annotation Date':
                    // e.g. "Jun 9, 2011"
                    $rel = "annotation-date";
                    preg_match("/^([A-Za-z]+) ([0-9]+), ([0-9]{4})\$/", $v, $m);
                    if (count($m) == 4) {
                        $month_name = $m[1];
                        $day = $m[2];
                        $year = $m[3];
                        $month = $this->getMonth($month_name);
                        if (!$day || $day == "0") {
                            $day = "01";
                        }
                        $date = $year . "-" . $month . "-" . str_pad($day, 2, "0", STR_PAD_LEFT) . "T00:00:00Z";
                        parent::addRDF(
                            parent::triplifyString($qname, $this->getVoc() . $rel, $date, "xsd:dateTime")
                            . parent::describeProperty($this->getVoc() . $rel, "{$rel}")
                        );
                    } else {
                        trigger_error("could not match date from {$v}", E_USER_ERROR);
                    }
                    break;

                default:
                    // generic literal column; the relation name is derived from the header label
                    if ($column == 'Transcript ID(Array Design)') {
                        $rel = 'transcript';
                    } else {
                        $rel = str_replace(" ", "-", strtolower($column));
                    }
                    foreach ($values as $c) {
                        parent::addRDF(
                            parent::triplifyString($qname, $this->getVoc() . $rel, stripslashes($c))
                            . parent::describeProperty($this->getVoc() . $rel, "{$rel}")
                        );
                    }
                    break;
            }
        }
    }

    $this->WriteRDFBufferToWriteFile();
}
/**
 * Convert the tab-separated product file into RDF, one product per row.
 *
 * The first line is treated as a header and discarded. Rows that do not have
 * exactly 18 columns are reported and skipped. Columns used: 0 id, 1 product
 * id, 2 product type, 3 proprietary name, 4 name suffix, 5 non-proprietary
 * names, 6 dosage forms, 7 routes, 8/9 marketing start/end dates (YYYYMMDD),
 * 10 marketing category, 11 application number, 12 labeller, 13-15
 * substances with their strengths and units, 16 pharmacological classes.
 *
 * Changes relative to the previous implementation:
 *  - marketing dates were built with substr() called as
 *    substr(offset, length, string), which produced "--"; the arguments are
 *    now in the correct order so dates are emitted as YYYY-MM-DD;
 *  - the product type resource is derived from the product type column, not
 *    from the proprietary name;
 *  - a missing unit for a substance is treated as an empty string instead of
 *    reading an undefined array offset;
 *  - the column-count message is spelled and spaced correctly;
 *  - unused locals were removed.
 *
 * @param resource $fpin open, readable stream positioned at the start of the file
 */
function product($fpin)
{
    fgets($fpin); // header

    while ($l = fgets($fpin, 100000)) {
        $a = explode("\t", $l);
        if (count($a) != 18) {
            trigger_error("Expected 18 columns, instead found " . count($a));
            continue;
        }

        $product_id = parent::getNamespace() . $a[0];
        $product_label = $a[3];
        $product_type_label = ucfirst(strtolower($a[2]));
        $product_type = parent::getVoc() . str_replace(" ", "-", $product_type_label);

        parent::addRDF(
            parent::describeIndividual($product_id, $product_label, parent::getVoc() . "Product")
            . parent::describeClass(parent::getVoc() . "Product", "NDC Product")
            . parent::triplify($product_id, parent::getVoc() . "product-type", $product_type)
            . parent::describeIndividual($product_type, $product_type_label, parent::getVoc() . "Product-Type")
            . parent::describeClass(parent::getVoc() . "Product-Type", "Product Type")
            . parent::triplifyString($product_id, parent::getVoc() . "product-id", $a[1])
            . parent::triplifyString($product_id, parent::getVoc() . "proprietary-name", $a[3])
            . parent::triplifyString($product_id, parent::getVoc() . "trade-name-suffix", $a[4])
        );

        // non-proprietary names, ';'-separated
        if ($a[5]) {
            foreach (explode(";", $a[5]) as $c) {
                parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "non-proprietary-name", trim($c)));
            }
        }

        // dosage forms, ','-separated
        if ($a[6]) {
            foreach (explode(",", $a[6]) as $c) {
                $dosageform = strtolower($c);
                $dosageform_id = parent::getVoc() . str_replace(" ", "-", ucfirst(strtolower($c)));
                parent::addRDF(
                    parent::describeIndividual($dosageform_id, $dosageform, parent::getVoc() . "Dosage-Form")
                    . parent::describeClass(parent::getVoc() . "Dosage-Form", "NDC Dosage Form")
                    . parent::triplify($product_id, parent::getVoc() . "dosage-form", $dosageform_id)
                );
            }
        }

        // routes, '; '-separated
        if ($a[7]) {
            foreach (explode("; ", $a[7]) as $c) {
                $route = strtolower(trim($c));
                $route_id = parent::getVoc() . str_replace(" ", "-", ucfirst(strtolower($c)));
                parent::addRDF(
                    parent::describeIndividual($route_id, $route, parent::getVoc() . "Route")
                    . parent::describeClass(parent::getVoc() . "Route", "NDC Drug Route")
                    . parent::triplify($product_id, parent::getVoc() . "route", $route_id)
                );
            }
        }

        // marketing dates: YYYYMMDD in the file, written out as YYYY-MM-DD
        $date_columns = array(8 => "start-marketing-date", 9 => "end-marketing-date");
        foreach ($date_columns as $index => $rel) {
            if ($a[$index]) {
                $date = substr($a[$index], 0, 4) . "-" . substr($a[$index], 4, 2) . "-" . substr($a[$index], 6, 2);
                parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . $rel, $date));
            }
        }

        if ($a[10]) {
            parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "marketing-category", $a[10]));
        }
        if ($a[11]) {
            parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "application-number", $a[11]));
        }

        // create a labeller node
        if ($a[12]) {
            $labeller_id = parent::getRes() . md5($a[12]);
            $label = addslashes($a[12]);
            parent::addRDF(
                parent::describeIndividual($labeller_id, $label, parent::getVoc() . "Labeller")
                . parent::describeClass(parent::getVoc() . "Labeller", "NDC Labeller")
                . parent::triplify($product_id, parent::getVoc() . "labeller", $labeller_id)
            );
        }

        // substances (13), strengths (14) and units (15) are parallel ';'-separated lists
        if ($a[13]) {
            $substances = explode(";", $a[13]);
            $strengths = explode(";", $a[14]);
            $units = explode(";", $a[15]);

            foreach ($substances as $i => $substance) {
                // list the active ingredient
                $ingredient_label = strtolower($substance);
                $strength = isset($strengths[$i]) ? $strengths[$i] : '';
                $unit = isset($units[$i]) ? $units[$i] : '';

                $ingredient_id = parent::getRes() . md5($ingredient_label);
                parent::addRDF(
                    parent::describeIndividual($ingredient_id, $ingredient_label, parent::getVoc() . "Ingredient")
                    . parent::describeClass(parent::getVoc() . "Ingredient", "NDC Ingredient")
                    . parent::triplify($product_id, parent::getVoc() . "ingredient", $ingredient_id)
                );

                // describe the substance composition
                $substance_label = "{$strength} {$unit} {$ingredient_label}";
                $substance_id = parent::getRes() . md5($substance_label);
                parent::addRDF(
                    parent::describeIndividual($substance_id, $substance_label, parent::getVoc() . "Substance")
                    . parent::triplifyString($substance_id, parent::getVoc() . "amount", $strength)
                    . parent::describeClass(parent::getVoc() . "Substance", "NDC Substance")
                );

                $unit_id = parent::getVoc() . md5($unit);
                parent::addRDF(
                    parent::describeIndividual($unit_id, $unit, parent::getVoc() . "Unit")
                    . parent::describeClass(parent::getVoc() . "Unit", "NDC Unit")
                    . parent::triplify($substance_id, parent::getVoc() . "amount_unit", $unit_id)
                    . parent::triplify($product_id, parent::getVoc() . "has-part", $substance_id)
                );
            }
        }

        // pharmacological classes, ','-separated
        if ($a[16]) {
            foreach (explode(",", $a[16]) as $c) {
                $cat_id = parent::getVoc() . md5($c);
                parent::addRDF(
                    parent::describeIndividual($cat_id, $c, parent::getVoc() . "Pharmacological-Class")
                    . parent::describeClass(parent::getVoc() . "Pharmacological-Class", "NDC Pharmacological Class")
                    . parent::triplify($product_id, parent::getVoc() . "pharmacological-class", $cat_id)
                );
            }
        }

        parent::WriteRDFBufferToWriteFile();
    }
}
/**
 * Convert one decoded OMIM entry into RDF and add it to the RDF buffer.
 *
 * Sections handled, in order: links, titles, text sections, allelic variants,
 * clinical synopsis, gene map, phenotype maps, references and external links.
 *
 * @param array  $obj  decoded response; the entry is read from
 *                     $obj["omim"]["entryList"][0]["entry"]
 * @param string $type entry type; used as the class label and, with spaces and
 *                     slashes replaced by "-", as the class name
 * @return void
 */
function ParseEntry($obj, $type)
{
    $o = $obj["omim"]["entryList"][0]["entry"];
    $omim_id = $o['mimNumber'];
    $omim_uri = parent::getNamespace() . $o['mimNumber'];
    if (isset($o['version'])) {
        parent::setDatasetVersion($o['version']);
    }

    // add the links
    parent::addRDF($this->QQuadO_URL($omim_uri, "rdfs:seeAlso", "http://omim.org/entry/" . $omim_id));
    parent::addRDF($this->QQuadO_URL($omim_uri, "owl:sameAs", "http://identifiers.org/omim/" . $omim_id));

    // parse titles
    $titles = $o['titles'];
    $type_uri = parent::getVoc() . str_replace(array(" ", "/"), "-", ucfirst($type));
    parent::addRDF(
        parent::describeIndividual($omim_uri, $titles['preferredTitle'], $type_uri) .
        parent::describeClass($type_uri, $type)
    );
    if (isset($titles['preferredTitle'])) {
        parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "preferred-title", $titles['preferredTitle']));
    }
    if (isset($titles['alternativeTitles'])) {
        // alternative titles are packed into one string separated by ";;"
        foreach (explode(";;", $titles['alternativeTitles']) as $title) {
            parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "alternative-title", trim($title)));
        }
    }

    // parse text sections
    if (isset($o['textSectionList'])) {
        foreach ($o['textSectionList'] as $section) {
            $section_title = $section['textSection']['textSectionTitle'];
            $section_text = $section['textSection']['textSectionContent'];
            if ($section_title == "Description") {
                parent::addRDF(parent::triplifyString($omim_uri, "dc:description", $section_text));
            } else {
                // every other section becomes its own vocabulary predicate
                $p = str_replace(" ", "-", strtolower($section_title));
                parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "{$p}", $section_text));
            }

            // parse the omim references: six-digit numbers wrapped in braces
            preg_match_all("/\\{([0-9]{6})\\}/", $section_text, $m);
            if (isset($m[1][0])) {
                foreach ($m[1] as $oid) {
                    parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "refers-to", "omim:{$oid}"));
                }
            }
        }
    }

    // allelic variants
    if (isset($o['allelicVariantList'])) {
        foreach ($o['allelicVariantList'] as $i => $v) {
            $v = $v['allelicVariant'];
            $uri = parent::getRes() . "{$omim_id}" . "_allele_" . $i;
            $label = str_replace("\n", " ", $v['name']);
            parent::addRDF(
                parent::describeIndividual($uri, $label, parent::getVoc() . "Allelic-Variant") .
                parent::describeClass(parent::getVoc() . "Allelic-Variant", "Allelic Variant")
            );
            if (isset($v['alternativeNames'])) {
                foreach (explode(";;", $v['alternativeNames']) as $name) {
                    $name = str_replace("\n", " ", $name);
                    parent::addRDF(parent::triplifyString($uri, parent::getVoc() . "alternative-names", $name));
                }
            }
            if (isset($v['text'])) {
                parent::addRDF(parent::triplifyString($uri, "dc:description", $v['text']));
            }
            if (isset($v['mutations'])) {
                parent::addRDF(parent::triplifyString($uri, parent::getVoc() . "mutation", $v['mutations']));
            }
            if (isset($v['dbSnps'])) {
                foreach (explode(",", $v['dbSnps']) as $snp) {
                    parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-dbsnp", "dbsnp:" . $snp));
                }
            }
            parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "variant", $uri));
        }
    }

    // clinical synopsis
    if (isset($o['clinicalSynopsis'])) {
        $cs = $o['clinicalSynopsis'];
        $cs_uri = parent::getRes() . "" . $omim_id . "_cs";
        parent::addRDF(
            parent::describeIndividual($cs_uri, "Clinical synopsis for omim {$omim_id}", parent::getVoc() . "Clinical-Synopsis") .
            parent::describeClass(parent::getVoc() . "Clinical-Synopsis", "Clinical Synopsis") .
            parent::triplify($omim_uri, parent::getVoc() . "clinical-synopsis", $cs_uri)
        );

        // @todo ignore provenance for now
        $provenance_keys = array('contributors', 'creationDate', 'editHistory', 'epochCreated', 'dateCreated', 'epochUpdated', 'dateUpdated');
        foreach ($cs as $k => $v) {
            if (strstr($k, "Exists")) {
                // ignore the boolean assertion.
                continue;
            }
            if (in_array($k, $provenance_keys)) {
                continue;
            }
            if (!is_array($v)) {
                $v = array($k => $v);
            }
            foreach ($v as $k1 => $v1) {
                foreach (explode(";", $v1) as $coded_phenotype) {
                    // parse out the codes
                    $coded_phenotype = trim($coded_phenotype);
                    if (!$coded_phenotype) {
                        continue;
                    }
                    // the label is the text with the {…} code block removed
                    $phenotype = preg_replace("/\\{.*\\}/", "", $coded_phenotype);
                    $phenotype_id = parent::getRes() . "" . md5(strtolower($phenotype));
                    $entity_id = parent::getRes() . "" . $k1;
                    parent::addRDF(
                        parent::describeIndividual($phenotype_id, $phenotype, parent::getVoc() . 'Characteristic') .
                        parent::describeClass(parent::getVoc() . 'Characteristic', 'Characteristic') .
                        parent::triplify($cs_uri, parent::getVoc() . "feature", $phenotype_id) .
                        parent::describeIndividual($entity_id, $k1, parent::getVoc() . "Entity") .
                        parent::describeClass(parent::getVoc() . "Entity", "Entity") .
                        parent::triplify($phenotype_id, parent::getVoc() . "characteristic-of", $entity_id)
                    );

                    // parse out the vocab references
                    preg_match_all("/\\{([0-9A-Za-z \\:\\-\\.]+)\\}|;/", $coded_phenotype, $codes);
                    if (!isset($codes[1][0])) {
                        continue;
                    }
                    foreach ($codes[1] as $entry) {
                        foreach (explode(" ", trim($entry)) as $e) {
                            if ($e == "HPO" || $e == "EOM") {
                                continue;
                            }
                            $this->getRegistry()->parseQName($e, $ns, $id);
                            if (!isset($ns) || $ns == '') {
                                // no prefix: treated as an OMIM number, keeping the part before the first "."
                                $b = explode(".", $id);
                                $ns = "omim";
                                $id = $b[0];
                            } else {
                                $ns = str_replace(
                                    array("hpo", "id", "icd10cm", "icd9cm", "snomedct"),
                                    array("hp", "eom", "icd10", "icd9", "snomed"),
                                    $ns
                                );
                            }
                            parent::addRDF(parent::triplify($phenotype_id, parent::getVoc() . "x-{$ns}", "{$ns}:{$id}"));
                        }
                    }
                }
            }
        }
    }

    // genemap
    if (isset($o['geneMap'])) {
        $map = $o['geneMap'];
        if (isset($map['chromosome'])) {
            parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "chromosome", (string) $map['chromosome']));
        }
        if (isset($map['cytoLocation'])) {
            parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "cytolocation", (string) $map['cytoLocation']));
        }
        if (isset($map['geneSymbols'])) {
            foreach (preg_split("/[,;\\. ]+/", $map['geneSymbols']) as $symbol) {
                parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "gene-symbol", "symbol:" . trim($symbol)));
            }
        }
        if (isset($map['geneName'])) {
            foreach (explode(",", $map['geneName']) as $name) {
                parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "gene-name", trim($name)));
            }
        }
        if (isset($map['mappingMethod'])) {
            foreach (explode(",", $map['mappingMethod']) as $c) {
                $mapping_method = trim($c);
                $method_uri = $this->get_method_type($mapping_method);
                if ($method_uri !== false) {
                    parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "mapping-method", $method_uri));
                }
            }
        }
        if (isset($map['mouseGeneSymbol'])) {
            foreach (explode(",", $map['mouseGeneSymbol']) as $c) {
                parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "mouse-gene-symbol", "symbol:" . strtoupper($c)));
            }
        }
        if (isset($map['mouseMgiID'])) {
            foreach (explode(",", $map['mouseMgiID']) as $c) {
                parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-mgi", $c));
            }
        }
        if (isset($map['geneInheritance']) && $map['geneInheritance'] != '') {
            parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "gene-inheritance", $map['geneInheritance']));
        }
    }

    // phenotype maps
    if (isset($o['phenotypeMapList'])) {
        foreach ($o['phenotypeMapList'] as $i => $phenotypeMap) {
            $phenotypeMap = $phenotypeMap['phenotypeMap'];
            $pm_uri = parent::getRes() . $omim_id . "_pm_" . ($i + 1);
            parent::addRDF(
                parent::describeIndividual($pm_uri, "phenotype mapping for {$omim_id}", parent::getVoc() . "Phenotype-Map") .
                parent::describeClass(parent::getVoc() . "Phenotype-Map", "OMIM Phenotype-Map") .
                parent::triplify($omim_uri, parent::getVoc() . "phenotype-map", $pm_uri)
            );
            foreach ($phenotypeMap as $k => $value) {
                if (in_array($k, array("mimNumber", "phenotypeMimNumber", "phenotypicSeriesMimNumber"))) {
                    parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . $k, "omim:" . $value));
                } elseif ($k == "geneSymbols") {
                    foreach (explode(", ", $value) as $gene) {
                        parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . "gene-symbol", "hgnc.symbol:" . $gene));
                    }
                } elseif ($k == "phenotypeMappingKey") {
                    $l = $this->get_phenotype_mapping_method_type($value);
                    parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . "mapping-method", $l));
                } else {
                    parent::addRDF(parent::triplifyString($pm_uri, parent::getVoc() . $k, $value));
                }
            }
        }
    }

    // references
    if (isset($o['referenceList'])) {
        foreach ($o['referenceList'] as $r) {
            $r = $r['reference'];
            if (!isset($r['pubmedID'])) {
                continue;
            }
            $pubmed_uri = "pubmed:" . $r['pubmedID'];
            parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "article", $pubmed_uri));

            // fall back to a generic label when the reference carries no title
            $title = isset($r['title']) ? $r['title'] : 'article';
            parent::addRDF(parent::describe($pubmed_uri, addslashes($title)));
            if (isset($r['articleUrl'])) {
                parent::addRDF($this->QQuadO_URL($pubmed_uri, "rdfs:seeAlso", htmlentities($r['articleUrl'])));
            }
        }
    }

    // external ids
    if (isset($o['externalLinks'])) {
        foreach ($o['externalLinks'] as $k => $id) {
            if ($id === false) {
                continue;
            }
            $ns = '';
            switch ($k) {
                case 'approvedGeneSymbols':
                    $ns = 'symbol';
                    break;
                case 'geneIDs':
                    $ns = 'ncbigene';
                    break;
                case 'ncbiReferenceSequences':
                case 'genbankNucleotideSequences':
                case 'proteinSequences':
                    $ns = 'gi';
                    break;
                case 'uniGenes':
                    $ns = 'unigene';
                    break;
                case 'ensemblIDs':
                    $ns = 'ensembl';
                    break;
                case 'swissProtIDs':
                    $ns = 'uniprot';
                    break;
                case 'mgiIDs':
                    $ns = 'mgi';
                    // keep the part after the first ":"; leave the value alone when there is none
                    $b = explode(":", $id);
                    if (isset($b[1])) {
                        $id = $b[1];
                    }
                    break;
                case 'flybaseIDs':
                    $ns = 'flybase';
                    break;
                case 'zfinIDs':
                    $ns = 'zfin';
                    break;
                case 'hprdIDs':
                    $ns = 'hprd';
                    break;
                case 'orphanetDiseases':
                    $ns = 'orphanet';
                    break;
                case 'refSeqAccessionIDs':
                    $ns = 'refseq';
                    break;
                case 'ordrDiseases':
                    $ns = 'ordr';
                    $b = explode(";;", $id);
                    $id = $b[0];
                    break;
                case 'snomedctIDs':
                    $ns = 'snomed';
                    break;
                case 'icd10cmIDs':
                    $ns = 'icd10';
                    break;
                case 'icd9cmIDs':
                    $ns = 'icd9';
                    break;
                case 'umlsIDs':
                    $ns = 'umls';
                    break;
                case 'wormbaseIDs':
                    $ns = 'wormbase';
                    break;
                case 'diseaseOntologyIDs':
                    $ns = 'do';
                    break;

                // specifically ignoring
                case 'geneTests':
                case 'cmgGene':
                case 'geneticAllianceIDs': // #
                case 'nextGxDx':
                case 'nbkIDs': // NBK1207;;Alport Syndrome and Thin Basement Membrane Nephropathy
                case 'newbornScreeningUrls':
                case 'decipherUrls':
                case 'geneReviewShortNames':
                case 'locusSpecificDBs':
                case 'geneticsHomeReferenceIDs':
                case 'omiaIDs':
                case 'coriellDiseases':
                case 'clinicalDiseaseIDs':
                case 'possumSyndromes':
                case 'keggPathways':
                case 'gtr':
                case 'gwasCatalog':
                case 'mgiHumanDisease':
                case 'wormbaseDO':
                case 'dermAtlas': // true/false
                    break;
                default:
                    echo "unhandled external link {$k} {$id}" . PHP_EOL;
            }

            if (!$ns) {
                // ignored or unknown link types produce no triples
                continue;
            }
            foreach (explode(",", $id) as $xid) {
                if (strstr($xid, ";;") === FALSE) {
                    parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-{$ns}", $ns . ':' . $xid));
                } else {
                    // multiple ids//names: keep only parts without a lowercase letter
                    foreach (explode(";;", $xid) as $c) {
                        preg_match("/([a-z])/", $c, $m);
                        if (!isset($m[1])) {
                            parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-{$ns}", $ns . ':' . $c));
                        }
                    }
                }
            }
        }
    } //external links
}
/**
 * Convert the tab-delimited file behind getReadFile() into RDF, one interaction per row.
 *
 * The first line is treated as the header (its first character is dropped before
 * splitting) and must have 54 columns. Each following row is turned into triples
 * and the buffer is written out after every row.
 *
 * @return false|null FALSE when the header does not have 54 columns, otherwise nothing
 */
function Parse()
{
    $l = parent::getReadFile()->read(100000);
    $header = explode("\t", trim(substr($l, 1)));
    // check # of columns
    if (($c = count($header)) != 54) {
        trigger_error("Expecting 54 columns, found {$c}!");
        return FALSE;
    }

    // qnames whose rdfs:label has already been emitted
    $defined = array();

    while ($l = parent::getReadFile()->read(500000)) {
        $a = explode("\t", trim($l));

        // irefindex identifiers
        $rigid = "irefindex." . $a[34]; # checksum for interaction
        $rogida = "irefindex." . $a[32]; # checksum for A
        $rogidb = "irefindex." . $a[33]; # checksum for B
        $irigid = "irefindex.irigid:" . $a[44]; # integer id for interaction
        $irogida = "irefindex.irogid:" . $a[42]; # integer id for A
        $irogidb = "irefindex.irogid:" . $a[43]; # integer id for B
        $crigid = "irefindex.crigid:" . $a[47]; # checksum for canonical interaction
        $icrigid = "irefindex.icrigid:" . $a[50]; # integer id for canonical interaction
        $crogida = "irefindex.crogid:" . $a[45]; # checksum for A's canonical group
        $crogidb = "irefindex.crogid:" . $a[46]; # checksum for B's canonical group
        $icrogida = "irefindex.icrogid:" . $a[48]; # integer for A's canonical group
        $icrogidb = "irefindex.icrogid:" . $a[49]; # integer for B's canonical group

        // 13 contains the original identifier, the rigid, and the edgetype
        $ids = explode("|", $a[13]);
        if (count($ids) != 3) {
            trigger_error("Expecting 3 entries in column 14");
            print_r($ids);
            exit;
        }
        parent::getRegistry()->parseQName($ids[0], $ns, $id);
        if ($id == '-') {
            // this happens with hprd
            $iid = "hprd:" . substr($ids[1], 6);
        } else {
            $iid = $ns . ":" . $id;
        }

        // get the type
        // NOTE(review): no branch covers other edge types, so $label/$type keep
        // whatever value they had before — confirm only X, C and Y occur.
        if ($a[52] == "X") {
            $label = "{$a[0]} - {$a[1]} Interaction";
            $type = "Pairwise-Interaction";
        } elseif ($a[52] == "C") {
            $label = $a[53] . " component complex"; #num of participants
            $type = "Multimeric-Complex";
        } elseif ($a[52] == "Y") {
            $label = "{$a[0]} homomeric complex";
            $type = "Homopolymeric-Complex";
        }
        parent::addRDF(
            parent::describeIndividual($iid, $label, parent::getVoc() . $type) .
            parent::describeClass(parent::getVoc() . $type, str_replace("-", " ", $type))
        );

        // interaction type[52] by method[6]
        if ($a[6] != '-') {
            $data = $this->ParseStringArray($a[6]);
            $qname = trim($data["ns"]) . ":" . trim($data["id"]);
            if ($qname) {
                parent::addRDF(
                    parent::triplify($iid, parent::getVoc() . "method", $qname) .
                    parent::describeClass($qname, $data['label'])
                );
            }
        }
        parent::addRDF(parent::triplify($iid, "rdfs:seeAlso", "http://wodaklab.org/iRefWeb/interaction/show/" . $a[50]));

        // set the interactors
        for ($i = 0; $i <= 1; $i++) {
            $p = ($i == 1) ? 'b' : 'a';
            $data = $this->ParseStringArray($a[$i]);
            $interactor = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(parent::triplify($iid, parent::getVoc() . "interactor_{$p}", $interactor));

            // biological role
            $role = $a[16 + $i];
            if ($role != '-') {
                $data = $this->ParseStringArray($role);
                $qname = trim($data["ns"]) . ":" . trim($data["id"]);
                if ($qname != "mi:0000") {
                    parent::addRDF(
                        parent::triplify($iid, parent::getVoc() . "interactor_{$p}" . "_biological_role", $qname) .
                        parent::describeClass($qname, $data['label'])
                    );
                }
            }

            // experimental role
            $role = $a[18 + $i];
            if ($role != '-') {
                $data = $this->ParseStringArray($role);
                $qname = trim($data["ns"]) . ":" . trim($data["id"]);
                if ($qname != "mi:0000") {
                    parent::addRDF(
                        parent::triplify($iid, parent::getVoc() . "interactor_{$p}" . "_experimental_role", $qname) .
                        parent::describeClass($qname, $data['label'])
                    );
                }
            }

            // interactor type
            $type = $a[20 + $i];
            if ($type != '-') {
                $data = $this->ParseStringArray($type);
                $qname = trim($data["ns"]) . ":" . trim($data["id"]);
                parent::addRDF(
                    parent::triplify($interactor, "rdf:type", $qname) .
                    parent::describeClass($qname, $data['label'])
                );
            }
        }

        // add the alternatives through the taxon + seq redundant group
        for ($i = 2; $i <= 3; $i++) {
            $taxid = '';
            $rogid = "irefindex." . $a[32 + ($i - 2)];
            parent::addRDF(
                parent::describeIndividual($rogid, "", parent::getVoc() . "Taxon-Sequence-Identical-Group") .
                parent::describeClass(parent::getVoc() . "Taxon-Sequence-Identical-Group", "Taxon + Sequence Identical Group")
            );
            $tax = $a[9 + ($i - 2)];
            if ($tax && $tax != '-' && $tax != '-1') {
                $data = $this->ParseStringArray($tax);
                $taxid = trim($data["ns"]) . ":" . trim($data["id"]);
                parent::addRDF(parent::triplify($rogid, parent::getVoc() . "x-taxonomy", $taxid));
            }
            $list = explode("|", $a[3 + ($i - 2)]);
            foreach ($list as $item) {
                $data = $this->ParseStringArray($item);
                $ns = trim($data["ns"]);
                $id = trim($data["id"]);
                $qname = $ns . ":" . $id;
                if ($ns && $ns != 'rogid' && $ns != 'irogid' && $id != '-') {
                    parent::addRDF(parent::triplify($rogid, parent::getVoc() . "has-member", $qname));
                    if ($taxid && $taxid != '-' && $taxid != '-1') {
                        parent::addRDF(parent::triplify($qname, parent::getVoc() . "x-taxonomy", $taxid));
                    }
                }
            }
        }

        // publications
        $list = explode("|", $a[8]);
        foreach ($list as $item) {
            // skip the "no value" marker and the placeholder PubMed id 0
            if ($item == '-' || $item == 'pubmed:0') {
                continue;
            }
            $data = $this->ParseStringArray($item);
            $qname = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(parent::triplify($iid, parent::getVoc() . "article", $qname));
        }

        // MI interaction type
        if ($a[11] != '-' && $a[11] != 'NA') {
            $data = $this->ParseStringArray($a[11]);
            $qname = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(parent::triplify($iid, "rdf:type", $qname));
            if (!isset($defined[$qname])) {
                // label each interaction type only once per run
                $defined[$qname] = '';
                parent::addRDF(parent::triplifyString($qname, "rdfs:label", $data['label']));
            }
        }

        // source
        if ($a[12] != '-') {
            $data = $this->ParseStringArray($a[12]);
            $qname = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(parent::triplify($iid, parent::getVoc() . "source", $qname));
        }

        // confidence
        $list = explode("|", $a[14]);
        foreach ($list as $item) {
            $data = $this->ParseStringArray($item);
            $ns = trim($data["ns"]);
            $id = trim($data["id"]);
            if ($ns == 'lpr') {
                // lowest number of distinct interactions that any one article reported
                parent::addRDF(parent::triplifyString($iid, parent::getVoc() . "minimum-number-interactions-reported", $id));
            } elseif ($ns == "hpr") {
                // higher number of distinct interactions that any one article reports
                parent::addRDF(parent::triplifyString($iid, parent::getVoc() . "maximum-number-interactions-reported", $id));
            } elseif ($ns == 'hp') {
                // total number of unique PMIDs used to support the interaction
                parent::addRDF(parent::triplifyString($iid, parent::getVoc() . "number-supporting-articles", $id));
            }
        }

        // expansion method
        if ($a[15]) {
            $id = parent::getRes() . md5($a[15]);
            parent::addRDF(
                parent::describeIndividual($id, $a[15], parent::getVoc() . "Expansion-Method") .
                parent::describeClass(parent::getVoc() . "Expansion-Method", "Expansion Method") .
                parent::triplify($iid, parent::getVoc() . "expansion-method", $id)
            );
        }

        // host organism
        if ($a[28] != '-') {
            $data = $this->ParseStringArray($a[28]);
            $qname = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(parent::triplify($iid, parent::getVoc() . "host-organism", $qname));
        }

        // @todo add to record
        // created 2010/05/18
        $date = str_replace("/", "-", $a[30]) . "T00:00:00Z";
        parent::addRDF(parent::triplifyString($iid, "dc:created", $date, "xsd:dateTime"));

        // taxon-sequence identical interaction group
        parent::addRDF(
            parent::triplify($iid, parent::getVoc() . "taxon-sequence-identical-interaction", $rigid) .
            parent::triplify($rigid, "rdf:type", parent::getVoc() . "Taxon-Sequence-Identical-Interaction") .
            parent::describeClass(parent::getVoc() . "Taxon-Sequence-Identical-Interaction", "Taxon + Sequence Identical Interaction") .
            parent::triplify($rigid, parent::getVoc() . "irigid", $irigid) .
            parent::triplify($rigid, parent::getVoc() . "interactor-a", $rogida) .
            parent::triplify($rogida, parent::getVoc() . "irogid", $irogida) .
            parent::triplify($rigid, parent::getVoc() . "interactor-b", $rogidb) .
            parent::triplify($rogidb, parent::getVoc() . "irogid", $irogidb) .
            parent::triplify($rogida, parent::getVoc() . "canonical-group", $crogida) .
            parent::triplify($rogidb, parent::getVoc() . "canonical-group", $crogidb) .
            parent::triplify($rigid, parent::getVoc() . "taxon-sequence-similar-interaction", $crigid) .
            parent::triplify($crigid, "rdf:type", parent::getVoc() . "Taxon-Sequence-Canonical-Interaction") .
            parent::describeClass(parent::getVoc() . "Taxon-Sequence-Canonical-Interaction", "Taxon + Sequence Canonical Interaction") .
            parent::triplify($crigid, parent::getVoc() . "icrigid", $icrigid) .
            parent::triplify($crigid, parent::getVoc() . "interactor-a-canonical-group", $crogida) .
            parent::triplify($crogida, "rdf:type", parent::getVoc() . "Taxon-Sequence-Similar-Group") .
            parent::triplify($crogida, parent::getVoc() . "icrogid", $icrogida) .
            parent::triplify($crigid, parent::getVoc() . "interactor-b-canonical-group", $crogidb) .
            parent::triplify($crogidb, "rdf:type", parent::getVoc() . "Taxon-Sequence-Similar-Group") .
            parent::triplify($crogidb, parent::getVoc() . "icrogid", $icrogidb) .
            parent::describeClass(parent::getVoc() . "Taxon-Sequence-Similar-Group", "Taxon + Sequence Similar Group")
        );
        parent::writeRDFBufferToWriteFile();
    }
}
/**
 * Emit RDF for the categories found under $x->{$list_name} and link each to $id.
 *
 * For every list entry that has a non-empty $item_name child, one individual is
 * described per element iterated from that child (keyed by the md5 of its text),
 * and a second one is described from the child's sub-elements (keyed by the md5
 * of its XML).
 *
 * @param object      $x              record to read from (taken by reference, not modified here)
 * @param string      $id             subject that the categories are attached to
 * @param string      $list_name      property of $x holding the list
 * @param string      $item_name      property of each list entry holding the category
 * @param string      $predicate      predicate linking $id to each category
 * @param string|null $list_item_name not used by this method
 * @return void
 */
function AddCategory(&$x, $id, $list_name, $item_name, $predicate, $list_item_name = null)
{
    if (!isset($x->{$list_name})) {
        return;
    }

    foreach ($x->{$list_name} as $entry) {
        if (!isset($entry->{$item_name}) || $entry->{$item_name} == '') {
            continue;
        }
        $node = $entry->{$item_name};
        $attributes = $node->attributes();

        // one individual per iterated element, keyed by the hash of its text
        foreach ($node as $unused_key => $text) {
            $kid = parent::getvoc() . md5($text);
            $rdf = $this->describeIndividual($kid, "" . $text, parent::getVoc() . ucfirst($item_name));
            $rdf .= $this->describeClass(parent::getVoc() . ucfirst($item_name), ucfirst("" . $item_name));
            $rdf .= $this->triplify($id, $predicate, $kid);
            $this->addRDF($rdf);

            // copy the category's attributes onto that individual
            foreach ($attributes as $attr_name => $attr_value) {
                parent::addRDF($this->triplifyString($kid, parent::getVoc() . $attr_name, "" . $attr_value));
            }
        }

        // one more individual for the whole node, keyed by the hash of its XML
        $kid = parent::getvoc() . md5($node->asXML());
        foreach ($node->children() as $child_name => $child_value) {
            if ($child_name == "name") {
                $child_label = $child_value;
            } else {
                $child_label = $predicate;
            }
            $rdf = $this->describeIndividual($kid, $child_label, parent::getVoc() . ucfirst($child_name));
            $rdf .= $this->describeClass(parent::getVoc() . ucfirst($child_name), ucfirst("" . $child_value));
            $rdf .= $this->triplifyString($kid, parent::getVoc() . $child_name, $child_value);
            $rdf .= $this->triplify($id, $predicate, $kid);
            $this->addRDF($rdf);
        }
    }
}
/**
 * Convert the citations file behind getReadFile() into RDF.
 *
 * Each line is split on "\t|\t" after trailing tabs, pipes and newlines are
 * removed. Columns used: 0 = citation id, 1 = citation key, 2 = PubMed id
 * ("0" means none), 4 = URL, 5 = text, 6 = whitespace-separated taxonomy ids.
 * Lines without columns 1 and 2 are skipped. The buffer is written after each line.
 *
 * @return void
 */
private function citations()
{
    while ($l = $this->getReadFile()->read(2000000)) {
        $a = explode("\t|\t", rtrim($l, "\t|\n"));
        if (!isset($a[1]) or !isset($a[2])) {
            continue;
        }
        $c = parent::getRes() . "citation-id-" . $a[0];

        // build the optional rdfs:seeAlso triple
        $seealso = isset($a[4]) ? trim($a[4]) : "";
        if ($seealso) {
            // repair the two malformed URL spellings handled here
            $seealso = str_replace(array("lx: DOI ", "http;//"), array("http://dx.doi.org/", "http://"), $seealso);
            if (strlen($seealso) > 2 and !strstr($seealso, "http")) {
                $seealso = "http://" . $seealso;
            }
            $seealso = parent::triplify($c, "rdfs:seeAlso", $seealso);
        }

        parent::addRDF(
            parent::describeIndividual($c, $a[1], $this->getVoc() . "Citation") .
            parent::describeClass($this->getVoc() . "Citation", "Citation") .
            parent::triplifyString($c, parent::getVoc() . "citation-key", $a[1]) .
            ($a[2] == "0" ? "" : parent::triplify($c, parent::getVoc() . "x-pubmed", "pubmed:" . $a[2])) .
            $seealso .
            ((isset($a[5]) and $a[5]) ? parent::triplifyString($c, parent::getVoc() . "text", str_replace("\"", "", $a[5])) : "")
        );

        if (isset($a[6])) {
            // drop empty tokens so repeated separators never yield a bare "taxonomy:"
            $taxids = preg_split('/\s+/', trim($a[6]), -1, PREG_SPLIT_NO_EMPTY);
            foreach ($taxids as $taxid) {
                parent::addRDF(parent::triplify("taxonomy:{$taxid}", $this->getVoc() . "citation", $c));
            }
        }
        $this->writeRDFBufferToWriteFile();
    } //while
}
/**
 * Convert the disorder–gene association XML in $file into RDF.
 *
 * For every Disorder in each parsed "DisorderList", every DisorderGeneAssociation
 * yields: the gene (label, symbol, synonyms, external references) and an
 * association individual linking the disorder, the gene, the association type
 * and its status. The buffer is written after each disorder.
 *
 * @param string $file path handed to CXML
 * @return void
 */
function genes($file)
{
    $xml = new CXML($file);
    while ($xml->parse("DisorderList") == TRUE) {
        $x = $xml->GetXMLRoot();
        foreach ($x->Disorder as $d) {
            $orphanet_id = parent::getNamespace() . (string) $d->OrphaNumber;
            $disorder_name = (string) $d->Name;

            foreach ($d->DisorderGeneAssociationList->DisorderGeneAssociation as $dga) {
                // gene
                $gene = $dga->Gene;
                $gene_id = parent::getNamespace() . (string) $gene->OrphaNumber;
                $gene_label = (string) $gene->Name;
                $gene_symbol = (string) $gene->Symbol;
                parent::addRDF(
                    parent::describeIndividual($gene_id, $gene_label, parent::getVoc() . "Gene") .
                    parent::describeClass(parent::getVoc() . "Gene", "orphanet gene") .
                    parent::triplifyString($gene_id, parent::getVoc() . "symbol", $gene_symbol)
                );

                // walk every Synonym inside each list, not just the first one
                foreach ($gene->SynonymList as $synonym_list) {
                    foreach ($synonym_list->Synonym as $s) {
                        $synonym = (string) $s;
                        if ($synonym === '') {
                            continue;
                        }
                        parent::addRDF(parent::triplifyString($gene_id, parent::getVoc() . "synonym", $synonym));
                    }
                }

                // likewise export every ExternalReference of each list
                foreach ($gene->ExternalReferenceList as $erl) {
                    foreach ($erl->ExternalReference as $er) {
                        $db = (string) $er->Source;
                        $id = (string) $er->Reference;
                        if ($db === '' || $id === '') {
                            continue;
                        }
                        $db = parent::getRegistry()->getPreferredPrefix($db);
                        $xref = "{$db}:{$id}";
                        parent::addRDF(parent::triplify($gene_id, parent::getVoc() . "x-{$db}", $xref));
                    }
                }

                // the association itself, keyed by the disorder number and a hash of its XML
                $dga_id = parent::getRes() . (string) $d->OrphaNumber . "_" . md5($dga->asXML());
                $ga = $dga->DisorderGeneAssociationType;
                $ga_id = parent::getNamespace() . (string) $ga->attributes()->id;
                $ga_label = (string) $ga->Name;
                $s = $dga->DisorderGeneAssociationStatus;
                $s_id = parent::getNamespace() . (string) $s->attributes()->id;
                $s_label = (string) $s->Name;
                parent::addRDF(
                    parent::describeIndividual($dga_id, "{$ga_label} {$gene_label} in {$disorder_name} ({$s_label})", $ga_id) .
                    parent::describeClass($ga_id, $ga_label, parent::getVoc() . "Disorder-Gene-Association") .
                    parent::triplify($dga_id, parent::getVoc() . "status", $s_id) .
                    parent::describeClass($s_id, $s_label, parent::getVoc() . "Disorder-Gene-Association-Status") .
                    parent::triplify($dga_id, parent::getVoc() . "disorder", $orphanet_id) .
                    parent::describeIndividual($orphanet_id, $disorder_name, parent::getVoc() . "Disorder") .
                    parent::triplify($dga_id, parent::getVoc() . "gene", $gene_id)
                );
            }
            parent::writeRDFBufferToWriteFile();
        }
    }
    unset($xml);
}
/**
 * Convert the side-effect frequency file behind getReadFile() into RDF.
 *
 * Every line must have 10 tab-separated columns once "%" characters are
 * removed; otherwise an E_USER_ERROR is raised and false is returned. Rows
 * whose concept type is "LLT" are skipped. Each remaining row yields a
 * drug-effect-frequency individual and a frequency individual typed as
 * exact, qualitative or range.
 *
 * @return false|null false on a malformed row, otherwise nothing
 */
function freq()
{
    $expected = 10;
    parent::setCheckpoint('file');

    while ($line = parent::getReadFile()->read()) {
        $fields = explode("\t", str_replace("%", "", $line));
        if (count($fields) != $expected) {
            trigger_error("Expecting {$expected}, but found " . count($fields) . " instead... skipping file!", E_USER_ERROR);
            return false;
        }

        // columns 1 and 2 (stereo id and CUI) are not used
        $drug = "stitch:{$fields[0]}";
        $placebo = $fields[3];
        $freq = $fields[4];
        $lower = $fields[5];
        $upper = $fields[6];
        $concept_type = $fields[7];
        $effect_id = $fields[8];

        if ($concept_type == "LLT") {
            continue;
        }
        $effect_label = trim($fields[9]);

        // the record id is derived from the raw line
        $id = "stitch_resource:" . md5("se_freq" . $line);
        $record_class = parent::getVoc() . "Drug-Effect-Frequency";
        $rdf = parent::describeIndividual($id, "{$effect_label} frequency for {$drug}", $record_class);
        $rdf .= parent::describeClass(parent::getVoc() . "Drug-Effect-Frequency", "SIDER Drug-Effect and Frequency");
        $rdf .= parent::triplify($id, parent::getVoc() . "drug", $drug);
        $rdf .= parent::triplify($id, parent::getVoc() . "effect", "umls:" . $effect_id);
        parent::addRDF($rdf);

        if ($placebo) {
            parent::addRDF(parent::triplifyString($id, parent::getVoc() . "placebo", "true", "xsd:boolean"));
        }

        // classify the frequency: numeric vs. textual, then range overrides both
        $is_exact = is_numeric($freq);
        $flabel = $is_exact ? $freq . "%" : $freq;
        $ftype_label = $is_exact ? "Exact-Frequency" : "Qualitative-Frequency";
        if ($lower != $upper) {
            $flabel .= "({$lower}-{$upper})";
            $ftype_label = "Range-Frequency";
        }
        $ftype = parent::getVoc() . $ftype_label;

        $fid = $id . md5($lower . $upper . $effect_id);
        $rdf = parent::triplify($id, parent::getVoc() . "frequency", $fid);
        $rdf .= parent::describeIndividual($fid, $flabel, $ftype);
        $rdf .= parent::describeClass($ftype, $ftype_label);
        parent::addRDF($rdf);

        // numeric frequencies are stored as a fraction, textual ones verbatim
        $value = $is_exact ? $freq / 100 : $freq;
        parent::addRDF(parent::triplifyString($fid, parent::getVoc() . "frequency-value", $value));

        $bounds = parent::triplifyString($fid, parent::getVoc() . "lower-frequency", sprintf("%.3f", $lower));
        $bounds .= parent::triplifyString($fid, parent::getVoc() . "upper-frequency", sprintf("%.3f", $upper));
        parent::addRDF($bounds);

        parent::setCheckpoint('record');
    }

    parent::setCheckpoint('file');
}
/**
 * Convert the SNP records of the XML document in $file into RDF.
 *
 * For each child of the root: type, molecule type, taxon, genotype flag,
 * validation flags and HGVS names. When the Assembly is marked
 * reference="true", build information, contig data and one "Fxnset"
 * individual per child of the first MapLoc are added as well.
 *
 * @param string $file path handed to CXML
 * @return false|null false when no XML root is available, otherwise nothing
 */
function parse($file)
{
    $xml = new CXML($file);
    $xml->parse();
    $entry = $xml->getXMLRoot();
    if (!isset($entry) or !$entry) {
        return false;
    }

    // fxnset attribute => array(predicate suffix, object prefix); a null prefix
    // means the value is written as a string literal instead of a resource
    $fxn_fields = array(
        'geneId' => array('gene', 'ncbigene:'),
        'symbol' => array('gene-symbol', null),
        'mrnaAcc' => array('mrna', 'refseq:'),
        'protAcc' => array('protein', 'refseq:'),
        'fxnClass' => array('fxn-class', null),
        'allele' => array('allele', null),
        'residue' => array('residue', null),
        'readingFrame' => array('reading-frame', null),
        'aaPosition' => array('position', null),
    );

    foreach ($entry->children() as $o) {
        $rsid = "rs" . $o->attributes()->rsId;
        $id = parent::getNamespace() . $rsid;
        $type = parent::getVoc() . ucfirst(str_replace(" ", "-", (string) $o->attributes()->snpClass));
        $moltype = parent::getVoc() . (string) $o->attributes()->molType;

        // attributes
        parent::addRDF(
            parent::describeIndividual($id, $rsid, $type) .
            parent::describeClass($type, ucfirst("" . $o->attributes()->snpClass)) .
            parent::triplify($id, parent::getVoc() . "mol-type", $moltype) .
            parent::describeClass($moltype, (string) $o->attributes()->molType, parent::getVoc() . "Moltype") .
            parent::describeClass(parent::getVoc() . "Moltype", "Moltype") .
            parent::triplify($id, parent::getVoc() . "taxid", "taxonomy:" . (string) $o->attributes()->taxId)
        );

        $genotype = (string) $o->attributes()->genoType;
        if ($genotype) {
            // write the raw flag as a boolean literal: no vocabulary prefix,
            // and the XSD datatype is named "boolean", not "bool"
            parent::addRDF(parent::triplifyString($id, parent::getVoc() . "genotype", $genotype, "xsd:boolean"));
        }

        // frequency and create/update build information are not exported

        //validation
        $a = $o->Validation;
        parent::addRDF(
            parent::triplifyString($id, parent::getVoc() . "validation-by-cluster", (string) $a->attributes()->byCluster) .
            parent::triplifyString($id, parent::getVoc() . "validation-by-frequency", (string) $a->attributes()->byFrequency) .
            parent::triplifyString($id, parent::getVoc() . "validation-by-2hit2allele", (string) $a->attributes()->by2Hit2Allele) .
            parent::triplifyString($id, parent::getVoc() . "validation-by-1000G", (string) $a->attributes()->by1000G)
        );

        //hgvs names
        foreach ($o->hgvs as $name) {
            parent::addRDF(parent::triplifyString($id, parent::getVoc() . "hgvs-name", (string) $name));
        }

        // assembly
        $assembly = $o->Assembly;
        if (!($assembly and $assembly->attributes()->reference == "true")) {
            continue;
        }
        parent::addRDF(
            parent::triplifyString($id, parent::getVoc() . "dbsnp-build", (string) $assembly->attributes()->dbSnpBuild) .
            parent::triplifyString($id, parent::getVoc() . "genome-build", (string) $assembly->attributes()->genomeBuild)
        );

        $component = $assembly->Component;
        if (!$component) {
            continue;
        }
        parent::addRDF(
            parent::triplify($id, parent::getVoc() . "contig-accession", "genbank:" . (string) $component->attributes()->accession) .
            parent::triplify($id, parent::getVoc() . "contig-gi", "gi:" . (string) $component->attributes()->gi) .
            parent::triplifyString($id, parent::getVoc() . "chromosome", (string) $component->attributes()->chromosome)
        );

        $maploc = $component->MapLoc;
        if (!$maploc) {
            continue;
        }
        foreach ($maploc->children() as $fxnset) {
            // each mapped function set is identified by the hash of its XML
            $fxnset_id = parent::getRes() . md5($fxnset->asXML());
            parent::addRDF(
                parent::triplify($id, parent::getVoc() . "maps-to", $fxnset_id) .
                parent::triplify($fxnset_id, "rdf:type", parent::getVoc() . "Fxnset") .
                parent::describeClass(parent::getVoc() . "Fxnset", "Fxnset")
            );

            $fxn_attributes = $fxnset->attributes();
            foreach ($fxn_fields as $attribute => $spec) {
                if (!isset($fxn_attributes->{$attribute})) {
                    continue;
                }
                list($predicate, $prefix) = $spec;
                $value = (string) $fxn_attributes->{$attribute};
                if ($prefix === null) {
                    parent::addRDF(parent::triplifyString($fxnset_id, parent::getVoc() . $predicate, $value));
                } else {
                    parent::addRDF(parent::triplify($fxnset_id, parent::getVoc() . $predicate, $prefix . $value));
                }
            }
        }
    }
    unset($xml);
}
/**
 * Download, convert and describe the requested Pathway Commons sources.
 *
 * For each source: fetch the gzipped BioPAX file when it is missing locally or
 * a download is requested, decompress it into memory, convert it with
 * BioPAX2Bio2RDF, write the result to the output directory and collect a
 * description of the source file. Finally the description of the generated
 * dataset is written to the release file.
 *
 * @return void
 */
function Run()
{
    // get the work
    if ($this->GetParameterValue('files') == 'all') {
        $sources = explode("|", parent::getParameterList('files'));
        array_shift($sources);
    } else {
        // comma separated list
        $sources = explode(",", parent::getParameterValue('files'));
    }

    // source name => remote file name (URL-encoded)
    $download_files = array(
        "homo-sapiens" => "Pathway%20Commons%202%20homo%20sapiens.BIOPAX.owl.gz",
        "hprd" => "Pathway%20Commons%202%20HPRD.BIOPAX.owl.gz",
        "humancyc" => "Pathway%20Commons%202%20HumanCyc.BIOPAX.owl.gz",
        "nci-nature" => "Pathway%20Commons%202%20NCI_Nature.BIOPAX.owl.gz",
        "panther-pathway" => "Pathway%20Commons%202%20PANTHER%20Pathway.BIOPAX.owl.gz",
        "phosphositeplus" => "Pathway%20Commons%202%20PhosphoSitePlus.BIOPAX.owl.gz",
        "reactome" => "Pathway%20Commons%202%20Reactome.BIOPAX.owl.gz",
    );

    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $rdir = parent::getParameterValue('download_url');

    $dataset_description = '';
    $source_file = null;
    $gz = false;

    // iterate over the requested data
    foreach ($sources as $source) {
        if (!isset($download_files[$source])) {
            trigger_error("Unknown source '{$source}', skipping", E_USER_WARNING);
            continue;
        }
        echo "processing {$source}... ";

        // set the remote and input files
        $zfile = $source . ".owl.gz";
        $rfile = $rdir . $download_files[$source];
        $lfile = $ldir . $zfile;

        // download if the file doesn't exist locally or we are told to
        if (!file_exists($lfile) || $this->GetParameterValue('download') == 'true') {
            echo "downloading... ";
            $remote = file_get_contents($rfile);
            if ($remote === false) {
                trigger_error("Unable to download {$rfile}", E_USER_ERROR);
                exit;
            }
            file_put_contents($lfile, $remote);
            unset($remote);
        }

        // extract the file out of the archive and load into a buffer
        echo 'extracting... ';
        if (($fpin = gzopen($lfile, "r")) === FALSE) {
            trigger_error("Unable to open {$lfile}", E_USER_ERROR);
            exit;
        }
        $data = '';
        while (!gzeof($fpin)) {
            $buffer = gzgets($fpin, 4096);
            if ($buffer === false) {
                // a read failure would otherwise spin forever
                break;
            }
            $data .= $buffer;
        }
        gzclose($fpin);

        // set the output file
        $suffix = parent::getParameterValue('output_format');
        $outfile = $source . '.' . $suffix;
        $gz = false;
        if (strstr(parent::getParameterValue('output_format'), "gz")) {
            $gz = true;
        }
        parent::setWriteFile($odir . $outfile, $gz);

        // send for parsing
        $p = new BioPAX2Bio2RDF($this);
        $p->SetBuffer($data)
            ->SetBioPAXVersion(3)
            ->SetBaseNamespace("http://purl.org/pc2/3/")
            ->SetBio2RDFNamespace("http://bio2rdf.org/pathwaycommons:")
            ->SetDatasetURI(parent::getDatasetURI());
        $rdf = $p->Parse();
        parent::addRDF($rdf);

        // write to output
        parent::writeRDFBufferToWriteFile();
        parent::getWriteFile()->Close();
        echo "done!" . PHP_EOL;

        //generate dataset description
        echo "Generating dataset description for {$zfile}... ";
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("Pathway Commons")
            // "H" gives the zero-padded hour, and gmdate() matches the "Z" (UTC) suffix
            ->setRetrievedDate(gmdate("Y-m-d\\TH:i:s\\Z", filemtime($lfile)))
            ->setFormat("rdf/xml")
            ->setPublisher("http://www.pathwaycommons.org/")
            ->setHomepage("http://www.pathwaycommons.org/")
            ->setRights("use")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://www.pathwaycommons.org/pc2/home.html#data_sources")
            ->setDataset("http://identifiers.org/pathwaycommons/");
        $dataset_description .= $source_file->toRDF();
        echo "done!" . PHP_EOL;
    }

    if ($source_file === null) {
        // nothing was processed, so there is no source to describe
        trigger_error("No sources were processed; dataset description not written", E_USER_WARNING);
        parent::setGraphURI($graph_uri);
        return;
    }

    echo "Generating dataset description for Bio2RDF Pathways Commons dataset... ";
    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = gmdate("Y-m-d\\TH:i:s\\Z");
    // describes the last processed source, as before
    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
        ->setSource($source_file->getURI())
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/pathwaycommons/pathwaycommons.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());
    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strstr(parent::getParameterValue('output_format'), "nt")) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }
    $dataset_description .= $output_file->toRDF();

    //write dataset description to file
    parent::setGraphURI($graph_uri);
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Convert one KGML pathway file into RDF and add it to the RDF buffer.
 *
 * Describes the pathway itself, each <entry> (plus one "member" link per
 * space-separated name in the entry), each <relation> (plus its subtypes) and
 * each <reaction> (plus one link per child element, e.g. substrate/product).
 * Prints an error message and returns early when the file cannot be parsed.
 *
 * @param string $lfile path of the KGML file to load
 * @return void
 */
function parseKGML($lfile)
{
    $pathway = simplexml_load_file($lfile);
    if ($pathway === false) {
        echo "Error in parsing {$lfile}" . PHP_EOL;
        return;
    }

    $pathway_id = str_replace("path", "kegg", (string) $pathway['name']);
    // prefix shared by every entry/relation/reaction identifier of this pathway
    $base_id = str_replace("kegg", "kegg_resource", $pathway_id) . ".";

    parent::addRDF(
        parent::describeIndividual($pathway_id, $pathway['title'], parent::getVoc() . "Pathway")
        . parent::triplify($pathway_id, "rdfs:seeAlso", $pathway['link'])
        . parent::triplify($pathway_id, "foaf:depiction", $pathway['image'])
    );

    // entry id => entry name; filled by the first pass and used for relation labels.
    // Initialised up front so a pathway without entries does not leave it undefined.
    $entries = array();

    // get the entries
    foreach ($pathway->children() as $type => $item) {
        if ($type != "entry") {
            continue;
        }
        $entry_name = (string) $item['name'];
        $eid = $base_id . $item['id'];
        $entries[(string) $item['id']] = $entry_name;

        parent::addRDF(
            parent::describeIndividual($eid, $item['name'], parent::getVoc() . "Ortholog-Group")
            . parent::describeClass(parent::getVoc() . "Ortholog-Group", "KEGG Ortholog Group")
        );

        foreach (explode(" ", $entry_name) as $mid) {
            if ($item['type'] == 'path') {
                // str_replace takes (search, replace, subject); the previous argument
                // order replaced inside the literal "_" and discarded the member name
                $mid = str_replace(":", "_", $mid);
            } else {
                // keep only the part after the database prefix
                $mid = substr($mid, strpos($mid, ":") + 1);
            }
            parent::addRDF(parent::triplify($eid, parent::getVoc() . "member", "kegg:" . $mid));
        }
    }

    // iterate over the relations, reactions
    foreach ($pathway->children() as $type => $item) {
        if ($type == "relation") {
            /*
               <relation entry1="70" entry2="73" type="ECrel">
                 <subtype name="compound" value="86"/>
               </relation>
               <relation entry1="26" entry2="25" type="PPrel">
                 <subtype name="compound" value="17"/>
                 <subtype name="activation" value="-->"/>
               </relation>
            */
            $id1 = (string) $item['entry1'];
            $id2 = (string) $item['entry2'];
            $relation_id = $base_id . $id1 . "." . $id2 . "." . $type;

            // fall back to the raw entry id when a relation points at an entry
            // that was not declared in this file
            $name1 = isset($entries[$id1]) ? $entries[$id1] : $id1;
            $name2 = isset($entries[$id2]) ? $entries[$id2] : $id2;
            $label = $type . " relation between " . $name1 . " and " . $name2;

            parent::addRDF(
                parent::describeIndividual($relation_id, $label, parent::getVoc() . "Pathway-Relation")
                . parent::describeClass(parent::getVoc() . "Pathway-Relation", "KEGG Pathway Relation")
                . parent::triplify($relation_id, parent::getVoc() . "source", $base_id . $id1)
                . parent::triplify($relation_id, parent::getVoc() . "target", $base_id . $id2)
                . parent::triplify($relation_id, parent::getVoc() . "pathway", $pathway_id)
                . parent::triplifyString($relation_id, parent::getVoc() . "type", $item['type'])
            );
            foreach ($item->children() as $subtype) {
                parent::addRDF(parent::triplifyString($relation_id, parent::getVoc() . "subtype", '' . $subtype['name']));
            }
        } elseif ($type == "reaction") {
            /*
               <reaction id="133" name="rn:R09085" type="irreversible">
                 <substrate id="86" name="cpd:C00267"/>
                 <product id="90" name="cpd:C00668"/>
               </reaction>
            */
            $reaction_name = (string) $item['name'];
            $reaction_id = $base_id . substr($reaction_name, strpos($reaction_name, ":") + 1);
            $reaction_type = parent::getVoc() . ucfirst((string) $item['type']) . "-Reaction";

            parent::addRDF(
                parent::describeIndividual($reaction_id, $item['name'], parent::getVoc() . "Reaction")
                . parent::describeClass(parent::getVoc() . "Reaction", "KEGG Reaction")
                . parent::triplify($reaction_id, "rdf:type", $reaction_type)
            );
            // the child element name (e.g. substrate, product) becomes the predicate
            foreach ($item->children() as $k => $v) {
                $cid = str_replace("cpd:", "kegg:", (string) $v['name']);
                parent::addRDF(parent::triplify($reaction_id, parent::getVoc() . $k, $cid));
            }
        }
    }
    return;
}
/**
 * Convert the comma-separated model-organism gene table held by the current
 * read file into RDF, flushing the buffer to the write file after every row.
 *
 * The first line is treated as a header and must have 8 columns.
 *
 * Expected columns:
 *   [0] GenAge ID  [1] symbol  [2] name  [3] organism  [4] entrez gene id
 *   [5] avg lifespan change (max obsv)  [6] lifespan effect  [7] longevity influence
 *
 * @return false|null false when the header does not have the expected number
 *                    of columns; nothing otherwise
 */
function models()
{
    // organism name (as written in the file) => NCBI taxonomy identifier
    $tax_ids = array(
        "Caenorhabditis elegans" => "6239",
        "Mus musculus" => "10090",
        "Saccharomyces cerevisiae" => "4932",
        "Drosophila melanogaster" => "7227",
        "Podospora anserina" => "5145",
        "Mesocricetus auratus" => "10036",
        "Schizosaccharomyces pombe" => "4896",
        "Danio rerio" => "7955",
    );

    $h = explode(",", parent::getReadFile()->read());
    $expected_columns = 8;
    if (($n = count($h)) != $expected_columns) {
        trigger_error("Found {$n} columns in gene file - expecting {$expected_columns}!", E_USER_WARNING);
        return false;
    }

    while ($l = parent::getReadFile()->read(200000)) {
        $data = str_getcsv($l);
        // skip blank or truncated rows instead of reading undefined offsets
        if (($n = count($data)) != $expected_columns) {
            trigger_error("Found {$n} columns in gene record - expecting {$expected_columns}!", E_USER_WARNING);
            continue;
        }

        $genage = str_pad($data[0], 4, "0", STR_PAD_LEFT);
        $gene_symbol = $data[1];
        $name = $data[2];
        $organism = $data[3];
        $ncbi_gene_id = $data[4];
        $max_percent_obsv_avg_lifespan_change = $data[5];
        $lifespan_effect = $data[6];
        $longevity_influence = $data[7];

        $genage_id = parent::getNamespace() . $genage;

        parent::addRDF(
            parent::describeIndividual($genage_id, $name, parent::getVoc() . "Aging-Related-Gene")
            . parent::describeClass(parent::getVoc() . "Aging-Related-Gene", "Aging Related Gene")
        );
        parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "gene-symbol", parent::safeLiteral($gene_symbol)));

        // only link a taxon when the organism is known; an unknown organism used
        // to yield an undefined-index notice and a bare "ncbitaxon:" URI
        if (isset($tax_ids[$organism])) {
            parent::addRDF(parent::triplify($genage_id, parent::getVoc() . "taxon", "ncbitaxon:" . $tax_ids[$organism]));
        } else {
            trigger_error("No taxonomy identifier known for organism '{$organism}'", E_USER_WARNING);
        }

        if ($ncbi_gene_id !== "") {
            parent::addRDF(parent::triplify($genage_id, parent::getVoc() . "x-ncbigene", "ncbigene:" . $ncbi_gene_id));
        }
        if ($max_percent_obsv_avg_lifespan_change !== "") {
            parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "maximum-percent-observed-average-lifespan-change", parent::safeLiteral($max_percent_obsv_avg_lifespan_change)));
        }

        // the combined value is split into two separate lifespan-effect statements
        if ($lifespan_effect == "Increase and Decrease") {
            parent::addRDF(
                parent::triplifyString($genage_id, parent::getVoc() . "lifespan-effect", "increase")
                . parent::triplifyString($genage_id, parent::getVoc() . "lifespan-effect", "decrease")
            );
        } else {
            parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "lifespan-effect", strtolower($lifespan_effect)));
        }
        parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "longevity-influence", strtolower($longevity_influence)));

        parent::WriteRDFBufferToWriteFile();
    }
}
/**
 * Convert the tab-delimited genotype / "not disease" table held by the current
 * read file into RDF: one negated allele-disease association per
 * (allele, disease) pair, with optional PubMed references.
 *
 * Columns read: [0] genotype string, [2] "|"-separated alleles,
 * [5] ","-separated PubMed ids, [7] ","-separated disease ids.
 * Rows that do not have exactly 8 columns are reported and skipped.
 * The buffer is flushed to the write file after every row.
 *
 * @return void
 */
function MGI_Geno_NotDisease()
{
    while ($l = $this->getReadFile()->read(248000)) {
        // NOTE(review): read() is assumed to possibly keep the line terminator;
        // strip it so it cannot end up inside the last column (the disease list)
        $a = explode("\t", rtrim($l, "\r\n"));
        if (count($a) != 8) {
            trigger_error("Incorrect number of columns", E_USER_WARNING);
            continue;
        }

        $genotype = $a[0];
        $alleles = explode("|", strtolower($a[2]));
        $diseases = explode(",", $a[7]);
        $pmids = $a[5] ? explode(",", $a[5]) : array();

        foreach ($diseases as $d) {
            $d = trim($d);
            if ($d === '') {
                // an empty disease column would otherwise produce a bare "omim:" URI
                continue;
            }
            $disease = "omim:{$d}";

            foreach ($alleles as $allele) {
                // the association id is derived from the allele/disease pair
                $id = parent::getRes() . md5($allele . $disease);
                $label = "{$allele} {$disease} absent association";

                parent::addRDF(
                    parent::describeIndividual($id, $label, $this->getVoc() . "Allele-Disease-Non-Association")
                    . parent::describeClass($this->getVoc() . "Allele-Disease-Non-Association", "MGI Allele-Disease Non-Association")
                    . parent::triplify($id, $this->getVoc() . "allele", $allele)
                    . parent::triplifyString($id, $this->getVoc() . "genotype-string", $genotype)
                    . parent::triplify($id, $this->getVoc() . "disease", $disease)
                    . parent::triplifyString($id, $this->getVoc() . "is-negated", "true")
                );

                foreach ($pmids as $pmid) {
                    parent::addRDF(parent::triplify($id, $this->getVoc() . "x-pubmed", "pubmed:" . $pmid));
                }
            }
        }
        $this->writeRDFBufferToWriteFile();
    }
}
/**
 * Turn the tab-delimited drug-drug interaction table held by the current read
 * file into RDF.
 *
 * The first line is read and discarded. Every following line becomes one
 * "twosides:<row number>" association linking two chemicals and one event;
 * each chemical/event is described only the first time it is encountered.
 * The buffer is flushed every 10000 rows and once more at the end.
 *
 * @return void
 */
function twosides()
{
    // URIs that have already been described
    $described = null;

    // discard the first line
    $this->GetReadFile()->Read();

    for ($row = 1; $line = $this->GetReadFile()->Read(); $row++) {
        $cols = explode("\t", $line);

        if ($row % 10000 == 0) {
            $this->WriteRDFBufferToWriteFile();
        }

        $association = "twosides:{$row}";

        // the numeric part of the compound identifiers starts at offset 4
        $drug1 = "pubchemcompound:" . (int) sprintf("%d", substr($cols[0], 4));
        $drug1_label = $cols[2];
        $drug2 = "pubchemcompound:" . (int) sprintf("%d", substr($cols[1], 4));
        $drug2_label = $cols[3];
        $event = "umls:" . $cols[4];
        $event_label = strtolower($cols[5]);
        $association_label = "DDI between {$drug1_label} and {$drug2_label} leading to {$event_label}";

        // uri, label, class suffix, class label
        $participants = array(
            array($drug1, $drug1_label, "Chemical", "PharmGKB Chemical"),
            array($drug2, $drug2_label, "Chemical", "PharmGKB Chemical"),
            array($event, $event_label, "Event", "PharmGKB side effect event"),
        );
        foreach ($participants as $participant) {
            list($uri, $label, $class, $class_label) = $participant;
            if (isset($described[$uri])) {
                continue;
            }
            parent::addRDF(
                parent::describeIndividual($uri, $label, parent::getVoc() . $class)
                . parent::describeClass(parent::getVoc() . $class, $class_label)
            );
            $described[$uri] = '';
        }

        $rdf = parent::describeIndividual($association, $association_label, parent::getVoc() . "Drug-Drug-Association");
        $rdf .= parent::describeClass(parent::getVoc() . "Drug-Drug-Association", "PharmGKB Twosides Drug-Drug Association");
        $rdf .= parent::triplify($association, parent::getVoc() . "chemical", $drug1);
        $rdf .= parent::triplify($association, parent::getVoc() . "chemical", $drug2);
        $rdf .= parent::triplify($association, parent::getVoc() . "event", $event);
        $rdf .= parent::triplifyString($association, parent::getVoc() . "p-value", $cols[7]);
        parent::addRDF($rdf);
    }

    parent::writeRDFBufferToWriteFile();
}
/**
 * Convert the comma-separated gene expression table held by the current read
 * file into RDF, one expression record plus one evidence record per row.
 *
 * The first line is treated as a header and must have 8 columns. Columns read
 * from each row: [0] gene symbol, [1] gene description, [2] gene id,
 * [3] total datasets, [4] total over-expressed, [5] total under-expressed,
 * [6] p-value, [7] expression direction. The buffer is flushed after each row.
 *
 * @return false|null false when the header has an unexpected column count;
 *                    nothing otherwise
 */
function gene_expression()
{
    $expected_columns = 8;
    $header = explode(",", parent::getReadFile()->read());
    $n = count($header);
    if ($n != $expected_columns) {
        trigger_error("Found {$n} columns in gene file - expecting {$expected_columns}!", E_USER_WARNING);
        return false;
    }

    while ($line = parent::getReadFile()->read(200000)) {
        $row = str_getcsv($line);
        $symbol = $row[0];
        $description = $row[1];
        $gene = $row[2];
        $n_datasets = $row[3];
        $n_over = $row[4];
        $n_under = $row[5];
        $pvalue = $row[6];
        $direction = $row[7];

        // record and evidence identifiers are hashes of the same row values
        $seed = $gene . $n_datasets . $n_over . $n_under . $pvalue . $direction;
        $record = parent::getRes() . md5($seed);
        $evidence = parent::getRes() . md5($seed . "_evidence");

        $record_label = "Dietary restriction induced {$direction}-expression of {$symbol}"
            . " based on microarray results from {$n_datasets} datasets, with p-value {$pvalue}";
        $class_label = "Gene " . ucfirst($direction) . " Expression";
        $class_uri = parent::getVoc() . str_replace(" ", "-", $class_label);
        $gene_uri = "ncbigene:" . $gene;

        $statements = array(
            parent::describeIndividual($record, $record_label, $class_uri),
            parent::describeClass($class_uri, $class_label),
            parent::triplify($record, parent::getVoc() . "gene", $gene_uri),
            parent::triplifyString($gene_uri, parent::getVoc() . "mgi-gene-symbol", $symbol),
            parent::triplifyString($gene_uri, parent::getVoc() . "mgi-gene-description", $description),
            parent::triplify($record, parent::getVoc() . "evidence", $evidence),
            parent::triplifyString($record, parent::getVoc() . "perturbation-context", "dietary restriction"),
            parent::triplifyString($evidence, parent::getVoc() . "total-number-datasets", $n_datasets),
            parent::triplifyString($evidence, parent::getVoc() . "total-number-datasets-overexpressed", $n_over),
            parent::triplifyString($evidence, parent::getVoc() . "total-number-datasets-underexpressed", $n_under),
            parent::triplifyString($evidence, parent::getVoc() . "p-value", $pvalue),
        );
        parent::addRDF(implode("", $statements));

        parent::writeRDFBufferToWriteFile();
    }
}
/**
 * Convert the tab-delimited gene-to-accession table held by the current read
 * file into RDF.
 *
 * The first line is read and discarded; lines starting with "#" are skipped.
 * For each remaining row a genomic location (region, start and stop position)
 * is described when both positions are present, and every non-"-" column
 * listed in the column map is attached to the gene either as a resource
 * (when the column has a namespace) or as a literal. When $this->taxids is
 * set, rows whose taxonomy id is not a key of it are skipped.
 * The buffer is flushed to the write file after every processed row.
 *
 * @return void
 */
private function gene2accession()
{
    // discard the first line
    $this->getReadFile()->read(200000);

    // column index => predicate suffix ('rel') and, for resources, namespace ('ns')
    // (tab is used as a separator, pound sign - start of a comment)
    $columns = array(
        0 => array('rel' => "x-taxonomy", 'ns' => "taxonomy"),
        1 => array('rel' => "ncbigene", 'ns' => "ncbigene"),
        2 => array('rel' => "status"),
        3 => array('rel' => "rna-nucleotide-accession.version", 'ns' => "genbank"),
        4 => array('rel' => "rna-nucleotide-gi", 'ns' => "gi"),
        5 => array('rel' => "protein-accession.version", 'ns' => "genbank"),
        6 => array('rel' => "protein-gi", 'ns' => "gi"),
        7 => array('rel' => "genomic-nucleotide-accession.version", 'ns' => "genbank"),
        8 => array('rel' => "genomic-nucleotide-gi", 'ns' => "gi"),
        9 => array('rel' => "genomic-start-position"),
        10 => array('rel' => "genomic-end-position"),
        11 => array('rel' => "orientation"),
        12 => array('rel' => "assembly"),
        13 => array('rel' => "mature-peptide-accession.version", 'ns' => "genbank"),
        14 => array('rel' => "mature-peptide-gi", 'ns' => "gi"),
        15 => array('rel' => "symbol"),
    );
    // columns that are never emitted by the generic column loop
    $not_emitted = array(1, 9, 10, 11);

    $counter = 1;
    while ($line = $this->getReadFile()->read(200000)) {
        if ($line[0] == "#") {
            continue;
        }
        // progress report (and parent::clear()) every 10000 data rows
        if ($counter++ % 10000 == 0) {
            echo $counter . PHP_EOL;
            parent::clear();
        }

        $row = explode("\t", rtrim($line));
        $width = count($row);
        if ($width != 16) {
            trigger_error("gene2accession: expecting 16 columns, found " . $width . " instead", E_USER_ERROR);
        }

        $taxid = $row[0];
        if (isset($this->taxids) and !isset($this->taxids[$taxid])) {
            continue;
        }

        $gene = parent::getNamespace() . $row[1];
        // a non-"-" status column switches genbank accessions to the refseq namespace
        $refseq = ($row[2] != '-');

        if ($row[9] != '-' and $row[10] != '-') {
            $sequence = $row[7];
            $region = parent::getRes() . $sequence . "/" . $row[9] . "-" . $row[10];
            $begin = parent::getRes() . $sequence . "/" . $row[9];
            $end = parent::getRes() . $sequence . "/" . $row[10];

            $strand = "faldo:StrandedPosition";
            if ($row[11] == "+") {
                $strand = "faldo:ForwardStrandPosition";
            } elseif ($row[11] == "-") {
                $strand = "faldo:ReverseStrandPosition";
            }

            $rdf = parent::describeIndividual($region, "location of ncbigene:" . $row[1] . " on " . $sequence, "faldo:Region");
            $rdf .= parent::describeIndividual($begin, "start of ncbigene:" . $row[1] . " on " . $sequence, "faldo:ExactPosition");
            $rdf .= parent::describeIndividual($end, "stop position of ncbigene:" . $row[1] . " on " . $sequence, "faldo:ExactPosition");
            $rdf .= parent::triplify($gene, "faldo:location", $region);
            $rdf .= parent::triplify($region, "faldo:begin", $begin);
            $rdf .= parent::triplify($begin, "rdf:type", $strand);
            $rdf .= parent::triplifyString($begin, "faldo:position", $row[9], "xsd:integer");
            $rdf .= parent::triplify($begin, "faldo:reference", "refseq:" . $sequence);
            $rdf .= parent::triplify($region, "faldo:end", $end);
            $rdf .= parent::triplify($end, "rdf:type", $strand);
            $rdf .= parent::triplifyString($end, "faldo:position", $row[10], "xsd:integer");
            $rdf .= parent::triplify($end, "faldo:reference", "refseq:" . $sequence);
            parent::addRDF($rdf);
        }

        foreach ($columns as $index => $column) {
            if ($row[$index] == "-") {
                continue;
            }
            if (in_array($index, $not_emitted, true)) {
                continue;
            }

            $predicate = parent::getVoc() . $column['rel'];
            if (!isset($column['ns'])) {
                parent::addRDF(parent::triplifyString($gene, $predicate, $row[$index]));
                continue;
            }

            $namespace = $column['ns'];
            if ($namespace == 'genbank' and $refseq == true) {
                $namespace = 'refseq';
            }
            parent::addRDF(parent::triplify($gene, $predicate, "{$namespace}:" . $row[$index]));
        }

        parent::writeRDFBufferToWriteFile();
    }
}
/**
 * Convert the tab-delimited, 17-column annotation records held by the current
 * read file into RDF.
 *
 * Lines starting with "!" are skipped. For every record the annotated entity
 * is described and linked to its GO term, and a separate annotation resource
 * (named from $file and a running counter) carries the evidence code,
 * category, assigner, entry date and PubMed references.
 * The buffer is flushed to the write file after every record.
 *
 * @param string $file used as part of each annotation resource identifier
 * @return false|null false as soon as a record does not have 17 columns;
 *                    nothing otherwise
 */
function process($file)
{
    // aspect => predicate suffix used when the qualifier is 'NOT'
    $negated = array(
        'process' => 'not-in-process',
        'function' => 'not-has-function',
        'component' => 'not-in-component',
    );

    $counter = 1;
    while ($line = parent::getReadFile()->read(100000)) {
        if ($counter % 100000 == 0) {
            parent::clear();
        }
        if ($line[0] == "!") {
            continue;
        }

        $fields = explode("\t", $line);
        $nfields = count($fields);
        if ($nfields != 17) {
            trigger_error("Expected 17 columns, but found " . $nfields, E_USER_ERROR);
            return false;
        }

        $db = $fields[0];
        $id = $fields[1];
        $symbol = $fields[2];
        $qualifier = $fields[3];
        //get the Go id
        $goid = substr($fields[4], 3);
        $refs = $this->getDbReferences($fields[5]);
        $eco = $this->getEvidenceCodeLabelArr($fields[6]);
        $aspect = $this->getAspect($fields[8]);
        $label = $fields[9];
        $synonyms = explode("|", $fields[10]);
        $taxid = $fields[12];
        $date = $this->parseDate($fields[13]);
        $assignedBy = $fields[14];

        //entity id
        $entity = $this->getdbURI($db, $id);
        if (!$entity) {
            // dump the record that could not be mapped, then move on
            print_r($fields);
            continue;
        }

        $voc = parent::getVoc();
        $go_term = "go:" . $goid;

        parent::addRDF(
            parent::describeIndividual($entity, $label, $voc . "GO-Annotation")
            . parent::describeClass($voc . "GO-Annotation", "GO Annotation")
            . parent::triplifyString($entity, $voc . "symbol", $symbol)
        );
        parent::addRDF(parent::triplify($entity, $voc . "x-taxonomy", $taxid));

        foreach ($synonyms as $synonym) {
            if (empty($synonym)) {
                continue;
            }
            parent::addRDF(parent::triplifyString($entity, $voc . "synonym", $synonym));
        }

        // the aspect doubles as the predicate unless the annotation is negated
        $relation = $aspect;
        if ($qualifier == 'NOT' && is_string($aspect) && isset($negated[$aspect])) {
            $relation = $negated[$aspect];
        }
        parent::addRDF(
            parent::describeObjectProperty($voc . $relation, str_replace("-", " ", $relation))
            . parent::triplify($entity, $voc . $relation, $go_term)
        );

        $evidence_key = key($eco);
        $annotation = parent::getRes() . $file . "_" . $counter++;
        parent::addRDF(
            parent::describeObjectProperty($voc . "go-annotation", "GO annotation")
            . parent::triplify($entity, $voc . "go-annotation", $annotation)
        );

        $category = parent::getRes() . md5($aspect);
        parent::addRDF(
            parent::describeIndividual($annotation, "{$id}-go:{$goid} association", $voc . "GO-Annotation")
            . parent::triplify($annotation, $voc . "target", $entity)
            . parent::triplify($annotation, $voc . "go-term", $go_term)
            . parent::triplify($annotation, $voc . "evidence", "eco:" . $eco[$evidence_key][1])
            . parent::triplify($annotation, $voc . "go-category", $category)
            . parent::describeClass($category, $aspect)
            . parent::triplifyString($annotation, $voc . "assigned-by", $assignedBy)
        );

        if ($date != '') {
            parent::addRDF(parent::triplifyString($annotation, $voc . "entry-date", $date . "T00:00:00Z", "xsd:dateTime"));
        }

        // only PMID references are emitted
        foreach ($refs as $ref) {
            $parts = explode(":", $ref);
            if ($parts[0] == 'PMID') {
                parent::addRDF(parent::triplify($annotation, $voc . "article", "pubmed:" . $parts[1]));
            }
        }

        //write RDF to file
        parent::writeRDFBufferToWriteFile();
    }
}
/**
 * Convert pubchem substance XML record to RDF
 *
 * Emits the substance with its version, links to its compounds, its source
 * identifier, its synonyms and its non-empty comments.
 *
 * @param object $xml reader exposing GetXMLRoot(), which must return an
 *                    object supporting xpath() (used as a SimpleXMLElement here)
 * @return void
 **/
function parse_substance_record(&$xml)
{
    $root = $xml->GetXMLRoot();

    // First node of an xpath() result, or null when nothing matched.
    // array_shift() takes its argument by reference, so it must not be fed the
    // temporary returned by xpath() directly.
    $first = function ($nodes) {
        return (is_array($nodes) && count($nodes) > 0) ? $nodes[0] : null;
    };

    // pubchem identifier and version
    $sid = $first($root->xpath('//PC-Substance_sid/PC-ID/PC-ID_id'));
    $sid_version = $first($root->xpath('//PC-Substance_sid/PC-ID/PC-ID_version'));
    $psid = $this->getPcsNs() . $sid;

    parent::addRDF(parent::describeIndividual($psid, null, $this->getPcsVoc() . "Substance"));
    parent::addRDF(parent::triplifyString($psid, $this->getPcsVoc() . "version", parent::safeLiteral($sid_version)));

    // reference to pubchem compounds
    $pc_compounds = $root->xpath('//PC-Substance_compound/PC-Compounds/PC-Compound');
    foreach ($pc_compounds as $compound) {
        $cid = $first($compound->xpath('./PC-Compound_id/PC-CompoundType/PC-CompoundType_id/PC-CompoundType_id_cid'));
        if ($cid !== null && (string) $cid !== "") {
            $pcid = $this->getPccNs() . $cid;
            parent::addRDF(parent::triplify($psid, $this->getPcsVoc() . "compound", $pcid));
        }
    }

    // database cross references (xref)
    // source identifier
    $source_id = $first($root->xpath('//PC-Substance_source/PC-Source/PC-Source_db/PC-DBTracking/PC-DBTracking_source-id/Object-id/Object-id_str'));
    parent::addRDF(parent::triplifyString($psid, $this->getPcsVoc() . "source-identifier", parent::safeLiteral($source_id)));

    // synonyms
    $synonyms = $root->xpath('//PC-Substance_synonyms/PC-Substance_synonyms_E');
    foreach ($synonyms as $synonym) {
        parent::addRDF(parent::triplifyString($psid, $this->getPcsVoc() . "synonym", parent::safeLiteral($synonym)));
    }

    //comment
    $comments = $root->xpath('//PC-Substance_comment/PC-Substance_comment_E');
    foreach ($comments as $comment) {
        // compare the node's text: an element object is never identical to ""
        if ((string) $comment !== "") {
            parent::addRDF(parent::triplifyString($psid, "rdfs:comment", parent::safeLiteral($comment)));
        }
    }
}
/**
 * Add an RDF representation of one descriptor record to the model.
 *
 * The record is described as a Descriptor individual, then every field of the
 * record is emitted according to its kind: plain literal, date literal,
 * annotation (AN), allowable qualifier (AQ), tree number (MN) or semantic
 * type (ST). Fields that are known to the descriptor data-element map but have
 * no handler here emit nothing. A field missing from that map raises an
 * E_USER_ERROR. The buffer is flushed to the write file after each field.
 *
 * @param array $desc_record_arr assoc array (field code => list of values)
 *                               with the contents of one descriptor record;
 *                               must contain "UI" and "MH"
 * @return void
 */
private function makeDescriptorRecord($desc_record_arr)
{
    // fields emitted as escaped string literals => description of the property
    $literal_fields = array(
        "CATSH" => "Relationship between a descriptor and its cataloging subheadings list name",
        "CX" => "Relationship between a descriptor and xrefs",
        "DC" => "Relationship between a descriptor and its descriptor class",
        "DE" => "Relationship between a descriptor record and its entry version",
        "DS" => "Relationship between a descriptor record and its sort version",
        "EC" => "Relationship between a descriptor and its entry combination",
        "PRINT ENTRY" => "Relationship between a descriptor and its print entry term",
        "ENTRY" => "Relationship between a descriptor and its entry term",
        "FX" => "Relationship between a descriptor and its forward cross reference",
        "GM" => "Relationship between a descriptor and its grateful med note",
        "HN" => "Relationship between a descriptor record and its history note",
        "MED" => "Relationship between a descriptor and its backfile postings",
        "M94" => "Relationship between a descriptor and its backfile postings",
        "M90" => "Relationship between a descriptor and its backfile postings",
        "M85" => "Relationship between a descriptor record and its backfile postings",
        "M80" => "Relationship between a descriptor record and its backfile postings",
        "M75" => "Relationship between a descriptor record and its backfile postings",
        "M66" => "Relationship between a descriptor record and its backfile postings",
        "MH_TH" => "Relationship between a descriptor record and its MeSH Heading thesaurus id",
        "MH" => "Relationship between a descriptor record and its MeSH Heading",
        "MS" => "Relationship between a descriptor record and its MeSH scope note",
        "N1" => "Relationship between a descriptor record and its CAS 1 name",
        "OL" => "Relationship between a descriptor record and its online note",
        "PA" => "Relationship between a descriptor record and its pharmacological action",
        "PI" => "Relationship between a descriptor record and its previous indexing",
        "PM" => "Relationship between a descriptor record and its public mesh note",
        "PX" => "Relationship between a descriptor record and its pre explosion",
        "RECTYPE" => "Relationship between a descriptor record and its record type",
        "RH" => "Relationship between a descriptor record and its running head, in relation to mesh tree structures",
        "RN" => "Relationship between a descriptor record and its CAS registry",
        "RR" => "Relationship between a descriptor record and its registry number",
    );
    // fields emitted as xsd:date literals => description of the property
    $date_fields = array(
        "DA" => "Relationship between a descriptor and its date of entry",
        "DX" => "Relationship between a descriptor and its date of major descriptor established",
        "MR" => "Relationship between a descriptor record and its major revision date",
    );

    //get the UI of the descriptor record
    $dr_ui = $desc_record_arr["UI"][0];
    $dr_res = $this->getNamespace() . $dr_ui;
    $dr_label = $desc_record_arr['MH'][0];

    parent::AddRDF(
        parent::describeIndividual($dr_res, $dr_label, $this->getVoc() . "Descriptor", $dr_label)
        . parent::describeClass($this->getVoc() . "Descriptor", "MeSH Descriptor")
    );

    //now get the descriptor_data_elements
    $qde = $this->getDescriptorDataElements();

    //iterate over the properties
    foreach ($desc_record_arr as $k => $v) {
        if (!array_key_exists($k, $qde)) {
            trigger_error("Please add key to descriptor record map: " . $k . PHP_EOL, E_USER_ERROR);
        } elseif (isset($literal_fields[$k])) {
            // The property described is always the one being emitted. The CX
            // branch previously described the CATSH property by mistake.
            $property = $this->getVoc() . $qde[$k];
            foreach ($v as $vv) {
                parent::AddRDF(
                    parent::triplifyString($dr_res, $property, utf8_encode(htmlspecialchars($vv)))
                    . parent::describeProperty($property, $literal_fields[$k])
                );
            }
        } elseif (isset($date_fields[$k])) {
            $property = $this->getVoc() . $qde[$k];
            foreach ($v as $vv) {
                parent::AddRDF(
                    parent::triplifyString($dr_res, $property, $this->formatDate($vv), "xsd:date")
                    . parent::describeProperty($property, $date_fields[$k])
                );
            }
        } elseif ($k == "AN") {
            $property = $this->getVoc() . $qde[$k];
            foreach ($v as $vv) {
                //explode by semicolon; annotation text is emitted unescaped
                foreach (explode(";", $vv) as $anAn) {
                    parent::AddRDF(
                        parent::triplifyString($dr_res, $property, $anAn)
                        . parent::describeProperty($property, "Relationship between a descriptor and its annotation")
                    );
                }
            }
        } elseif ($k == "AQ") {
            //add allowable topical qualifiers (space-separated codes)
            $property = $this->getVoc() . $qde[$k];
            $qualifier_class = $this->getVoc() . "allowable-topical-qualifier";
            foreach ($v as $vv) {
                foreach (explode(" ", $vv) as $aq) {
                    $aq_res = $this->getRes() . $aq;
                    parent::AddRDF(
                        parent::triplify($aq_res, "rdf:type", $qualifier_class)
                        . parent::describeClass($qualifier_class, "allowable topical qualifier: " . $qde[$k])
                    );
                    parent::AddRDF(
                        parent::triplify($dr_res, $property, $aq_res)
                        . parent::describeProperty($property, "Relationship between a descriptor and its allowable topical qualifiers")
                    );
                }
            }
        } elseif ($k == "MN") {
            $property = $this->getVoc() . $qde[$k];
            foreach ($v as $vv) {
                $vid = parent::getNamespace() . $vv;
                $vlabel = utf8_encode(htmlspecialchars($vv));
                parent::AddRDF(
                    parent::describeIndividual($vid, $dr_label, parent::getVoc() . "Tree-Entry", $vlabel)
                    . parent::triplify($dr_res, $property, $vid)
                    . parent::describeProperty($property, "Relationship between a descriptor record and its MeSH Tree Number")
                );
                // everything before the last "." names the parent tree entry
                if (FALSE !== ($pos = strrpos($vv, "."))) {
                    $pid = parent::getNamespace() . substr($vv, 0, $pos);
                    parent::addRDF(parent::triplify($vid, "rdfs:subClassOf", $pid));
                }
            }
        } elseif ($k == "ST") {
            $property = $this->getVoc() . $qde[$k];
            foreach ($v as $vv) {
                $vid = parent::getNamespace() . $vv;
                $vlabel = utf8_encode(htmlspecialchars($vv));
                parent::AddRDF(
                    parent::describeIndividual($vid, $vlabel, parent::getVoc() . "Semantic-Type", $vlabel)
                    . parent::triplify($dr_res, $property, $vid)
                    . parent::describeProperty($property, "Relationship between a descriptor record and its semantic type")
                );
            }
        }
        $this->WriteRDFBufferToWriteFile();
    } //foreach
    $this->WriteRDFBufferToWriteFile();
}
/**
 * Convert the tab-delimited gene table held by the current read file into RDF.
 *
 * Lines starting with "#" are skipped. The first data line must have 8
 * columns. Each row yields a Gene individual with its symbol, plus one
 * statement per "|"-separated value in columns 3-7 (alternative gene ids,
 * synonyms and cross-references). The buffer is flushed after every row.
 *
 * @return bool FALSE when the first data line does not have 8 columns,
 *              TRUE once the whole file has been processed
 */
function CTD_Genes()
{
    // column index => predicate suffix ('rel') and, for resources, namespace ('ns')
    $ids = array(
        3 => array('rel' => "alternative-ncbigene-id", 'ns' => "ncbigene"),
        4 => array('rel' => 'synonym'),
        5 => array('rel' => 'x-biogrid', 'ns' => 'biogrid'),
        6 => array('rel' => 'x-pharmgkb', 'ns' => 'pharmgkb'),
        7 => array('rel' => 'x-uniprot', 'ns' => 'uniprot'),
    );

    $first = true;
    while ($l = $this->GetReadFile()->Read()) {
        if ($l[0] == '#') {
            continue;
        }
        $a = explode("\t", $l);

        // check number of columns
        if ($first) {
            if (($c = count($a)) != 8) {
                trigger_error("CTD_genes function expects 8 fields, found {$c}!" . PHP_EOL, E_USER_WARNING);
                return FALSE;
            }
            $first = false;
        }

        $symbol = str_replace(array("\\/"), array('|'), $a[0]);
        $label = str_replace("\\+/", '+', $a[1]);
        $geneid = "ncbigene:" . $a[2];

        $this->addRDF(
            parent::describeIndividual($geneid, $label, $this->getVoc() . "Gene")
            . parent::triplifyString($geneid, $this->getVoc() . "gene-symbol", $symbol)
            . parent::describeClass($this->getVoc() . "Gene", "CTD Gene")
        );

        foreach ($ids as $i => $v) {
            // tolerate short rows and whitespace-only columns
            if (!isset($a[$i]) || !trim($a[$i])) {
                continue;
            }
            foreach (explode("|", $a[$i]) as $value) {
                // emit the trimmed value: the emptiness check above is on the
                // trimmed column, and the last column may carry trailing whitespace
                $value = trim($value);
                if ($value === '') {
                    continue;
                }
                if (isset($v['ns'])) {
                    parent::addRDF(parent::triplify($geneid, parent::getVoc() . $v['rel'], $v['ns'] . ":" . $value));
                } else {
                    parent::addRDF(parent::triplifyString($geneid, parent::getVoc() . $v['rel'], $value));
                }
            }
        }
        parent::WriteRDFBufferToWriteFile();
    }
    return TRUE;
}
/**
 * Converts the requested BioModels entries to RDF and writes a dataset
 * description.
 *
 * The 'files' parameter selects the work:
 *   - "all" / "curated": the id list is fetched from the BioModels SOAP
 *     service and cached as JSON in the input directory;
 *   - "N-M": a numeric range of model numbers;
 *   - otherwise: a comma-separated list of model numbers.
 *
 * For every entry the BioPAX3 file is downloaded (curated location first,
 * then the non-curated one), converted with BioPAX2Bio2RDF and appended to
 * the output file. Entries that cannot be downloaded or read are skipped.
 *
 * @return false|null FALSE when the id list cannot be obtained from the web
 *                    service; no value otherwise.
 */
function Run()
{
    // directory shortcuts
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');

    // get the work specified
    $list = trim(parent::getParameterValue('files'));
    $entries = array();

    // keyword => array(cache file name, SOAP operation returning the ids)
    $services = array(
        'all' => array("all_models.json", 'getAllModelsId'),
        'curated' => array("curated_models.json", 'getAllCuratedModelsId'),
    );

    if (isset($services[$list])) {
        list($cache_name, $operation) = $services[$list];
        $file = $ldir . $cache_name;
        if (!file_exists($file)) {
            try {
                $x = @new SoapClient("http://www.ebi.ac.uk/biomodels-main/services/BioModelsWebServices?wsdl");
                $entries = $x->{$operation}();
            } catch (Exception $e) {
                // Without the id list there is nothing to process; stop here
                // instead of calling a method on a client that was never built.
                trigger_error("Unable to retrieve the BioModels id list: " . $e->getMessage() . PHP_EOL, E_USER_ERROR);
                return FALSE;
            }
            file_put_contents($file, json_encode($entries));
        } else {
            $entries = json_decode(file_get_contents($file));
        }
    } elseif (($pos = strpos($list, "-")) !== FALSE) {
        // a hyphenated range was provided
        $start_range = substr($list, 0, $pos);
        $end_range = substr($list, $pos + 1);
        for ($i = $start_range; $i <= $end_range; $i++) {
            $entries[] = "BIOMD" . str_pad($i, 10, "0", STR_PAD_LEFT);
        }
    } else {
        // comma separated list; tokens are trimmed and empty ones dropped so
        // that "1, 2" or a trailing comma does not yield malformed ids
        foreach (explode(",", $list) as $e) {
            $e = trim($e);
            if ($e === '') {
                continue;
            }
            $entries[] = "BIOMD" . str_pad($e, 10, "0", STR_PAD_LEFT);
        }
    }

    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    // set the write file
    $suffix = parent::getParameterValue('output_format');
    $outfile = 'biomodels' . '.' . $suffix;
    $gz = false;
    if (strstr($suffix, "gz")) {
        $gz = true;
    }
    $dataset_description = '';
    parent::setWriteFile($odir . $outfile, $gz);

    // iterate over the entries
    $i = 0;
    $total = count($entries);
    $source_file = null; // description of the last successfully processed source
    foreach ($entries as $id) {
        echo "processing " . ++$i . " of {$total} - biomodel# " . $id;
        $download_file = $ldir . $id . ".owl.gz";
        $url = parent::getParameterValue('download_url') . "publ/{$id}/{$id}-biopax3.owl";

        // download if the file doesn't exist or we are told to
        if (!file_exists($download_file) || $this->GetParameterValue('download') == 'true') {
            echo " - downloading";
            $ret = utils::downloadsingle($url, 'compress.zlib://' . $download_file, true);
            if ($ret === false) {
                echo "\nTrying non-curated model";
                $url = parent::getParameterValue('download_url') . "uncura_publ/{$id}/{$id}-biopax3.owl";
                $ret = utils::downloadsingle($url, 'compress.zlib://' . $download_file, true);
                if ($ret === false) {
                    continue;
                }
            }
            echo " - downloaded";
        }

        // load entry, parse and write to file
        echo " - parsing... ";
        $buf = file_get_contents("compress.zlib://" . $download_file);
        if ($buf === false) {
            // unreadable local copy: skip the entry rather than feeding
            // `false` to the converter
            trigger_error("Unable to read {$download_file}" . PHP_EOL, E_USER_WARNING);
            continue;
        }

        $converter = new BioPAX2Bio2RDF($this);
        $converter->SetBuffer($buf)
            ->SetBioPAXVersion(3)
            ->SetBaseNamespace("http://identifiers.org/biomodels.db/{$id}/")
            ->SetBio2RDFNamespace("http://bio2rdf.org/biomodels:" . $id . "_")
            ->SetDatasetURI($this->GetDatasetURI());
        $rdf = $converter->Parse();
        parent::addRDF($rdf);
        parent::writeRDFBufferToWriteFile();

        // generate dataset description
        // NOTE(review): 'G' prints the hour without a leading zero and date()
        // uses the default timezone although the literal 'Z' denotes UTC.
        // Left unchanged to keep emitted values stable - confirm whether
        // gmdate() with 'H' is wanted.
        $source_file = (new DataResource($this))
            ->setURI($url)
            ->setTitle("EBI BioModels Database - BioModel # {$id}")
            ->setRetrievedDate(date("Y-m-d\\TG:i:s\\Z", filemtime($download_file)))
            ->setFormat("rdf/xml")
            ->setPublisher("http://www.ebi.ac.uk/")
            ->setHomepage("http://www.ebi.ac.uk/biomodels-main/")
            ->setRights("use-share-modify")
            ->setLicense("http://www.ebi.ac.uk/biomodels-main/termsofuse")
            ->setDataset("http://identifiers.org/biomodels.db/");
        $dataset_description .= $source_file->toRDF();
        echo "done!" . PHP_EOL;
    } //foreach
    parent::getWriteFile()->close();

    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = date("Y-m-d\\TG:i:s\\Z");
    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})");
    // Only reference a source when at least one entry was actually processed;
    // otherwise there is no source description to point at.
    if ($source_file !== null) {
        $output_file = $output_file->setSource($source_file->getURI());
    }
    $output_file = $output_file
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/biomodels/biomodels.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());

    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strstr(parent::getParameterValue('output_format'), "nt")) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }
    $dataset_description .= $output_file->toRDF();

    // write dataset description to file
    parent::setGraphURI($graph_uri);
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Converts a parsed InterPro XML document into RDF.
 *
 * First records one "contains" literal per <dbinfo> element of the release
 * (and takes the dataset version from the INTERPRO one), then walks every
 * <interpro> entry and emits its label/type, PubMed references, abstract,
 * examples, hierarchy relations, secondary accessions, cross-references and
 * taxon distribution.
 *
 * The RDF buffer is flushed at the start of each entry, so the triples of
 * the last entry are still buffered when this method returns.
 *
 * @param object $xml Document root; the code relies on SimpleXMLElement
 *                    behaviour (attributes(), asXML(), child iteration).
 */
function Parse($xml)
{
    // state the dataset info
    foreach ($xml->release->dbinfo as $o) {
        $db = $o->attributes()->dbname . " v" . $o->attributes()->version . " (" . $o->attributes()->entry_count . " entries) [" . $o->attributes()->file_date . "]";
        parent::addRDF(parent::triplifyString(parent::getDatasetURI(), parent::getVoc() . "contains", $db));
        if ((string) $o->attributes()->dbname === "INTERPRO") {
            parent::setDatasetVersion($o->attributes()->version);
        }
    }

    // get a potential id list (null means "no filtering")
    $id_list = null;
    if (parent::getParameterValue("id_list") != '') {
        $id_list = array_map('trim', explode(",", parent::getParameterValue("id_list")));
    }

    // Containers whose <rel_ref ipr_ref="..."/> children link to other entries,
    // mapped to the predicate used for them.
    // NOTE(review): the sibling container is `parent_list` and the InterPro DTD
    // appears to name this one `child_list`; the historical spelling `child`
    // is still read as well - confirm and drop whichever is never present.
    $relation_containers = array(
        'parent_list' => "parent",
        'child_list' => "child",
        'child' => "child",
        'contains' => "contains",
        'found_in' => "found-in",
    );
    // Containers whose <db_xref db="..." dbkey="..."/> children are emitted
    // as x-<db> cross-references.
    $xref_containers = array('member_list', 'external_doc_list', 'structure_db_links');

    // now iterate over the entries
    foreach ($xml->interpro as $o) {
        parent::writeRDFBufferToWriteFile();

        // cast once so the filter compares plain strings
        $interpro_id = (string) $o->attributes()->id;
        if ($id_list !== null && !in_array($interpro_id, $id_list, true)) {
            continue;
        }
        echo "Processing {$interpro_id}" . PHP_EOL;

        $name = $o->name;
        $short_name = $o->attributes()->short_name;
        $type = $o->attributes()->type;
        $s = parent::getNamespace() . $interpro_id;
        parent::addRDF(parent::describeIndividual($s, "{$name} ({$short_name}) {$type}", parent::getVoc() . $type));

        // get the pubs: parallel lists of <cite> placeholders and the PubMed
        // links that replace them in the abstract
        $pubs = array('pid' => array(), 'pmid' => array());
        if (isset($o->pub_list->publication)) {
            foreach ($o->pub_list->publication as $p) {
                $pid = (string) $p->attributes()->id;
                if (isset($p->db_xref) && (string) $p->db_xref->attributes()->db === "PUBMED") {
                    $pmid = (string) $p->db_xref->attributes()->dbkey;
                    $pubs['pid'][] = '<cite idref="' . $pid . '"/>';
                    $pubs['pmid'][] = '<a href="http://www.ncbi.nlm.nih.gov/pubmed/' . $pmid . '">pubmed:' . $pmid . '</a>';
                    parent::addRDF(parent::triplify($s, parent::getVoc() . "x-pubmed", "pubmed:{$pmid}"));
                }
            }
        }

        $abstract = (string) $o->abstract->p->asXML();
        if (!empty($pubs['pid'])) {
            $abstract = str_replace($pubs['pid'], $pubs['pmid'], $abstract);
        }
        parent::addRDF(parent::triplifyString($s, "dc:description", $abstract));

        if (isset($o->example_list)) {
            foreach ($o->example_list->example as $example) {
                $db = (string) $example->db_xref->attributes()->db;
                $id = (string) $example->db_xref->attributes()->dbkey;
                parent::addRDF(parent::triplify($s, parent::getVoc() . "example-entry", "{$db}:{$id}"));
            }
        }

        // parent / child / contains / found-in relations
        foreach ($relation_containers as $element => $predicate) {
            if (!isset($o->{$element}->rel_ref)) {
                continue;
            }
            foreach ($o->{$element}->rel_ref as $ref) {
                $id = (string) $ref->attributes()->ipr_ref;
                parent::addRDF(parent::triplify($s, parent::getVoc() . $predicate, "interpro:{$id}"));
            }
        }

        // secondary accessions: iterate the same node that is tested, and use
        // a dedicated loop variable so the subject URI in $s is not overwritten
        if (isset($o->sec_list->sec_ac)) {
            foreach ($o->sec_list->sec_ac as $sec) {
                $id = (string) $sec->attributes()->acc;
                parent::addRDF(parent::triplify($s, parent::getVoc() . "secondary-accession", "interpro:{$id}"));
            }
        }

        // xrefs: the guard tests the same child name (db_xref) the loop reads
        foreach ($xref_containers as $element) {
            if (!isset($o->{$element}->db_xref)) {
                continue;
            }
            foreach ($o->{$element}->db_xref as $dbxref) {
                $db = (string) $dbxref->attributes()->db;
                $id = (string) $dbxref->attributes()->dbkey;
                parent::addRDF(parent::triplify($s, parent::getVoc() . "x-" . strtolower($db), "{$db}:{$id}"));
            }
        }

        // taxon distribution
        if (isset($o->taxonomy_distribution->taxon_data)) {
            foreach ($o->taxonomy_distribution->taxon_data as $t) {
                $organism = (string) $t->attributes()->name;
                $number = (string) $t->attributes()->proteins_count;
                parent::addRDF(parent::triplifyString($s, parent::getVoc() . "taxon-distribution", "{$organism} ({$number})"));
            }
        }
    }
}
/**
 * Re-emits a single parsed statement, adding owl:sameAs links between the
 * original and Bio2RDF forms of every URI whose prefix is known but is not
 * one of the registry's default URI schemes.
 *
 * URIs for which parseURI() reports no prefix are tallied per base URI in
 * $this->unmapped_uri. Blank-node subjects/objects are rewritten to
 * http://bio2rdf.org/<prefix>_resource:<id> before being parsed.
 *
 * @param array  $a      Statement with keys 's', 's_type', 'p', 'o',
 *                       'o_type', 'o_datatype' and 'o_lang'.
 * @param string $prefix Dataset prefix used when rewriting blank nodes.
 */
public function TriplifyMap($a, $prefix)
{
    $default_schemes = parent::getRegistry()->getDefaultURISchemes();

    // Behaviour switches: whether the Bio2RDF URI replaces the original one
    // in the emitted statement, and whether owl:sameAs links are written.
    $prefer_bio2rdf = false;
    $emit_mappings = true;

    $resolved = array();
    foreach (array('s', 'p', 'o') as $slot) {
        // A non-resource object ends the walk: the statement is a literal.
        if ($slot == 'o' && !($a['o_type'] == 'uri' || $a['o_type'] == 'bnode')) {
            $datatype = ($a['o_datatype'] == '') ? null : $a['o_datatype'];
            $lang = ($a['o_lang'] == '') ? null : $a['o_lang'];
            parent::addRDF(parent::triplifyString($resolved['s'], $resolved['p'], $a['o'], $datatype, $lang));
            return;
        }

        // Only subjects and objects can be blank nodes; drop the two-character
        // label prefix and mint a resource URI instead.
        if ($slot != 'p' && $a[$slot . '_type'] == 'bnode') {
            $a[$slot] = 'http://bio2rdf.org/' . $prefix . '_resource:' . substr($a[$slot], 2);
        }

        $parts = $this->parseURI($a[$slot]);
        $resolved[$slot] = $parts['uri'];

        if (!isset($parts['prefix'])) {
            // add to the registry of uris not found
            if (isset($this->unmapped_uri[$parts['base_uri']])) {
                $this->unmapped_uri[$parts['base_uri']]++;
            } else {
                $this->unmapped_uri[$parts['base_uri']] = 1;
            }
            continue;
        }

        if (in_array($parts['prefix'], $default_schemes)) {
            continue;
        }

        if ($prefer_bio2rdf) {
            $resolved[$slot] = $parts['bio2rdf_uri'];
            if ($emit_mappings) {
                parent::addRDF(parent::triplify($resolved[$slot], 'owl:sameAs', $parts['uri']));
            }
        } elseif ($emit_mappings) {
            parent::addRDF(parent::triplify($parts['uri'], 'owl:sameAs', $parts['bio2rdf_uri']));
        }
    }

    // add the triple
    parent::addRDF(parent::triplify($resolved['s'], $resolved['p'], $resolved['o']));
}