/**
 * Convert the disorder-gene association XML file into RDF.
 *
 * For every <Disorder> found under the "DisorderList" root, each
 * <DisorderGeneAssociation> is turned into:
 *   - a description of the gene (label, symbol, every synonym and every
 *     external reference),
 *   - an association resource typed by the association type and linked to
 *     its status, the disorder and the gene.
 * The RDF buffer is flushed to the write file once per disorder.
 *
 * @param string $file value handed to the CXML constructor
 *                     (presumably the path of the XML file; verify against caller)
 */
function genes($file)
{
    $xml = new CXML($file);
    while ($xml->parse("DisorderList") == TRUE) {
        $x = $xml->GetXMLRoot();
        foreach ($x->Disorder as $d) {
            $orphanet_id = parent::getNamespace() . (string) $d->OrphaNumber;
            $disorder_name = (string) $d->Name;

            foreach ($d->DisorderGeneAssociationList->DisorderGeneAssociation as $dga) {
                // gene
                $gene = $dga->Gene;
                $gene_id = parent::getNamespace() . (string) $gene->OrphaNumber;
                $gene_label = (string) $gene->Name;
                $gene_symbol = (string) $gene->Symbol;
                parent::addRDF(
                    parent::describeIndividual($gene_id, $gene_label, parent::getVoc() . "Gene")
                    . parent::describeClass(parent::getVoc() . "Gene", "orphanet gene")
                    . parent::triplifyString($gene_id, parent::getVoc() . "symbol", $gene_symbol)
                );

                // Walk every <Synonym> inside each <SynonymList>. Reading
                // ->Synonym on the list element only yields the first child,
                // and an empty list would yield an empty-string synonym.
                foreach ($gene->SynonymList as $synonym_list) {
                    foreach ($synonym_list->Synonym as $synonym_node) {
                        $synonym = (string) $synonym_node;
                        parent::addRDF(
                            parent::triplifyString($gene_id, parent::getVoc() . "synonym", $synonym)
                        );
                    }
                }

                // Same for cross-references: emit one statement per
                // <ExternalReference>, not just the first one of the list.
                foreach ($gene->ExternalReferenceList as $reference_list) {
                    foreach ($reference_list->ExternalReference as $er) {
                        $db = (string) $er->Source;
                        $db = parent::getRegistry()->getPreferredPrefix($db);
                        $id = (string) $er->Reference;
                        $xref = "{$db}:{$id}";
                        parent::addRDF(
                            parent::triplify($gene_id, parent::getVoc() . "x-{$db}", $xref)
                        );
                    }
                }

                // The association has no identifier of its own in the visible
                // data, so one is derived from the disorder number and a hash
                // of the association's XML.
                $dga_id = parent::getRes() . (string) $d->OrphaNumber . "_" . md5($dga->asXML());

                $ga = $dga->DisorderGeneAssociationType;
                $ga_id = parent::getNamespace() . (string) $ga->attributes()->id;
                $ga_label = (string) $ga->Name;

                $status = $dga->DisorderGeneAssociationStatus;
                $s_id = parent::getNamespace() . (string) $status->attributes()->id;
                $s_label = (string) $status->Name;

                parent::addRDF(
                    parent::describeIndividual($dga_id, "{$ga_label} {$gene_label} in {$disorder_name} ({$s_label})", $ga_id)
                    . parent::describeClass($ga_id, $ga_label, parent::getVoc() . "Disorder-Gene-Association")
                    . parent::triplify($dga_id, parent::getVoc() . "status", $s_id)
                    . parent::describeClass($s_id, $s_label, parent::getVoc() . "Disorder-Gene-Association-Status")
                    . parent::triplify($dga_id, parent::getVoc() . "disorder", $orphanet_id)
                    . parent::describeIndividual($orphanet_id, $disorder_name, parent::getVoc() . "Disorder")
                    . parent::triplify($dga_id, parent::getVoc() . "gene", $gene_id)
                );
            }
            parent::writeRDFBufferToWriteFile();
        }
    }
    unset($xml);
}
/**
 * Break a qualified string into its namespace, identifier and label.
 *
 * The registry's parseQName() is asked to split $string into a namespace and
 * a remainder (both are read back from the variables passed to it), and the
 * remainder is then handed to ParseIDLabelArray().
 *
 * Namespace normalisation applied to the result:
 *   - 'other' and 'xx' become the empty string,
 *   - 'complex' becomes 'rogid'.
 *
 * @param string $string the qualified string to parse
 * @return array|null array with the keys "label", "id" and "ns" (in that
 *                    order), or null when ParseIDLabelArray() returns null
 */
function ParseStringArray($string)
{
    parent::getRegistry()->parseQName($string, $ns, $str);

    $parsed = $this->ParseIDLabelArray($str);
    if ($parsed === null) {
        return null;
    }

    $id = trim($parsed["id"]);
    $label = trim($parsed["label"]);

    // switch compares loosely, exactly like the == operator.
    switch ($ns) {
        case 'other':
        case 'xx':
            $ns = '';
            break;
        case 'complex':
            $ns = 'rogid';
            break;
    }

    return array(
        "label" => $label,
        "id" => $id,
        "ns" => $ns,
    );
}
/**
 * Convert the OBO flat file behind parent::getReadFile() into RDF statements
 * written through parent::getWriteFile().
 *
 * The file is processed line by line. A "[Term]" or "[Typedef]" header opens
 * a stanza; every following "tag: value" line is translated according to the
 * kind of stanza it belongs to. Lines seen before the first stanza are
 * treated as ontology header tags and attached to the ontology URI.
 *
 * Two output buffers are filled in parallel:
 *   - $buf: the full translation,
 *   - $min: the reduced translation (labels, definitions, is_a, deprecation,
 *     root-term statements and, in 'min+' mode, the blank-node expansion of
 *     intersection_of / relationship tags).
 * The 'detail' parameter selects which of the two is written.
 *
 * A term that has no is_a tag and is not obsolete is declared a subclass of
 * obo_vocabulary:Entity when its stanza ends. A stanza ends at the next
 * "[Term]" header, at the next "[Typedef]" header, or at the end of the file.
 *
 * @param string $abbv ontology abbreviation; lower-cased, "doid" is mapped to "do"
 */
function OBO2RDF($abbv)
{
    $abbv = strtolower($abbv);
    if ($abbv == "doid") {
        $abbv = "do";
    }

    $minimal = parent::getParameterValue('detail') == 'min';
    $minimalp = parent::getParameterValue('detail') == 'min+';
    $version = parent::getParameterValue("bio2rdf_release");

    $tid = '';
    $is_a = false;
    $is_deprecated = false;
    $min = '';

    $ouri = "http://bio2rdf.org/lsr:" . $abbv;
    $dataset_uri = $abbv . "_resource:bio2rdf.dataset.{$abbv}.R" . $version;
    parent::setGraphURI($dataset_uri);

    $buf = parent::triplify($ouri, "rdf:type", "owl:Ontology");
    $graph_uri = '<' . parent::getRegistry()->getFQURI(parent::getGraphURI()) . '>';

    // Counter used to mint blank-node labels (_:bN) for class expressions.
    $bid = 1;

    while ($l = parent::getReadFile()->read()) {
        $lt = trim($l);
        if (strlen($lt) == 0) {
            continue;
        }
        // OBO comment line
        if ($lt[0] == '!') {
            continue;
        }

        $starts_term = strpos($l, "[Term]") !== false;
        $starts_typedef = !$starts_term && strpos($l, "[Typedef]") !== false;
        if ($starts_term || $starts_typedef) {
            // Close the stanza that was open. Only a *term* without parent
            // and not obsolete becomes a subclass of Entity. This is done on
            // both kinds of header so that the last term before a [Typedef]
            // is not forgotten and a typedef is never declared a class.
            if (isset($term) && $tid != '' && !$is_a && !$is_deprecated) {
                $t = parent::triplify($tid, "rdfs:subClassOf", "obo_vocabulary:Entity");
                $buf .= $t;
                $min .= $t;
            }
            $is_a = false;
            $is_deprecated = false;
            $tid = '';
            if ($starts_term) {
                unset($typedef);
                $term = '';
            } else {
                unset($term);
                $typedef = '';
            }
            // Nothing is written here; the buffers are carried over to the
            // next processed line.
            continue;
        }

        // to fix error in obo generator
        $lt = str_replace("synonym ", "synonym: ", $lt);
        // drop a trailing "{...}" modifier that precedes the " !" comment
        $lt = preg_replace("/\\{.*\\} !/", " !", $lt);
        // cut the trailing " ! comment" and split "tag: value"
        $parts = explode(" !", $lt);
        $a = explode(": ", trim($parts[0]), 2);

        // A run of intersection_of / relationship lines builds one blank-node
        // block; the block is complete as soon as a different tag shows up.
        if (isset($intersection_of) && $a[0] != "intersection_of") {
            if ($minimalp) {
                $min .= $intersection_of;
            }
            unset($intersection_of);
        }
        if (isset($relationship) && $a[0] != "relationship") {
            if ($minimalp) {
                $min .= $relationship;
            }
            unset($relationship);
        }

        if (isset($typedef)) {
            if ($a[0] == "id") {
                $c = explode(":", $a[1]);
                if (count($c) == 1) {
                    // unprefixed typedef ids go to the "obo" namespace
                    $ns = "obo";
                    $id = $c[0];
                } else {
                    $ns = strtolower($c[0]);
                    $id = $c[1];
                }
                $id = str_replace(array("(", ")"), array("_", ""), $id);
                $tid = $ns . ":" . $id;
            } elseif ($a[0] == "name") {
                $buf .= parent::describeClass($tid, addslashes(stripslashes($a[1])));
            } elseif ($a[0] == "is_a") {
                if (FALSE !== ($pos = strpos($a[1], "!"))) {
                    $a[1] = substr($a[1], 0, $pos - 1);
                }
                $buf .= parent::triplify($tid, "rdfs:subPropertyOf", "obo_vocabulary:" . strtolower($a[1]));
            } elseif ($a[0] == "is_obsolete") {
                $buf .= parent::triplify($tid, "rdf:type", "owl:DeprecatedClass");
                $is_deprecated = true;
            } else {
                if ($a[0][0] == "!") {
                    $a[0] = substr($a[0], 1);
                }
                $buf .= parent::triplifyString($tid, "obo_vocabulary:{$a['0']}", str_replace('"', '', stripslashes($a[1])));
            }
        } elseif (isset($term)) {
            if ($a[0] == "is_obsolete" && $a[1] == "true") {
                $t = parent::triplify($tid, "rdf:type", "owl:DeprecatedClass");
                $t .= parent::triplify($tid, "rdfs:subClassOf", "owl:DeprecatedClass");
                $min .= $t;
                $buf .= $t;
                $is_deprecated = true;
            } elseif ($a[0] == "id") {
                parent::getRegistry()->parseQName($a[1], $ns, $id);
                $tid = "{$ns}:{$id}";
            } elseif ($a[0] == "name") {
                $label = str_replace(array("\"", "'"), array("", "\\\\'"), stripslashes($a[1]));
                $t = parent::describeIndividual($tid, $label, "owl:Class");
                $t .= parent::triplify($tid, "rdfs:isDefinedBy", $ouri);
                $min .= $t;
                $buf .= $t;
            } elseif ($a[0] == "def") {
                $t = str_replace(array("'", "\"", "\\", "\\\\'"), array("\\\\'", "", "", ""), $a[1]);
                $min .= parent::triplifyString($tid, "dc:description", $t);
                $buf .= parent::triplifyString($tid, "dc:description", $t);
            } elseif ($a[0] == "property_value") {
                $b = explode(" ", $a[1]);
                $buf .= parent::triplifyString($tid, "obo_vocabulary:" . strtolower($b[0]), str_replace("\"", "", strtolower($b[1])));
            } elseif ($a[0] == "xref") {
                // Shapes handled here:
                //   a plain URL                     -> rdfs:seeAlso
                //   "Some Name:http\://..."          -> rdfs:seeAlso on the URL part
                //   KEGG COMPOUND:C02788 "comment"  -> obo_vocabulary:x-<ns> <ns>:<id>
                //   id-validation-regexp:...        -> kept as a string literal
                if (substr($a[1], 0, 4) == "http") {
                    $buf .= parent::triplify($tid, "rdfs:seeAlso", str_replace(array(" ", '"wiki"', "\\"), array("+", "", ""), $a[1]));
                } else {
                    $b = explode(":", $a[1], 2);
                    if (substr($b[1], 0, 4) == "http") {
                        $buf .= parent::triplify($tid, "rdfs:seeAlso", stripslashes($b[1]));
                    } else {
                        $ns = str_replace(array(" ", "\\"), "", strtolower($b[0]));
                        $id = trim($b[1]);
                        // there may be a comment to remove
                        if (FALSE !== ($pos = strrpos($id, ' "'))) {
                            $id = substr($id, 0, $pos);
                        }
                        $id = stripslashes($id);
                        // there may be a source statement to remove
                        $id = preg_replace("/{.*\\}/", "", $id);

                        if ($ns == "pmid") {
                            $ns = "pubmed";
                            $y = explode(" ", $id);
                            $id = $y[0];
                        }
                        // "continue" skips the write below; whatever is
                        // already buffered goes out with the next line.
                        if ($ns == "xx") {
                            continue;
                        }
                        if ($ns == "icd9cm") {
                            $y = explode(" ", $id);
                            $id = $y[0];
                        }
                        // Spaces were stripped from $ns above, so the
                        // malformed "xref; umls_cui" prefix shows up without
                        // its space. Comparing against the spaced form could
                        // never match.
                        if ($ns == "xref;umls_cui") {
                            continue;
                        }
                        if ($ns == "submitter") {
                            $ns = "chebi.submitter";
                        }
                        if ($ns == "wikipedia" || $ns == "mesh") {
                            $id = str_replace(" ", "+", $id);
                        }
                        if ($ns == "id-validation-regexp") {
                            $buf .= parent::triplifyString($tid, "obo_vocabulary:{$ns}", addslashes($id));
                        } else {
                            $buf .= parent::triplify($tid, "obo_vocabulary:x-{$ns}", "{$ns}:" . str_replace(" ", "-", $id));
                        }
                    }
                }
            } elseif ($a[0] == "synonym") {
                // synonym: "entidades moleculares" RELATED [IUPAC:]
                // synonym: "molecular entity" EXACT IUPAC_NAME [IUPAC:]
                // synonym: "Chondrococcus macrosporus" RELATED synonym [NCBITaxonRef:Krzemieniewska_and_Krzemieniewski_1926]
                // grab string inside double quotes
                preg_match('/"(.*)"(.*)/', $a[1], $matches);
                if (!empty($matches)) {
                    $a[1] = str_replace(array("\\", "\"", "'"), array("", "", "\\\\'"), $matches[1] . $matches[2]);
                } else {
                    $a[1] = str_replace(array("\"", "'"), array("", "\\\\'"), $a[1]);
                }

                $rel = "SYNONYM";
                $list = array("EXACT", "BROAD", "RELATED", "NARROW");
                $found = false;
                foreach ($list as $keyword) {
                    // get everything after the keyword up until the bracket [
                    if (FALSE !== ($k_pos = strpos($a[1], $keyword))) {
                        $keyword_end_pos = $k_pos + strlen($keyword);
                        $b1_pos = strrpos($a[1], "[");
                        $diff = $b1_pos - $keyword_end_pos - 1;
                        if ($diff != 0) {
                            // a synonym type follows the scope keyword; use it as predicate
                            $rel = trim(substr($a[1], $keyword_end_pos + 1, $diff));
                        } else {
                            // create the long predicate
                            $rel = $keyword . "_SYNONYM";
                        }
                        $found = true;
                        $str = substr($a[1], 0, $k_pos - 1);
                        break;
                    }
                }
                // check to see if we still haven't found anything
                if ($found === false) {
                    // no scope keyword: take from the start to the bracket
                    $b1_pos = strrpos($a[1], "[");
                    $str = substr($a[1], 0, $b1_pos - 1);
                }
                $rel = str_replace(" ", "_", $rel);
                $buf .= parent::triplifyString($tid, "obo_vocabulary:" . strtolower($rel), $str);
            } elseif ($a[0] == "alt_id") {
                parent::getRegistry()->parseQName($a[1], $ns, $id);
                if ($id != 'curators') {
                    $buf .= parent::triplify("{$ns}:{$id}", "rdfs:seeAlso", stripslashes($tid));
                }
            } elseif ($a[0] == "is_a") {
                // do subclassing
                parent::getRegistry()->parseQName($a[1], $ns, $id);
                $t = parent::triplify($tid, "rdfs:subClassOf", "{$ns}:{$id}");
                $buf .= $t;
                $min .= $t;
                $is_a = true;
            } elseif ($a[0] == "intersection_of") {
                // intersection_of: ECO:0000206 ! BLAST evidence
                // intersection_of: develops_from VAO:0000092 ! chondrogenic condensation
                // intersection_of: OBO_REL:has_part VAO:0000040 ! cartilage tissue
                if (!isset($intersection_of)) {
                    // First line of the run: open the equivalent-class
                    // expression. Two blank nodes are minted; $bid is left on
                    // the second one, which the member lines below refer to.
                    $intersection_of = '<' . parent::getRegistry()->getFQURI($tid) . '> <'
                        . parent::getRegistry()->getFQURI('owl:equivalentClass') . '> _:b' . ++$bid
                        . " {$graph_uri} ." . PHP_EOL;
                    $intersection_of .= '_:b' . $bid . ' <'
                        . parent::getRegistry()->getFQURI('rdf:type') . '> <'
                        . parent::getRegistry()->getFQURI('owl:Class')
                        . "> {$graph_uri} ." . PHP_EOL;
                    $intersection_of .= '_:b' . $bid . ' <'
                        . parent::getRegistry()->getFQURI('owl:intersectionOf') . '> _:b' . ++$bid
                        . " {$graph_uri} ." . PHP_EOL;
                }
                $c = explode(" ", $a[1]);
                if (count($c) == 1) {
                    // just a class
                    parent::getRegistry()->parseQName($c[0], $ns, $id);
                    $intersection_of .= '_:b' . $bid . ' <'
                        . parent::getRegistry()->getFQURI('rdfs:subClassOf') . '> <'
                        . parent::getRegistry()->getFQURI("{$ns}:{$id}")
                        . "> {$graph_uri} ." . PHP_EOL;
                    $buf .= parent::triplify($tid, "rdfs:subClassOf", "{$ns}:{$id}");
                } elseif (count($c) == 2) {
                    // an expression
                    parent::getRegistry()->parseQName($c[0], $pred_ns, $pred_id);
                    parent::getRegistry()->parseQName($c[1], $obj_ns, $obj_id);
                    $intersection_of .= '_:b' . $bid . ' <'
                        . parent::getRegistry()->getFQURI('owl:onProperty') . '> <'
                        . parent::getRegistry()->getFQURI("obo_vocabulary:" . $pred_id)
                        . "> {$graph_uri} ." . PHP_EOL;
                    $intersection_of .= '_:b' . $bid . ' <'
                        . parent::getRegistry()->getFQURI('owl:someValuesFrom') . '> <'
                        . parent::getRegistry()->getFQURI("{$obj_ns}:{$obj_id}")
                        . "> {$graph_uri} ." . PHP_EOL;
                    $buf .= parent::triplify($tid, "obo_vocabulary:{$pred_id}", "{$obj_ns}:{$obj_id}");
                }
            } elseif ($a[0] == "relationship") {
                // relationship: develops_from VAO:0000092 ! chondrogenic condensation
                // relationship: OBO_REL:has_part VAO:0000040 ! cartilage tissue
                if (!isset($relationship)) {
                    // Same layout as intersection_of, but attached with
                    // rdfs:subClassOf instead of owl:equivalentClass.
                    $relationship = '<' . parent::getRegistry()->getFQURI($tid) . '> <'
                        . parent::getRegistry()->getFQURI('rdfs:subClassOf') . '> _:b' . ++$bid
                        . " {$graph_uri} ." . PHP_EOL;
                    $relationship .= '_:b' . $bid . ' <'
                        . parent::getRegistry()->getFQURI('rdf:type') . '> <'
                        . parent::getRegistry()->getFQURI('owl:Class')
                        . "> {$graph_uri} ." . PHP_EOL;
                    $relationship .= '_:b' . $bid . ' <'
                        . parent::getRegistry()->getFQURI('owl:intersectionOf') . '> _:b' . ++$bid
                        . " {$graph_uri} ." . PHP_EOL;
                }
                $c = explode(" ", $a[1]);
                if (count($c) == 1) {
                    // just a class
                    // NOTE(review): this appends a bare URI rather than a
                    // complete statement, unlike the intersection_of branch.
                    // Kept as is; confirm the intended output.
                    parent::getRegistry()->parseQName($c[0], $ns, $id);
                    $relationship .= parent::getRegistry()->getFQURI("{$ns}:{$id}");
                    $buf .= parent::triplify($tid, "rdfs:subClassOf", "{$ns}:{$id}");
                } elseif (count($c) == 2) {
                    // an expression
                    parent::getRegistry()->parseQName($c[0], $pred_ns, $pred_id);
                    parent::getRegistry()->parseQName($c[1], $obj_ns, $obj_id);
                    $relationship .= '_:b' . $bid . ' <'
                        . parent::getRegistry()->getFQURI('owl:onProperty') . '> <'
                        . parent::getRegistry()->getFQURI("obo_vocabulary:" . $pred_id)
                        . "> {$graph_uri} ." . PHP_EOL;
                    $relationship .= '_:b' . $bid . ' <'
                        . parent::getRegistry()->getFQURI('owl:someValuesFrom') . '> <'
                        . parent::getRegistry()->getFQURI("{$obj_ns}:{$obj_id}")
                        . "> {$graph_uri} ." . PHP_EOL;
                    $buf .= parent::triplify($tid, "obo_vocabulary:{$pred_id}", "{$obj_ns}:{$obj_id}");
                }
            } elseif (isset($a[1])) {
                // default handler: any other tag becomes a string property
                $buf .= parent::triplifyString($tid, "obo_vocabulary:{$a['0']}", str_replace(array("\"", "'"), array("", "\\\\'"), stripslashes($a[1])));
            }
        } else {
            // header, e.g. "format-version: 1.0"
            $buf .= parent::triplifyString($ouri, "obo_vocabulary:{$a['0']}", str_replace(array('"', '\\:'), array('\\"', ':'), isset($a[1]) ? $a[1] : ""));
        }

        if ($minimal || $minimalp) {
            parent::getWriteFile()->write($min);
        } else {
            parent::getWriteFile()->write($buf);
        }
        $min = '';
        $buf = '';
    }

    // End of file: close whatever is still open, mirroring what a following
    // stanza header / tag line would have triggered inside the loop.
    if (isset($term) && $tid != '' && !$is_a && !$is_deprecated) {
        $t = parent::triplify($tid, "rdfs:subClassOf", "obo_vocabulary:Entity");
        $buf .= $t;
        $min .= $t;
    }
    if ($minimalp) {
        if (isset($intersection_of)) {
            $min .= $intersection_of;
        }
        if (isset($relationship)) {
            $min .= $relationship;
        }
    }

    if ($minimal || $minimalp) {
        parent::getWriteFile()->write($min);
    } else {
        parent::getWriteFile()->write($buf);
    }
}