/**
 * Generate an n-triple statement from three qualified names (prefix:local).
 *
 * Each qname is split on its FIRST colon only; the prefix is looked up in the
 * global $gns table and prepended to the local part, which may itself contain
 * colons (e.g. "ns:urn:x:1"). The expanded URIs are handed to Quad().
 *
 * If a prefix is not registered in $gns an error is triggered and the script
 * terminates. Prefixes are validated in subject, predicate, object order.
 *
 * @param string      $subject   qname of the subject
 * @param string      $predicate qname of the predicate
 * @param string      $object    qname of the object
 * @param string|null $graph     accepted for interface compatibility; this
 *                               function does not read it
 * @return mixed whatever Quad() returns for the expanded URIs
 */
function QQuad($subject, $predicate, $object, $graph = null)
{
    global $gns;

    $uris = array();
    $qnames = array('subject' => $subject, 'predicate' => $predicate, 'object' => $object);
    foreach ($qnames as $role => $qname) {
        // Limit of 2 keeps any further colons inside the local part instead of
        // silently dropping everything after the second segment.
        $parts = explode(":", $qname, 2);
        $prefix = $parts[0];
        if (!isset($gns[$prefix])) {
            trigger_error("Invalid " . $role . " qname " . $prefix);
            exit;
        }
        // A bare prefix without a colon expands to the namespace URI itself.
        $local = isset($parts[1]) ? $parts[1] : '';
        $uris[] = $gns[$prefix] . $local;
    }

    return Quad($uris[0], $uris[1], $uris[2]);
}
/**
 * Write the "type relation type count" statements for every predicate to a
 * file handle.
 *
 * For each predicate a count node is minted from md5(url + predicate +
 * "type_relation_type_count"). The dataset (md5 of $options['url']) is linked
 * to that node, and the node is described by its predicate, subject type,
 * subject count, object type and object count.
 *
 * @param resource   $fh     writable stream passed to fwrite()
 * @param array|null $counts predicate => array('count' => array(subject_type,
 *                           subject_count, object_type, object_count));
 *                           null writes nothing
 * @return void
 */
function write_type_relation_type_counts($fh, $counts)
{
    global $options;

    if ($counts === null) {
        return;
    }

    foreach ($counts as $pred => $count) {
        $dataset_uri = "http://bio2rdf.org/dataset_resource:" . md5($options['url']);
        $node_uri = "http://bio2rdf.org/dataset_resource:" . md5($options['url'] . $pred . "type_relation_type_count");

        // One write per statement, in the same order as they are described above.
        fwrite($fh, Quad($dataset_uri, "http://bio2rdf.org/dataset_vocabulary:has_type_relation_type_count", $node_uri));
        fwrite($fh, Quad($node_uri, "http://bio2rdf.org/dataset_vocabulary:has_predicate", $pred));
        fwrite($fh, Quad($node_uri, "http://bio2rdf.org/dataset_vocabulary:has_subject_type", $count["count"]["subject_type"]));
        fwrite($fh, QuadLiteral($node_uri, "http://bio2rdf.org/dataset_vocabulary:has_subject_count", $count["count"]["subject_count"]));
        fwrite($fh, Quad($node_uri, "http://bio2rdf.org/dataset_vocabulary:has_object_type", $count["count"]["object_type"]));
        fwrite($fh, QuadLiteral($node_uri, "http://bio2rdf.org/dataset_vocabulary:has_object_count", $count["count"]["object_count"]));
    }
}
/**
 * Convert one OBO ontology file into an RDF file named "<file>.ttl".
 *
 * The input is opened with gzopen() and read line by line. Lines seen before
 * the first stanza marker are treated as ontology header tags; lines inside a
 * "[Term]" stanza go through the term branch and lines inside a "[Typedef]"
 * stanza go through the typedef branch.
 *
 * Two buffers are filled side by side:
 *  - $buf receives every generated statement;
 *  - $min receives the file/ontology preamble plus the term label,
 *    description, is_a, deprecation and root-class statements, and class
 *    expressions only when $options['minimal+'] is 'true'.
 * $min is written when $options['minimal'] or $options['minimal+'] is 'true',
 * otherwise $buf is written.
 *
 * Side effects: overwrites the global $gns with $gns_backup, echoes a progress
 * line, writes the output file, and terminates the script via exit when either
 * file cannot be opened.
 *
 * @param string $indir  prefix concatenated directly with $file to form the
 *                       input path (no separator is inserted)
 * @param string $outdir prefix concatenated directly with $file . '.ttl' to
 *                       form the output path
 * @param string $file   input file name
 * @return void
 */
function OBO2TTL($indir, $outdir, $file)
{
    global $gns, $gns_backup, $options;

    // Restore the namespace table from its backup copy before processing.
    $gns = $gns_backup;

    $infile = $indir . $file;
    $outfile = $outdir . $file . '.ttl';

    $in = gzopen($infile, "r");
    if (false === $in) {
        trigger_error("unable to open " . $infile);
        exit;
    }
    $out = fopen($outfile, "w");
    if (false === $out) {
        // Do not leave the input handle open on the failure path.
        gzclose($in);
        trigger_error("unable to open " . $outfile);
        exit;
    }
    echo "Converting {$infile} to {$outfile}" . PHP_EOL;

    // Output mode is fixed for the whole run.
    $minimal_plus = ($options['minimal+']['value'] == 'true');
    $write_minimal = ($options['minimal']['value'] == 'true' || $minimal_plus);

    // Strip the directory part of the input path (backslash first, then slash).
    if (false !== ($pos = strrpos($infile, '\\'))) {
        $file = substr($infile, $pos + 1);
    } elseif (false !== ($pos = strrpos($infile, '/'))) {
        $file = substr($infile, $pos + 1);
    } else {
        $file = $infile;
    }
    $file .= ".ttl";

    // The ontology name is everything before the first dot of the file name.
    $pos = strpos($file, ".");
    $ontology = substr($file, 0, $pos);

    $furi = "bio2rdf_resource:file/{$file}";
    $ouri = "registry:{$ontology}";

    // Preamble: describe the generated file and the ontology it encodes.
    $header = N3NSHeader();
    $buf = QQuad($furi, "rdf:type", "sio:Document");
    $buf .= QQuadL($furi, "rdfs:label", "Turtle converted OBO file for {$ontology} ontology (obtained through NCBO Bioportal) [bio2rdf_resource:{$file}]");
    $buf .= QQuadL($furi, "dc:creator", "Michel Dumontier");
    $buf .= QQuadL($furi, "sio:encodes", $ouri);
    $buf .= QQuad($ouri, "rdf:type", "owl:Ontology");
    $buf .= QQuadL($ouri, "rdfs:label", "{$ontology} ontology");
    $buf .= QQuad($ouri, "sio:is-encoded-by", $furi);

    $tid = '';
    $first = true;
    $is_a = false;
    $is_deprecated = false;
    $min = $buf;

    // Compare against false explicitly so a falsy line cannot end the loop early.
    while (false !== ($line = gzgets($in))) {
        $lt = trim($line);
        if (strlen($lt) == 0) {
            continue;
        }
        // Lines starting with "!" are skipped entirely.
        if ($lt[0] == '!') {
            continue;
        }

        if (false !== strpos($line, "[Term]")) {
            // top level node?
            if ($first == true) {
                // ignore the first case
                $first = false;
            } elseif ($tid != '' && $is_a == false && $is_deprecated == false) {
                // The previous stanza had no is_a and was not deprecated:
                // attach it to the generic root class.
                // NOTE(review): this only fires when another [Term] follows, so
                // the last stanza of a file never receives this statement.
                $t = QQuad($tid, "rdfs:subClassOf", "bio2rdf_vocabulary:Entity");
                $buf .= $t;
                $min .= $t;
            }
            $is_a = false;
            $is_deprecated = false;
            unset($typedef);
            $term = '';
            $tid = '';
            continue;
        } elseif (false !== strpos($line, "[Typedef]")) {
            $is_a = false;
            $is_deprecated = false;
            unset($term);
            $tid = '';
            $typedef = '';
            continue;
        }

        // to fix error in obo generator
        $lt = str_replace("synonym ", "synonym: ", $lt);
        // Drop a trailing "{...} !" modifier block, then everything after " !".
        $lt = preg_replace("/\\{.*\\} !/", " !", $lt);
        $a = explode(" !", $lt);
        // Split into tag ($a[0]) and value ($a[1]).
        $a = explode(": ", trim($a[0]), 2);
        if (!isset($a[1])) {
            // A line without ": " has no value; use an empty string so the
            // branches below never read an undefined offset.
            $a[1] = '';
        }

        // An open intersection_of expression ends as soon as a different tag appears.
        if (isset($intersection_of) && $a[0] != "intersection_of") {
            $intersection_of .= ")]." . PHP_EOL;
            $buf .= $intersection_of;
            if ($minimal_plus) {
                $min .= $intersection_of;
            }
            unset($intersection_of);
        }

        if (isset($typedef)) {
            // ---- [Typedef] stanza ----
            if ($a[0] == "id") {
                $c = explode(":", $a[1]);
                if (count($c) == 1) {
                    // No prefix: fall back to the "obo" namespace.
                    $ns = "obo";
                    $id = $c[0];
                } else {
                    $ns = strtolower($c[0]);
                    $id = $c[1];
                }
                $id = str_replace(array("(", ")"), array("_", ""), $id);
                $tid = $ns . ":" . $id;
                $header .= AddToGlobalNS($ns);
                $buf .= QQuadL($tid, "dc:identifier", $tid);
            } elseif ($a[0] == "name") {
                $buf .= QQuadL($tid, "rdfs:label", addslashes(stripslashes($a[1])) . " [{$tid}]");
            } elseif ($a[0] == "is_a") {
                if (false !== ($pos = strpos($a[1], "!"))) {
                    $a[1] = substr($a[1], 0, $pos - 1);
                }
                $buf .= QQuad($tid, "rdfs:subPropertyOf", "obo:" . strtolower($a[1]));
            } elseif ($a[0] == "is_obsolete") {
                $buf .= QQuad($tid, "rdf:type", "owl:DeprecatedClass");
                $is_deprecated = true;
            } else {
                if ($a[0][0] == "!") {
                    $a[0] = substr($a[0], 1);
                }
                $buf .= QQuadL($tid, "obo:{$a[0]}", str_replace('"', '', stripslashes($a[1])));
            }
        } elseif (isset($term)) {
            // ---- [Term] stanza ----
            if ($a[0] == "is_obsolete" && $a[1] == "true") {
                $t = QQuad($tid, "rdf:type", "owl:DeprecatedClass");
                $t .= QQuad($tid, "rdfs:subClassOf", "owl:DeprecatedClass");
                $min .= $t;
                $buf .= $t;
                $is_deprecated = true;
            } elseif ($a[0] == "id") {
                ParseQNAME($a[1], $ns, $id);
                $header .= AddToGlobalNS($ns);
                $tid = $ns . ":" . $id;
                $buf .= QQuad($tid, "rdfs:isDefinedBy", $ouri);
                $buf .= QQuadL($tid, "dc:identifier", $tid);
            } elseif ($a[0] == "name") {
                $t = QQuadL($tid, "rdfs:label", str_replace(array("\"", "'"), array("", "\\\\'"), stripslashes($a[1])) . " [{$tid}]");
                $min .= $t;
                $buf .= $t;
            } elseif ($a[0] == "def") {
                $t = str_replace(array("'", "\"", "\\", "\\\\'"), array("\\\\'", "", "", ""), $a[1]);
                $min .= QQuadL($tid, "dc:description", $t);
                $buf .= QQuadL($tid, "dc:description", $t);
            } elseif ($a[0] == "property_value") {
                // First token names the property, second token is its value.
                $b = explode(" ", $a[1]);
                $buf .= QQuadL($tid, "obo_vocabulary:" . strtolower($b[0]), str_replace("\"", "", strtolower($b[1])));
            } elseif ($a[0] == "xref") {
                // http://upload.wikimedia.org/wikipedia/commons/3/34/Anatomical_Directions_and_Axes.JPG
                // Medical Dictionary:http\://www.medterms.com/
                // KEGG COMPOUND:C02788 "KEGG COMPOUND"
                // first cut off the quoted comment, if any
                if (false !== ($pos = strpos($a[1], '"'))) {
                    $identifier = substr($a[1], 0, $pos - 1);
                } else {
                    $identifier = $a[1];
                }
                // next identify the namespace and identifier
                if (false !== ($pos = strpos($identifier, ":"))) {
                    $id = trim(substr($identifier, $pos + 1));
                    $raw_ns = strtolower(substr($identifier, 0, $pos));
                    // the raw ns is likely to be very dirty
                    // should map to the registry
                    // but for now, just add this namespace
                    $ns = str_replace(" ", "_", $raw_ns);
                    $header .= AddToGlobalNS($ns);
                    if (false !== strpos($id, "http")) {
                        // The identifier is itself a URL: link to it directly.
                        $buf .= Quad(GetFQURI($tid), GetFQURI("rdfs:seeAlso"), stripslashes($id));
                    } else {
                        // NOTE(review): as written this str_replace swaps a
                        // space for a space; confirm the intended characters.
                        $buf .= QQuad($tid, "rdfs:seeAlso", strtolower($ns) . ":" . str_replace(" ", " ", stripslashes($id)));
                    }
                }
            } elseif ($a[0] == "synonym") {
                // synonym: "entidades moleculares" RELATED [IUPAC:]
                // synonym: "molecular entity" EXACT IUPAC_NAME [IUPAC:]
                // synonym: "Chondrococcus macrosporus" RELATED synonym [NCBITaxonRef:Krzemieniewska_and_Krzemieniewski_1926]
                // grab string inside double quotes
                preg_match('/"(.*)"(.*)/', $a[1], $matches);
                if (!empty($matches)) {
                    $a[1] = str_replace(array("\\", "\"", "'"), array("", "", "\\\\'"), $matches[1] . $matches[2]);
                } else {
                    $a[1] = str_replace(array("\"", "'"), array("", "\\\\'"), $a[1]);
                }

                $rel = "SYNONYM";
                $list = array("EXACT", "BROAD", "RELATED", "NARROW");
                $found = false;
                foreach ($list as $keyword) {
                    // get everything after the keyword up until the bracket [
                    if (false !== ($k_pos = strpos($a[1], $keyword))) {
                        $keyword_end_pos = $k_pos + strlen($keyword);
                        $b1_pos = strrpos($a[1], "[");
                        $diff = $b1_pos - $keyword_end_pos - 1;
                        if ($diff != 0) {
                            // then there is more stuff here
                            $k = substr($a[1], $keyword_end_pos + 1, $diff);
                            $rel = trim($k);
                        } else {
                            // create the long predicate
                            $rel = $keyword . "_SYNONYM";
                        }
                        $found = true;
                        $str = substr($a[1], 0, $k_pos - 1);
                        break;
                    }
                }
                // check to see if we still haven't found anything
                if ($found === false) {
                    // we didn't find one of the keywords
                    // so take from the start to the bracket
                    $b1_pos = strrpos($a[1], "[");
                    $str = substr($a[1], 0, $b1_pos - 1);
                }
                $rel = str_replace(" ", "_", $rel);
                $t = QQuadL($tid, "obo_vocabulary:" . strtolower($rel), $str);
                $buf .= $t;
            } elseif ($a[0] == "alt_id") {
                ParseQNAME($a[1], $ns, $id);
                if ($id != 'curators') {
                    $header .= AddToGlobalNS($ns);
                    $buf .= QQuad("{$ns}:{$id}", "rdfs:seeAlso", $tid);
                }
            } elseif ($a[0] == "is_a") {
                // do subclassing
                ParseQNAME($a[1], $ns, $id);
                $header .= AddToGlobalNS($ns);
                $t = QQuad($tid, "rdfs:subClassOf", "{$ns}:{$id}");
                $buf .= $t;
                $min .= $t;
                $is_a = true;
            } elseif ($a[0] == "intersection_of") {
                // Consecutive intersection_of lines are collected into a single
                // owl:equivalentClass expression, closed when another tag follows.
                if (!isset($intersection_of)) {
                    $intersection_of = GetFQURITTL($tid) . ' ' . GetFQURITTL('owl:equivalentClass') . ' [' . GetFQURITTL('rdf:type') . ' ' . GetFQURITTL('owl:Class') . '; ' . GetFQURITTL('owl:intersectionOf') . ' (';
                }
                /*
                 intersection_of: develops_from VAO:0000092 ! chondrogenic condensation
                 intersection_of: OBO_REL:has_part VAO:0000040 ! cartilage tissue
                */
                $c = explode(" ", $a[1]);
                if (count($c) == 1) {
                    // just a class
                    ParseQNAME($c[0], $ns, $id);
                    $header .= AddToGlobalNS($ns);
                    $intersection_of .= GetFQURITTL("{$ns}:{$id}");
                } elseif (count($c) == 2) {
                    // an expression
                    ParseQNAME($c[0], $pred_ns, $pred_id);
                    $header .= AddToGlobalNS($pred_ns);
                    ParseQNAME($c[1], $obj_ns, $obj_id);
                    $header .= AddToGlobalNS($obj_ns);
                    $intersection_of .= ' [' . GetFQURITTL('owl:onProperty') . ' ' . GetFQURITTL("obo:" . $pred_id) . '; ' . GetFQURITTL('owl:someValuesFrom') . ' ' . GetFQURITTL("{$obj_ns}:{$obj_id}") . '] ';
                }
            } elseif ($a[0] == "relationship") {
                // NOTE(review): unlike intersection_of, an expression opened here
                // is only closed by a two-token value (or at end of input), so a
                // one-token or 3+-token value leaves it open across later lines.
                // Behaviour kept as-is; confirm whether that is intended.
                if (!isset($relationship)) {
                    $relationship = GetFQURITTL($tid) . ' ' . GetFQURITTL('rdfs:subClassOf') . ' [' . GetFQURITTL('rdf:type') . ' ' . GetFQURITTL('owl:Class') . '; ' . GetFQURITTL('owl:intersectionOf') . ' (';
                }
                /*
                 relationship: develops_from VAO:0000092 ! chondrogenic condensation
                 relationship: OBO_REL:has_part VAO:0000040 ! cartilage tissue
                */
                $c = explode(" ", $a[1]);
                if (count($c) == 1) {
                    // just a class
                    ParseQNAME($c[0], $ns, $id);
                    $header .= AddToGlobalNS($ns);
                    $relationship .= GetFQURITTL("{$ns}:{$id}");
                } elseif (count($c) == 2) {
                    // an expression
                    ParseQNAME($c[0], $pred_ns, $pred_id);
                    $header .= AddToGlobalNS($pred_ns);
                    ParseQNAME($c[1], $obj_ns, $obj_id);
                    $header .= AddToGlobalNS($obj_ns);
                    $relationship .= ' [' . GetFQURITTL('owl:onProperty') . ' ' . GetFQURITTL("obo:" . $pred_id) . '; ' . GetFQURITTL('owl:someValuesFrom') . ' ' . GetFQURITTL("{$obj_ns}:{$obj_id}") . '] ';
                    $relationship .= ")]." . PHP_EOL;
                    $buf .= $relationship;
                    if ($minimal_plus) {
                        $min .= $relationship;
                    }
                    unset($relationship);
                }
            } else {
                // default handler
                $buf .= QQuadL($tid, "obo:{$a[0]}", str_replace(array("\"", "'"), array("", "\\\\'"), stripslashes($a[1])));
            }
        } else {
            // header
            // format-version: 1.0
            $a = explode(": ", trim($line));
            $buf .= QQuadL($ouri, "obo:{$a[0]}", str_replace(array('"', '\\:'), array('\\"', ':'), isset($a[1]) ? $a[1] : ""));
        }

        // Flush after each processed line so the buffers stay small.
        fwrite($out, $header);
        fwrite($out, $write_minimal ? $min : $buf);
        $min = '';
        $buf = '';
        $header = '';
    }

    // Close any class expression still open at end of input. Mirror the
    // in-loop handling so it also reaches $min when 'minimal+' is on.
    if (isset($intersection_of)) {
        $intersection_of .= ")]." . PHP_EOL;
        $buf .= $intersection_of;
        if ($minimal_plus) {
            $min .= $intersection_of;
        }
    }
    if (isset($relationship)) {
        $relationship .= ")]." . PHP_EOL;
        $buf .= $relationship;
        if ($minimal_plus) {
            $min .= $relationship;
        }
    }

    gzclose($in);
    fwrite($out, $write_minimal ? $min : $buf);
    fclose($out);
}
/**
 * Emit one void:LinkSet description per (predicate, subject type, object type)
 * combination returned by a SPARQL count query.
 *
 * The query selects distinct ?p ?stype ?otype with COUNT(?s), restricted to
 * types matching "vocabulary:Resource" where the two types differ. A row is
 * only described when both type URIs match
 * http://bio2rdf.org/<dataset>_vocabulary; the captured <dataset> parts are
 * used in the label. The statements for each row are passed to write() as a
 * single string.
 *
 * Reads $options['from-graph'], $options['dataset_name'] and $options['uri'].
 *
 * @return void
 */
function addDatasetPropertyDatasetCount()
{
    global $options;

    $rdf_type = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
    $count_class = "http://bio2rdf.org/bio2rdf.dataset_vocabulary:Dataset-Dataset-Property-Dataset-Count";
    $dataset_pattern = "/http:\\/\\/bio2rdf.org\\/([^_]+)_vocabulary/";

    $sparql = "SELECT DISTINCT ?p ?stype ?otype (COUNT(?s) AS ?n)\n"
        . $options['from-graph'] . " \n"
        . "{\n"
        . "\t?s ?p ?o .\n"
        . "\t?s a ?stype .\n"
        . "\t?o a ?otype .\n"
        . "\tFILTER regex (?stype, \"vocabulary:Resource\")\n"
        . "\tFILTER regex (?otype, \"vocabulary:Resource\")\n"
        . "\tFILTER (?stype != ?otype)\n"
        . "}";

    $rows = query($sparql);
    foreach ($rows as $row) {
        $id = getID($row);
        preg_match($dataset_pattern, $row->stype->value, $subject_match);
        preg_match($dataset_pattern, $row->otype->value, $object_match);

        // Skip rows whose types are not in a bio2rdf dataset vocabulary.
        if (!isset($subject_match[1]) || !isset($object_match[1])) {
            continue;
        }

        $subject_dataset = $subject_match[1];
        $object_dataset = $object_match[1];
        $predicate = $row->p->value;
        $label = "{$subject_dataset} connected to {$object_dataset} through " . $row->n->value . " <{$predicate}> in " . $options['dataset_name'];

        $statements = array(
            Quad($options['uri'], "http://rdfs.org/ns/void#subset", $id),
            Quad($id, $rdf_type, "http://rdfs.org/ns/void#LinkSet"),
            Quad($id, $rdf_type, $count_class),
            QuadLiteral($id, "http://www.w3.org/2000/01/rdf-schema#label", $label, null, "en"),
            Quad($id, "http://rdfs.org/ns/void#linkPredicate", $row->p->value),
            Quad($id, "http://rdfs.org/ns/void#subjectsTarget", $row->stype->value),
            Quad($id, "http://rdfs.org/ns/void#objectsTarget", $row->otype->value),
            QuadLiteral($id, "http://rdfs.org/ns/void#triples", $row->n->value, "long"),
            Quad($count_class, "http://www.w3.org/2000/01/rdf-schema#subClassOf", "http://bio2rdf.org/bio2rdf.dataset_vocabulary:Dataset-Descriptor"),
        );
        write(implode("", $statements));
    }
}
/**
 * Serialise namespace-to-namespace link counts as RDF statements.
 *
 * For each entry a count node is minted from md5(dataset name + ns1 + ns2 +
 * count). The dataset (md5 of "http://<dataset_name>.bio2rdf.org/sparql") is
 * linked to that node, and the node is typed and described by its two
 * namespaces and the count value.
 *
 * @param array  $counts_arr   list of arrays with keys 'ns1', 'ns2', 'count'
 * @param string $dataset_name dataset name used to build the dataset URI
 * @return string concatenated statements; empty string for an empty list
 */
function rdfize_counts($counts_arr, $dataset_name)
{
    $vocabulary = "http://bio2rdf.org/dataset_vocabulary:";

    // The dataset URI depends only on the dataset name, so build it once.
    $dataset_uri = "http://bio2rdf.org/dataset_resource:" . md5("http://" . $dataset_name . ".bio2rdf.org/sparql");

    $rdf = "";
    foreach ($counts_arr as $nscount) {
        $ns1 = $nscount['ns1'];
        $ns2 = $nscount['ns2'];
        $count = $nscount['count'];

        $count_uri = "http://bio2rdf.org/dataset_resource:" . md5($dataset_name . $ns1 . $ns2 . $count);

        $rdf .= Quad($dataset_uri, $vocabulary . "has_nsns_count", $count_uri);
        $rdf .= Quad($count_uri, "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", $vocabulary . "Namespace_Namespace_Count");
        $rdf .= Quad($count_uri, $vocabulary . "namespace1", "http://bio2rdf.org/" . $ns1);
        $rdf .= Quad($count_uri, $vocabulary . "namespace2", "http://bio2rdf.org/" . $ns2);
        // Same vocabulary host as every other predicate here; the previous
        // literal was missing the dot ("bio2rdforg").
        $rdf .= QuadLiteral($count_uri, $vocabulary . "has_nsns_count_value", $count);
    }

    return $rdf;
}