Пример #1
0
/**
 * Convert one OBO ontology file into a Turtle (.ttl) file.
 *
 * The input is read line by line with the zlib stream functions and the
 * output is written incrementally: after every processed content line the
 * pending prefix declarations and the pending triples are flushed to disk.
 *
 * Two output flavours are selected through the global $options array:
 *   - $options['minimal']['value']  == 'true' : only labels, descriptions,
 *     subclass axioms and deprecation markers are written;
 *   - $options['minimal+']['value'] == 'true' : as above, plus the
 *     intersection_of / relationship class expressions;
 *   - otherwise every generated triple is written.
 *
 * Globals: resets $gns to $gns_backup before starting, so prefixes registered
 * by a previous conversion are declared again in this file.
 *
 * Failure to open either file raises a user-level error and terminates the
 * script (unchanged from the original behaviour).
 *
 * @param string $indir  Input directory; concatenated directly with $file, so
 *                       it must already end with a directory separator.
 * @param string $outdir Output directory; same trailing-separator requirement.
 * @param string $file   Name of the OBO file inside $indir. The output file is
 *                       "$outdir$file.ttl".
 *
 * @return void
 */
function OBO2TTL($indir, $outdir, $file)
{
    global $gns, $gns_backup, $options;

    // Start from the pristine prefix table so AddToGlobalNS() reports every
    // prefix that is new *for this file*.
    $gns = $gns_backup;

    $infile = $indir . $file;
    $outfile = $outdir . $file . '.ttl';

    $in = gzopen($infile, "r");
    if (FALSE === $in) {
        trigger_error("unable to open " . $infile);
        exit;
    }
    $out = fopen($outfile, "w");
    if (FALSE === $out) {
        // Do not leave the input stream open on the error path.
        gzclose($in);
        trigger_error("unable to open " . $outfile);
        exit;
    }
    echo "Converting {$infile} to {$outfile}" . PHP_EOL;

    // Strip any directory part (either separator style) to get the file name.
    $pos = strrpos($infile, '\\');
    if (FALSE === $pos) {
        $pos = strrpos($infile, '/');
    }
    $file = (FALSE === $pos) ? $infile : substr($infile, $pos + 1);
    $file .= ".ttl";

    // The ontology short name is everything before the first dot.
    $ontology = substr($file, 0, strpos($file, "."));
    $furi = "bio2rdf_resource:file/{$file}";
    $ouri = "registry:{$ontology}";

    // Preamble: describe the generated document and the ontology it encodes.
    $header = N3NSHeader();
    $buf = QQuad($furi, "rdf:type", "sio:Document");
    $buf .= QQuadL($furi, "rdfs:label", "Turtle converted OBO file for {$ontology} ontology (obtained through NCBO Bioportal) [bio2rdf_resource:{$file}]");
    $buf .= QQuadL($furi, "dc:creator", "Michel Dumontier");
    $buf .= QQuadL($furi, "sio:encodes", $ouri);
    $buf .= QQuad($ouri, "rdf:type", "owl:Ontology");
    $buf .= QQuadL($ouri, "rdfs:label", "{$ontology} ontology");
    $buf .= QQuad($ouri, "sio:is-encoded-by", $furi);
    $min = $buf;

    // Output mode flags (loop invariant).
    $minimal_plus = ($options['minimal+']['value'] == 'true');
    $write_min = ($options['minimal']['value'] == 'true' || $minimal_plus);

    // Parser state. $term / $typedef are used as "is set" markers for the
    // kind of stanza currently being read; neither is set while in the header.
    $tid = '';
    $is_a = false;
    $is_deprecated = false;

    // Compare against FALSE explicitly so a falsy line (e.g. a last line "0"
    // without a newline) does not end the loop early.
    while (FALSE !== ($l = gzgets($in))) {
        $lt = trim($l);
        // Skip blank lines and OBO comment lines.
        if ($lt === '' || $lt[0] == '!') {
            continue;
        }

        // Stanza markers.
        $is_term_marker = (FALSE !== strpos($l, "[Term]"));
        if ($is_term_marker || FALSE !== strpos($l, "[Typedef]")) {
            // A stanza just ended. If it was a *term* without a parent and not
            // obsolete, anchor it under the generic top-level class. This is
            // done for every stanza boundary (not only Term -> Term) and never
            // for typedefs, which describe properties rather than classes.
            if (isset($term) && $tid != '' && !$is_a && !$is_deprecated) {
                $t = QQuad($tid, "rdfs:subClassOf", "bio2rdf_vocabulary:Entity");
                $buf .= $t;
                $min .= $t;
            }
            $is_a = false;
            $is_deprecated = false;
            $tid = '';
            if ($is_term_marker) {
                unset($typedef);
                $term = '';
            } else {
                unset($term);
                $typedef = '';
            }
            continue;
        }

        // Work around an OBO generator error that drops the colon after a
        // synonym tag. Only the tag at the start of the line is repaired; a
        // blanket replace would also corrupt scope text such as
        // 'RELATED synonym [..]' and free text inside definitions.
        $lt = preg_replace('/^(\w*synonym) /', '$1: ', $lt);
        // Drop trailing-modifier blocks "{...}" that precede a "!" comment.
        $lt = preg_replace("/\\{.*\\} !/", " !", $lt);
        // Discard the trailing "!" comment, then split "tag: value".
        $a = explode(" !", $lt);
        $a = explode(": ", trim($a[0]), 2);
        $key = $a[0];
        // Lines without a ": " separator have no value; treat it as empty
        // instead of reading an undefined offset.
        $value = isset($a[1]) ? $a[1] : '';

        // A run of intersection_of lines ends at the first other tag: close
        // the class expression and emit it.
        if (isset($intersection_of) && $key != "intersection_of") {
            $intersection_of .= ")]." . PHP_EOL;
            $buf .= $intersection_of;
            if ($minimal_plus) {
                $min .= $intersection_of;
            }
            unset($intersection_of);
        }

        if (isset($typedef)) {
            // ---- [Typedef] stanza -------------------------------------
            if ($key == "id") {
                $c = explode(":", $value);
                if (count($c) == 1) {
                    // Unprefixed relation ids live in the generic obo namespace.
                    $ns = "obo";
                    $id = $c[0];
                } else {
                    $ns = strtolower($c[0]);
                    $id = $c[1];
                }
                $id = str_replace(array("(", ")"), array("_", ""), $id);
                $tid = $ns . ":" . $id;
                $header .= AddToGlobalNS($ns);
                $buf .= QQuadL($tid, "dc:identifier", $tid);
            } elseif ($key == "name") {
                $buf .= QQuadL($tid, "rdfs:label", addslashes(stripslashes($value)) . " [{$tid}]");
            } elseif ($key == "is_a") {
                if (FALSE !== ($pos = strpos($value, "!"))) {
                    $value = substr($value, 0, $pos - 1);
                }
                $buf .= QQuad($tid, "rdfs:subPropertyOf", "obo:" . strtolower($value));
            } elseif ($key == "is_obsolete") {
                $buf .= QQuad($tid, "rdf:type", "owl:DeprecatedClass");
                $is_deprecated = true;
            } else {
                // Generic tag: emit as an obo: literal property.
                if (strpos($key, '!') === 0) {
                    $key = substr($key, 1);
                }
                $buf .= QQuadL($tid, "obo:{$key}", str_replace('"', '', stripslashes($value)));
            }
        } elseif (isset($term)) {
            // ---- [Term] stanza ----------------------------------------
            if ($key == "is_obsolete" && $value == "true") {
                $t = QQuad($tid, "rdf:type", "owl:DeprecatedClass");
                $t .= QQuad($tid, "rdfs:subClassOf", "owl:DeprecatedClass");
                $min .= $t;
                $buf .= $t;
                $is_deprecated = true;
            } elseif ($key == "id") {
                ParseQNAME($value, $ns, $id);
                $header .= AddToGlobalNS($ns);
                $tid = $ns . ":" . $id;
                $buf .= QQuad($tid, "rdfs:isDefinedBy", $ouri);
                $buf .= QQuadL($tid, "dc:identifier", $tid);
            } elseif ($key == "name") {
                $t = QQuadL($tid, "rdfs:label", str_replace(array("\"", "'"), array("", "\\\\'"), stripslashes($value)) . " [{$tid}]");
                $min .= $t;
                $buf .= $t;
            } elseif ($key == "def") {
                // str_replace applies the pairs in order, so the backslashes
                // inserted before apostrophes by the first pair are removed
                // again by the third; the net effect is: double quotes and
                // backslashes are stripped, apostrophes are kept.
                $text = str_replace(array("'", "\"", "\\", "\\\\'"), array("\\\\'", "", "", ""), $value);
                $t = QQuadL($tid, "dc:description", $text);
                $min .= $t;
                $buf .= $t;
            } elseif ($key == "property_value") {
                $b = explode(" ", $value);
                $buf .= QQuadL($tid, "obo_vocabulary:" . strtolower($b[0]), str_replace("\"", "", strtolower(isset($b[1]) ? $b[1] : '')));
            } elseif ($key == "xref") {
                // Examples of xref values:
                //   http://upload.wikimedia.org/wikipedia/commons/3/34/Anatomical_Directions_and_Axes.JPG
                //   Medical Dictionary:http\://www.medterms.com/
                //   KEGG COMPOUND:C02788 "KEGG COMPOUND"
                // Cut off the optional quoted description first.
                if (FALSE !== ($pos = strpos($value, '"'))) {
                    $identifier = substr($value, 0, $pos - 1);
                } else {
                    $identifier = $value;
                }
                // Then split "namespace:identifier". References without a
                // colon are skipped.
                if (FALSE !== ($pos = strpos($identifier, ":"))) {
                    $id = trim(substr($identifier, $pos + 1));
                    $raw_ns = strtolower(substr($identifier, 0, $pos));
                    // The raw namespace is free text and should eventually be
                    // mapped to the registry; for now it is only made
                    // prefix-safe and registered as-is.
                    $ns = str_replace(" ", "_", $raw_ns);
                    $header .= AddToGlobalNS($ns);
                    if (strstr($id, "http")) {
                        // The identifier itself is a URL: link to it directly.
                        $buf .= Quad(GetFQURI($tid), GetFQURI("rdfs:seeAlso"), stripslashes($id));
                    } else {
                        // NOTE(review): this str_replace looks like a no-op as
                        // written; it is kept unchanged - confirm whether a
                        // different replacement was intended.
                        $buf .= QQuad($tid, "rdfs:seeAlso", strtolower($ns) . ":" . str_replace(" ", " ", stripslashes($id)));
                    }
                }
            } elseif ($key == "synonym") {
                // Examples:
                //   synonym: "entidades moleculares" RELATED [IUPAC:]
                //   synonym: "molecular entity" EXACT IUPAC_NAME [IUPAC:]
                //   synonym: "Chondrococcus macrosporus" RELATED synonym [NCBITaxonRef:Krzemieniewska_and_Krzemieniewski_1926]
                // Remove the quotes around the synonym text and escape it.
                preg_match('/"(.*)"(.*)/', $value, $matches);
                if (!empty($matches)) {
                    $value = str_replace(array("\\", "\"", "'"), array("", "", "\\\\'"), $matches[1] . $matches[2]);
                } else {
                    $value = str_replace(array("\"", "'"), array("", "\\\\'"), $value);
                }
                $rel = "SYNONYM";
                $found = false;
                foreach (array("EXACT", "BROAD", "RELATED", "NARROW") as $keyword) {
                    if (FALSE !== ($k_pos = strpos($value, $keyword))) {
                        // Whatever sits between the scope keyword and the
                        // dbxref bracket is the synonym type.
                        $keyword_end_pos = $k_pos + strlen($keyword);
                        $b1_pos = strrpos($value, "[");
                        $diff = $b1_pos - $keyword_end_pos - 1;
                        if ($diff != 0) {
                            // An explicit synonym type follows the keyword.
                            $rel = trim(substr($value, $keyword_end_pos + 1, $diff));
                        } else {
                            // No type given: build the long predicate name.
                            $rel = $keyword . "_SYNONYM";
                        }
                        $found = true;
                        $str = substr($value, 0, $k_pos - 1);
                        break;
                    }
                }
                if ($found === false) {
                    // No scope keyword: the text runs up to the bracket.
                    $b1_pos = strrpos($value, "[");
                    $str = substr($value, 0, $b1_pos - 1);
                }
                $rel = str_replace(" ", "_", $rel);
                $buf .= QQuadL($tid, "obo_vocabulary:" . strtolower($rel), $str);
            } elseif ($key == "alt_id") {
                ParseQNAME($value, $ns, $id);
                if ($id != 'curators') {
                    $header .= AddToGlobalNS($ns);
                    $buf .= QQuad("{$ns}:{$id}", "rdfs:seeAlso", $tid);
                }
            } elseif ($key == "is_a") {
                // Plain subclassing.
                ParseQNAME($value, $ns, $id);
                $header .= AddToGlobalNS($ns);
                $t = QQuad($tid, "rdfs:subClassOf", "{$ns}:{$id}");
                $buf .= $t;
                $min .= $t;
                $is_a = true;
            } elseif ($key == "intersection_of") {
                // Consecutive intersection_of lines are collected into one
                // owl:equivalentClass expression, closed by the next other tag.
                if (!isset($intersection_of)) {
                    $intersection_of = GetFQURITTL($tid) . ' ' . GetFQURITTL('owl:equivalentClass') . ' [' . GetFQURITTL('rdf:type') . ' ' . GetFQURITTL('owl:Class') . '; ' . GetFQURITTL('owl:intersectionOf') . ' (';
                }
                /*
                intersection_of: develops_from VAO:0000092 ! chondrogenic condensation
                intersection_of: OBO_REL:has_part VAO:0000040 ! cartilage tissue
                */
                $c = explode(" ", $value);
                if (count($c) == 1) {
                    // A bare class.
                    ParseQNAME($c[0], $ns, $id);
                    $header .= AddToGlobalNS($ns);
                    $intersection_of .= GetFQURITTL("{$ns}:{$id}");
                } elseif (count($c) == 2) {
                    // "relation class" -> existential restriction.
                    ParseQNAME($c[0], $pred_ns, $pred_id);
                    $header .= AddToGlobalNS($pred_ns);
                    ParseQNAME($c[1], $obj_ns, $obj_id);
                    $header .= AddToGlobalNS($obj_ns);
                    $intersection_of .= ' [' . GetFQURITTL('owl:onProperty') . ' ' . GetFQURITTL("obo:" . $pred_id) . '; ' . GetFQURITTL('owl:someValuesFrom') . ' ' . GetFQURITTL("{$obj_ns}:{$obj_id}") . '] ';
                }
            } elseif ($key == "relationship") {
                // NOTE(review): the expression is only closed by a two-token
                // line; a one-token (or longer) line leaves it open until a
                // later relationship line or end of file - confirm intent.
                if (!isset($relationship)) {
                    $relationship = GetFQURITTL($tid) . ' ' . GetFQURITTL('rdfs:subClassOf') . ' [' . GetFQURITTL('rdf:type') . ' ' . GetFQURITTL('owl:Class') . '; ' . GetFQURITTL('owl:intersectionOf') . ' (';
                }
                /*
                relationship: develops_from VAO:0000092 ! chondrogenic condensation
                relationship: OBO_REL:has_part VAO:0000040 ! cartilage tissue
                */
                $c = explode(" ", $value);
                if (count($c) == 1) {
                    // A bare class.
                    ParseQNAME($c[0], $ns, $id);
                    $header .= AddToGlobalNS($ns);
                    $relationship .= GetFQURITTL("{$ns}:{$id}");
                } elseif (count($c) == 2) {
                    // "relation class" -> existential restriction, emitted
                    // immediately.
                    ParseQNAME($c[0], $pred_ns, $pred_id);
                    $header .= AddToGlobalNS($pred_ns);
                    ParseQNAME($c[1], $obj_ns, $obj_id);
                    $header .= AddToGlobalNS($obj_ns);
                    $relationship .= ' [' . GetFQURITTL('owl:onProperty') . ' ' . GetFQURITTL("obo:" . $pred_id) . '; ' . GetFQURITTL('owl:someValuesFrom') . ' ' . GetFQURITTL("{$obj_ns}:{$obj_id}") . '] ';
                    $relationship .= ")]." . PHP_EOL;
                    $buf .= $relationship;
                    if ($minimal_plus) {
                        $min .= $relationship;
                    }
                    unset($relationship);
                }
            } else {
                // Default handler: any other tag becomes an obo: literal.
                $buf .= QQuadL($tid, "obo:{$key}", str_replace(array("\"", "'"), array("", "\\\\'"), stripslashes($value)));
            }
        } else {
            // ---- File header, e.g. "format-version: 1.0" --------------
            // The raw line is split here (no comment stripping, no limit).
            $h = explode(": ", trim($l));
            $buf .= QQuadL($ouri, "obo:{$h[0]}", str_replace(array('"', '\\:'), array('\\"', ':'), isset($h[1]) ? $h[1] : ""));
        }

        // Flush what this line produced; new prefixes go first so they are
        // declared before the triples that use them.
        fwrite($out, $header);
        fwrite($out, $write_min ? $min : $buf);
        $min = '';
        $buf = '';
        $header = '';
    }

    // End of input: close the last stanza exactly like a stanza marker would.
    if (isset($term) && $tid != '' && !$is_a && !$is_deprecated) {
        $t = QQuad($tid, "rdfs:subClassOf", "bio2rdf_vocabulary:Entity");
        $buf .= $t;
        $min .= $t;
    }
    // Close class expressions that were still open, honouring 'minimal+'
    // the same way the in-loop code does.
    if (isset($intersection_of)) {
        $intersection_of .= ")]." . PHP_EOL;
        $buf .= $intersection_of;
        if ($minimal_plus) {
            $min .= $intersection_of;
        }
    }
    if (isset($relationship)) {
        $relationship .= ")]." . PHP_EOL;
        $buf .= $relationship;
        if ($minimal_plus) {
            $min .= $relationship;
        }
    }
    gzclose($in);

    // $header is only non-empty here when no content line triggered a flush
    // (e.g. empty input); it must still precede the preamble triples.
    fwrite($out, $header);
    fwrite($out, $write_min ? $min : $buf);
    fclose($out);
}
Пример #2
0
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
// Base URI under which every Bio2RDF prefix is minted.
define('BIO2RDF_URI', 'http://bio2rdf.org/');

// Global prefix => namespace URI table.
// Standard vocabularies first ...
$gns = array(
    'xsd'  => 'http://www.w3.org/2001/XMLSchema#',
    'rdf'  => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    'rdfs' => 'http://www.w3.org/2000/01/rdf-schema#',
    'owl'  => 'http://www.w3.org/2002/07/owl#',
    'dc'   => 'http://purl.org/dc/terms/',
    'skos' => 'http://www.w3.org/2004/02/skos/core#',
    'foaf' => 'http://xmlns.com/foaf/0.1/',
    'sio'  => 'http://semanticscience.org/resource/',
);
// ... followed by the Bio2RDF core prefixes.
$gns['bio2rdf'] = BIO2RDF_URI;
$gns['bio2rdf_resource'] = BIO2RDF_URI . 'bio2rdf_resource:';
$gns['bio2rdf_vocabulary'] = BIO2RDF_URI . 'bio2rdf_vocabulary:';

// Recognised dataset namespaces. Each one is registered below together with
// its "<ns>_vocabulary" and "<ns>_resource" companions.
$gdataset_ns = array(
    'afcs', 'apo', 'atc', 'bind', 'bindingdb', 'biogrid', 'blastprodom',
    'candida', 'cas', 'chebi', 'chemspider', 'coil', 'corum', 'ctd', 'cygd',
    'dbsnp', 'dip', 'ddbj', 'dpd', 'drugbank', 'ec', 'embl', 'ensembl', 'eco',
    'euroscarf', 'flybase', 'fprintscan', 'kegg', 'gene3d', 'genbank',
    'geneid', 'germonline', 'go', 'gp', 'grid', 'het', 'hprd', 'innatedb',
    'intact', 'ipi', 'irefindex', 'iubmb', 'rogid', 'irogid', 'rigid',
    'irigid', 'crigid', 'crogid', 'icrogid', 'icrigid', 'iupharligand',
    'matrixdb', 'mesh', 'metacyc', 'mi', 'mint', 'mips', 'mpact', 'mpi',
    'ncbi', 'ndc', 'refseq', 'obo', 'omim', 'ophid', 'patternscan', 'pato',
    'panther', 'pfam', 'pharmgkb', 'pir', 'prf', 'prodom', 'profilescan',
    'pdb', 'pubmed', 'pubchem', 'pubchemcompound', 'pubchemsubstance',
    'reactome', 'registry', 'registry_dataset', 'seg', 'sgd', 'smart',
    'snomed', 'so', 'superfamily', 'swissprot', 'symbol', 'taxon', 'tcdb',
    'tigr', 'tpg', 'trembl', 'ttd', 'offsides', 'twosides', 'umls', 'uniparc',
    'uniprot', 'uo',
);

// Register every dataset namespace; the prefix declaration returned by
// AddToGlobalNS() is not needed here.
foreach ($gdataset_ns as $ns) {
    AddToGlobalNS($ns, true);
}

// Snapshot of the fully initialised table, so a converter can restore it
// before processing each file.
$gns_backup = $gns;
/**
 * Register a Bio2RDF prefix in the global namespace table $gns.
 *
 * Every prefix that is not yet present in $gns is mapped to
 * BIO2RDF_URI . "<prefix>:" and a Turtle "@prefix" declaration for it is
 * appended to the return value. Prefixes that are already registered are left
 * untouched and produce no declaration, so callers can append the result to
 * their output unconditionally.
 *
 * @param string $ns                   Prefix to register.
 * @param bool   $add_voc_and_resource When true, "<ns>_vocabulary" and
 *                                     "<ns>_resource" are registered as well.
 *
 * @return string Declarations (one per line, each terminated by PHP_EOL) for
 *                the prefixes added by this call; '' when nothing was added.
 */
function AddToGlobalNS($ns, $add_voc_and_resource = false)
{
    global $gns;

    $prefixes = array($ns);
    if ($add_voc_and_resource) {
        $prefixes[] = $ns . '_vocabulary';
        $prefixes[] = $ns . '_resource';
    }

    $declarations = '';
    foreach ($prefixes as $prefix) {
        if (isset($gns[$prefix])) {
            continue;
        }
        // Build the URI once so the registered value and the emitted
        // declaration can never disagree.
        $uri = BIO2RDF_URI . $prefix . ':';
        $gns[$prefix] = $uri;
        $declarations .= "@prefix {$prefix}: <{$uri}> ." . PHP_EOL;
    }
    return $declarations;
}