/**
 * Reads the tab-separated interaction-type file and emits one class
 * description per data row.
 *
 * Lines starting with '#' are skipped. The first data row must split into
 * exactly four tab-separated fields; otherwise a warning is raised and
 * processing stops. For every row, field 1 (prefixed with the vocabulary
 * namespace) becomes the class identifier, fields 0 and 2 are handed to
 * describeClass() as its second and fifth arguments, and a non-empty
 * field 3 names the parent class.
 *
 * @return bool TRUE once the input is exhausted, FALSE on a column-count mismatch
 */
function CTD_chem_gene_ixn_types()
{
    $columnsVerified = false;

    while ($row = $this->GetReadFile()->Read()) {
        // skip comment lines
        if ($row[0] == '#') {
            continue;
        }

        $fields = explode("\t", $row);

        // check number of columns (first data row only)
        if (!$columnsVerified) {
            $c = count($fields);
            if ($c != 4) {
                trigger_error("CTD_chem_gene_ixn_types function expects 4 fields, found {$c}!" . PHP_EOL, E_USER_WARNING);
                return FALSE;
            }
            $columnsVerified = true;
        }

        $classId = $this->getVoc() . $fields[1];

        // The last field still carries the line ending, hence the trim.
        $parentCode = trim($fields[3]);
        $parentClass = empty($parentCode) ? null : $this->getVoc() . $parentCode;

        $this->AddRDF(parent::describeClass($classId, $fields[0], $parentClass, null, $fields[2]));
        parent::WriteRDFBufferToWriteFile();
    }

    return TRUE;
}
/**
 * Converts one MIRIAM registry entry, given as a nested array, into RDF and
 * adds it to the RDF buffer.
 *
 * Always emitted: the entry itself (typed as vocabulary "Entry"), its
 * namespace and one literal per XML attribute. Emitted when the matching key
 * is present: comment, definition, synonyms, uris, resources, tags,
 * documentations, restrictions and annotation formats.
 *
 * Child elements that may occur once or several times are normalised to a
 * list before iterating, so both shapes take the same code path.
 *
 * @param array $item decoded entry; must contain '@attributes' (with 'id'),
 *                    'name' and 'namespace'
 * @return void
 */
function parseItem($item)
{
    $voc = parent::getVoc();
    $id = $item['@attributes']['id'];
    $label = $item['name'];

    // A lone child element arrives as a scalar, repeated ones as a list.
    $asList = function ($value) {
        return is_array($value) ? $value : array($value);
    };

    parent::addRDF(
        parent::describeIndividual($id, $label, $voc . "Entry")
        . parent::describeClass($voc . "Entry", "MIRIAM database entry")
        . parent::triplifyString($id, $voc . "namespace", $item['namespace'])
    );

    if (isset($item['@attributes'])) {
        foreach ($item['@attributes'] as $attrName => $attrValue) {
            parent::addRDF(parent::triplifyString($id, $voc . $attrName, $attrValue));
        }
    }

    if (isset($item['comment'])) {
        parent::addRDF(parent::triplifyString($id, $voc . "comment", $item['comment']));
    }

    if (isset($item['definition'])) {
        parent::addRDF(parent::triplifyString($id, $voc . "definition", $item['definition']));
    }

    if (isset($item['synonyms'])) {
        foreach ($asList($item['synonyms']['synonym']) as $synonym) {
            parent::addRDF(parent::triplifyString($id, "skos:altLabel", $synonym));
        }
    }

    if (isset($item['uris'])) {
        // Normalised like the other repeated elements: a single <uri> must
        // not be fed to foreach as a scalar.
        foreach ($asList($item['uris']['uri']) as $uri) {
            parent::addRDF(parent::triplifyString($id, $voc . "uri", $uri));
        }
    }

    if (isset($item['resources'])) {
        // A single resource exposes its fields directly (e.g. 'dataEntry');
        // several resources arrive as a list of such arrays.
        $resources = isset($item['resources']['resource']['dataEntry'])
            ? array($item['resources']['resource'])
            : $item['resources']['resource'];

        foreach ($resources as $resource) {
            $rid = $resource['@attributes']['id'];
            // Empty XML elements decode to arrays; those become empty strings.
            $organization = is_array($resource['dataInstitution']) ? "" : $resource['dataInstitution'];
            $location = is_array($resource['dataLocation']) ? "" : $resource['dataLocation'];

            parent::addRDF(
                parent::describeIndividual($rid, $resource['dataInfo'], $voc . "Resource")
                . parent::describeClass($voc . "Resource", "MIRIAM Resource")
                . parent::triplify($rid, $voc . "url", $resource['dataResource'])
                . parent::triplifyString($rid, $voc . "urlTemplate", $resource['dataEntry'])
                . parent::triplifyString($rid, $voc . "organization", $organization)
                . parent::triplifyString($rid, $voc . "location", $location)
                . parent::triplify($id, $voc . "resource", $rid)
            );
        }
    }

    if (isset($item['tags'])) {
        foreach ($asList($item['tags']['tag']) as $tag) {
            parent::addRDF(parent::triplifyString($id, $voc . "tag", $tag));
        }
    }

    if (isset($item['documentations'])) {
        foreach ($asList($item['documentations']['documentation']) as $doc) {
            if (strstr($doc, "pubmed")) {
                // keep only what follows the last colon
                $uri = "pubmed:" . substr($doc, strrpos($doc, ":") + 1);
            } elseif (strstr($doc, "doi")) {
                // Skip past the "doi:" scheme so the resolver URL carries only
                // the DOI name (previously the scheme was copied into the URL).
                $schemePos = strpos($doc, "doi:");
                $doiName = ($schemePos === false) ? $doc : substr($doc, $schemePos + strlen("doi:"));
                $uri = "http://dx.doi.org/" . $doiName;
            } else {
                $uri = $doc;
            }
            parent::addRDF(parent::triplify($id, $voc . "documentation", $uri));
        }
    }

    if (isset($item['restrictions'])) {
        // Same single-vs-list detection as for resources, keyed on 'statement'.
        $restrictions = isset($item['restrictions']['restriction']['statement'])
            ? array($item['restrictions']['restriction'])
            : $item['restrictions']['restriction'];

        foreach ($restrictions as $index => $restriction) {
            $rid = parent::getRes() . str_replace(":", "", $id) . "_" . ($index + 1);
            $attrs = $restriction['@attributes'];
            $rid_type = $voc . 'restriction_type_' . $attrs['type'];
            // NOTE(review): without a link an empty object is handed to
            // triplify(); confirm that this is intended.
            $page = isset($restriction['link']) ? $restriction['link'] : "";

            parent::addRDF(
                parent::describeIndividual($rid, $attrs['desc'], $voc . "Restriction")
                . parent::describeClass($voc . "Restriction", "Resource Restriction")
                . parent::triplify($rid, "rdf:type", $rid_type)
                . parent::describeClass($rid_type, $attrs['desc'], $voc . "Restriction")
                . parent::triplifyString($rid, "dct:description", $restriction['statement'])
                . parent::triplify($rid, "foaf:page", $page)
                . parent::triplify($id, $voc . "restriction", $rid)
            );
        }
    }

    /*
     Example of the annotation structure handled below:
       <annotation>
         <format name="SBML">
           <elements>
             <element>reaction</element>
             <element>event</element>
             <element>rule</element>
             <element>species</element>
           </elements>
         </format>
    */
    if (isset($item['annotation'])) {
        $formats = isset($item['annotation']['format']['elements'])
            ? array($item['annotation']['format'])
            : $item['annotation']['format'];

        foreach ($formats as $index => $format) {
            $name = $format['@attributes']['name'];
            $myid = str_replace("MIR:", parent::getRes(), $id) . "_annotation_" . ($index + 1) . "_" . urlencode($name);

            parent::addRDF(
                parent::describeIndividual($myid, "{$label} used by {$name}", $voc . "ValueSet")
                . parent::describeClass($voc . "ValueSet", "MIRIAM Value Set")
                . parent::triplifyString($myid, $voc . "used-in", $name)
                . parent::triplify($myid, $voc . "uses", $id)
            );

            foreach ($asList($format['elements']['element']) as $element) {
                parent::addRDF(parent::triplifyString($myid, $voc . "used-for", $element));
            }
        }
    }
}
/**
 * Converts the tab-separated gene info file into RDF, one gene per row.
 *
 * The first line read is treated as a header and discarded. Rows that do not
 * split into exactly 15 fields produce no triples. When $this->taxids is set,
 * rows whose first field is not a key of that array are skipped. Rows whose
 * symbol is "NEWENTRY" produce no triples either. A field equal to "-" is
 * treated as "no value" and the corresponding triple is omitted.
 *
 * The RDF buffer is written after every processed row, and parent::clear()
 * is called on every 1000th row.
 *
 * @return void
 */
private function geneinfo()
{
    $voc = $this->getVoc();

    // Normalises backslash escaping of free-text fields before they go into literals.
    $clean = function ($text) {
        return addslashes(stripslashes(trim($text)));
    };

    $rowCount = 1;
    $this->GetReadFile()->Read(200000); // discard the header line

    while ($aLine = $this->GetReadFile()->Read(200000)) {
        if ($rowCount++ % 1000 == 0) {
            parent::clear();
        }

        $splitLine = explode("\t", $aLine);
        if (count($splitLine) == 15) {
            $rawTaxId = trim($splitLine[0]);
            $taxid = "taxon:" . $rawTaxId;

            // Optional taxonomy filter. Note that a filtered row also skips the buffer write below.
            if (isset($this->taxids) and !isset($this->taxids[$rawTaxId])) {
                continue;
            }

            $geneid = "ncbigene:" . trim($splitLine[1]);
            $symbol = $clean($splitLine[2]);
            $symbolid = "symbol:{$symbol}";
            $locusTag = $clean($splitLine[3]);
            $symbols_arr = explode("|", $splitLine[4]);
            $dbxrefs_arr = explode("|", $splitLine[5]);
            $chromosome = trim($splitLine[6]);
            $map_location = trim($splitLine[7]);
            $description = $clean($splitLine[8]);
            $type_of_gene = trim($splitLine[9]);
            $symbol_authority = $clean($splitLine[10]);
            $symbol_auth_full_name = $clean($splitLine[11]);
            $nomenclature_status = $clean($splitLine[12]);
            $other_designations = $clean($splitLine[13]);
            $mod_date_raw = trim($splitLine[14]);

            // check for a valid symbol
            if ($symbol != "NEWENTRY") {
                $this->AddRDF(
                    parent::describeIndividual($geneid, "{$description} ({$symbolid}, {$taxid})", $voc . "Gene")
                    . parent::triplify($geneid, $voc . "x-taxonomy", $taxid)
                    . parent::triplifyString($geneid, $voc . "symbol", $symbol)
                    . parent::triplifyString($geneid, $voc . "locus", $locusTag)
                    . parent::describeClass($voc . "Gene", "NCBI Gene gene")
                );

                if ($type_of_gene != '-') {
                    $typeLabel = ucfirst($type_of_gene);
                    $this->AddRDF(
                        parent::triplify($geneid, "rdf:type", $voc . $typeLabel . "-Gene")
                        . parent::describeClass($voc . $typeLabel . "-Gene", $typeLabel . " Gene")
                    );
                }

                // symbol synonyms
                foreach ($symbols_arr as $synonym) {
                    if ($synonym != "-") {
                        $this->AddRDF(parent::triplifyString($geneid, $voc . "symbol-synonym", addslashes(stripslashes($synonym))));
                    }
                }

                // dbxrefs
                foreach ($dbxrefs_arr as $dbx) {
                    if ($dbx != "-") {
                        $this->AddRDF(parent::triplifyString($geneid, $voc . "dbxref", $dbx));
                    }
                }

                // chromosome
                if ($chromosome != "-") {
                    $this->AddRDF(parent::triplifyString($geneid, $voc . "chromosome", $chromosome));
                }

                // map location
                if ($map_location != "-") {
                    $this->AddRDF(parent::triplifyString($geneid, $voc . "map-location", $map_location));
                }

                // description
                if ($description != "-") {
                    $this->AddRDF(parent::triplifyString($geneid, "dc:description", $description));
                }

                // nomenclature authority (the full name is only emitted alongside the symbol)
                if ($symbol_authority != "-") {
                    $this->AddRDF(parent::triplifyString($geneid, $voc . "nomenclature-authority", $symbol_authority));
                    if ($symbol_auth_full_name != "-") {
                        $this->AddRDF(parent::triplifyString($geneid, $voc . "nomenclature-authority-fullname", $symbol_auth_full_name));
                    }
                }

                // nomenclature status
                if ($nomenclature_status != "-") {
                    $this->AddRDF(parent::triplifyString($geneid, $voc . "nomenclature-status", $nomenclature_status));
                }

                // other designations
                if ($other_designations != "-") {
                    foreach (explode("|", $other_designations) as $designation) {
                        $this->AddRDF(parent::triplifyString($geneid, $voc . "other-designation", $designation));
                    }
                }

                // modification date
                // The "-" placeholder has to be tested on the raw field: date_parse()
                // returns an array, which never compares equal to "-".
                if ($mod_date_raw != "-") {
                    $mod_date = date_parse($mod_date_raw);
                    // date_parse() reports components it could not determine as false.
                    if ($mod_date["year"] !== false && $mod_date["month"] !== false && $mod_date["day"] !== false) {
                        // Components are not zero-padded; the existing literal format is kept.
                        $this->AddRDF(parent::triplifyString(
                            $geneid,
                            $voc . "modification-date",
                            $mod_date["year"] . "-" . $mod_date["month"] . "-" . $mod_date["day"]
                        ));
                    }
                }
            }
        }

        parent::writeRDFBufferToWriteFile();
    } // while
}
/**
 * Process a single PubChem BioAssay record and add its RDF to the buffer.
 *
 * Emitted for the assay: the assay individual (name, version), an xref built
 * from the depositor tracking name and id when both are found, descriptions,
 * "key: value" comments (with "Tax ID" turned into a taxon link and
 * "Putative Target" ignored), database cross references, and one resource
 * per declared result type. Emitted for the data: a result set, one resource
 * per result (substance, substance version, outcome, date) and one resource
 * per result value.
 *
 * Every element of the record lives in the http://www.ncbi.nlm.nih.gov
 * namespace, which is bound to the prefix "x"; each XPath step therefore
 * needs that prefix, because an unprefixed step only matches elements in no
 * namespace.
 *
 * @param object $xml reader exposing GetXMLRoot(); the returned root is
 *                    queried through xpath()/registerXPathNamespace()
 * @return void
 */
function parse_bioassay_record(&$xml)
{
    $ns = 'http://www.ncbi.nlm.nih.gov';
    $voc = $this->getPcbVoc();
    $res = $this->getPcbRes();

    $root = $xml->GetXMLRoot();
    $root->registerXPathNamespace('x', $ns);

    // First node matched by $path, or null. Replaces array_shift(xpath(...)),
    // which passes a temporary by reference and fails when xpath() returns false.
    $first = function ($node, $path) {
        $hits = $node->xpath($path);
        return (is_array($hits) && count($hits) > 0) ? reset($hits) : null;
    };

    $descr = '//x:PC-AssaySubmit_assay/x:PC-AssaySubmit_assay_descr/x:PC-AssayDescription';

    // internal identifier
    $aid = (string) $first($root, $descr . '/x:PC-AssayDescription_aid/x:PC-ID/x:PC-ID_id');
    $pid = $this->getPcbNs() . $aid;

    // text based description
    $assay_name = (string) $first($root, $descr . '/x:PC-AssayDescription_name');
    parent::addRDF(
        parent::describeIndividual($pid, $assay_name, $voc . "Assay", null, $assay_name)
        . parent::describeClass($voc . "Assay", "PubChem BioAssay")
    );

    $version = (string) $first($root, $descr . '/x:PC-AssayDescription_aid/x:PC-ID/x:PC-ID_version');
    parent::addRDF(
        parent::triplifyString($pid, $voc . "has-version", parent::safeLiteral($version))
        . parent::describeProperty($voc . "has-version", "Relationship between a PubChem entity and a version")
    );

    // additional identifiers
    // NOTE(review): the element nesting of this path is kept as written; confirm
    // against the PubChem schema that aid-source really sits below aid.
    $tracking = $descr . '/x:PC-AssayDescription_aid/x:PC-AssayDescription_aid-source/x:PC-Source/x:PC-Source_db/x:PC-DBTracking';
    $tracking_name = (string) $first($root, $tracking . '/x:PC-DBTracking_name');
    $tracking_id = (string) $first($root, $tracking . '/x:PC-DBTracking_source-id/x:Object-id/x:Object-id_str');
    // Only emit a cross reference that actually carries both parts.
    if ($tracking_name !== "" && $tracking_id !== "") {
        $xid = $tracking_name . ":" . $tracking_id;
        parent::addRDF(parent::triplifyString($pid, $voc . "xref", $xid));
    }

    $assay_descriptions = $root->xpath($descr . '/x:PC-AssayDescription_description/x:PC-AssayDescription_description_E');
    foreach ($assay_descriptions as $assay_description) {
        $assay_description = (string) $assay_description;
        if ($assay_description != "") {
            parent::addRDF(parent::triplifyString($pid, "dc:description", parent::safeLiteral($assay_description)));
        }
    }

    $assay_comments = $root->xpath($descr . '/x:PC-AssayDescription_comment/x:PC-AssayDescription_comment_E');
    foreach ($assay_comments as $assay_comment) {
        // Comments are "key: value" lines; anything without a colon is ignored.
        // NOTE(review): only the text between the first and second colon is kept
        // as the value; confirm that truncation is intended.
        $comment = explode(":", (string) $assay_comment);
        if (count($comment) <= 1) {
            continue;
        }
        $key = $comment[0];
        $value = $comment[1];
        if ($value == "") {
            continue;
        }

        switch ($key) {
            case "Putative Target":
                break;

            case "Tax ID":
                $value = trim($value);
                parent::addRDF(
                    parent::triplify($pid, $voc . "has-taxid", "taxon:" . $value)
                    . parent::describeProperty($voc . "has-taxid", "Relationship between a PubChem BioAssay and a taxonomic identifier")
                );
                break;

            default:
                $value = trim(parent::safeLiteral($value));
                parent::addRDF(parent::triplifyString($pid, "rdfs:comment", $value));
                break;
        }
    }

    // xrefs - these are database cross references to pubmed, ncbi gene, and pubchem substance
    $assay_xrefs = $root->xpath($descr . '/x:PC-AssayDescription_xref/x:PC-AnnotatedXRef');
    foreach ($assay_xrefs as $xref) {
        $xref->registerXPathNamespace('x', $ns);
        $xrefData = "./x:PC-AnnotatedXRef_xref/x:PC-XRefData";

        $this->db_xrefs($pid, $xref->xpath($xrefData . "/x:PC-XRefData_pmid"), "pubmed");
        $this->db_xrefs($pid, $xref->xpath($xrefData . "/x:PC-XRefData_taxonomy"), "taxon");
        $this->db_xrefs($pid, $xref->xpath($xrefData . "/x:PC-XRefData_aid"), $this->getPcbPrefix());
        $this->db_xrefs($pid, $xref->xpath($xrefData . "/x:PC-XRefData_mim"), "omim");
    }

    // definitions for allowed result types for a given assay
    $result_types = $root->xpath($descr . '/x:PC-AssayDescription_results/x:PC-ResultType');
    foreach ($result_types as $result_type) {
        $result_type->registerXPathNamespace('x', $ns);
        $name = (string) $first($result_type, './x:PC-ResultType_name');
        $tid = (string) $first($result_type, './x:PC-ResultType_tid');
        $description = (string) $first($result_type, './x:PC-ResultType_description/x:PC-ResultType_description_E');
        $unit = $first($result_type, './x:PC-ResultType_unit');

        // create the possible assay types that a result can be; may result in duplication with other experiments
        $rtid = $this->result_type_id($aid, $tid);
        parent::addRDF(
            parent::describeIndividual($rtid, $name, $voc . "AssayResultType", $name)
            . parent::describeClass($voc . "AssayResultType", "Result type of a PubChem BioAssay")
        );

        if ($description != "") {
            parent::addRDF(parent::triplifyString($rtid, "dc:description", parent::safeLiteral($description)));
        }

        if ($unit != null) {
            $value = $unit->attributes()->value;
            parent::addRDF(
                parent::triplifyString($rtid, $voc . "has-unit", parent::safeLiteral($value))
                . parent::describeProperty($voc . "has-unit", "Relationship between a PubChem BioAssay Result Type and its unit")
            );
        }
    }

    // The project category (e.g. literature-extracted) is currently not exported.

    // result sets - these are containers for multiple assay result sets
    $results = $root->xpath('//x:PC-AssaySubmit_data/x:PC-AssayResults');
    if (!is_array($results)) {
        $results = array();
    }
    // NOTE(review): implode() string-casts each result element, so this hash
    // depends only on the elements' own text; verify it is unique per assay.
    $rsid = $res . "resultset_" . md5(implode($results));
    $rsid_label = "BioAssay Result Set for {$pid}";
    parent::addRDF(
        parent::describeIndividual($rsid, $rsid_label, $voc . "ResultSet")
        . parent::describeClass($voc . "ResultSet", "PubChem BioAssay Result Set")
    );
    parent::addRDF(
        parent::triplify($pid, $voc . "has-result-set", $rsid)
        . parent::describeProperty($voc . "has-result-set", "Relationship between a PubChem BioAssay and its result set")
    );

    foreach ($results as $result) {
        $result->registerXPathNamespace('x', $ns);

        // A result is identified by the hash of its serialised XML.
        $rid = $res . "result_" . md5($result->asXML());
        $rid_label = "A PubChem BioAssay Result for {$pid}";
        parent::addRDF(
            parent::describeIndividual($rid, $rid_label, $voc . "AssayResult")
            . parent::describeClass($voc . "AssayResult", "PubChem BioAssay Result")
        );
        parent::addRDF(
            parent::triplify($rsid, $voc . "has-result", $rid)
            . parent::describeProperty($voc . "has-result", "Relationship between a PubChem BioAssay Result Set and a Result")
        );

        // substance id
        $sid = (string) $first($result, './x:PC-AssayResults_sid');
        $psid = $this->getPcsNs() . $sid;
        parent::addRDF(
            parent::triplify($rid, $voc . "has-substance", $psid)
            . parent::describeProperty($voc . "has-substance", "Relationship between a PubChem BioAssay Result and a PubChem substance")
        );

        // pubchem substance version
        $sid_version = (string) $first($result, './x:PC-AssayResults_version');
        if ($sid_version !== "") {
            parent::addRDF(parent::triplifyString($psid, $voc . "has-version", parent::safeLiteral($sid_version)));
        }

        $assay_outcome = (string) $first($result, './x:PC-AssayResults_outcome');
        parent::addRDF(
            parent::triplifyString($rid, $voc . "has-outcome", parent::safeLiteral($assay_outcome))
            . parent::describeProperty($voc . "has-outcome", "Relationship between a PubChem BioAssay and an outcome")
        );

        $dateStd = './x:PC-AssayResults_date/x:Date/x:Date_std/x:Date-std';
        $year = (string) $first($result, $dateStd . '/x:Date-std_year');
        $month = (string) $first($result, $dateStd . '/x:Date-std_month');
        $day = (string) $first($result, $dateStd . '/x:Date-std_day');
        // xsd:date literals must be year-month-day with zero padding; an
        // incomplete date is not emitted at all.
        if ($year !== "" && $month !== "" && $day !== "") {
            $isoDate = sprintf('%04d-%02d-%02d', $year, $month, $day);
            parent::addRDF(
                parent::triplifyString($rid, $voc . "has-date", parent::safeLiteral($isoDate), "xsd:date")
                . parent::describeProperty($voc . "has-date", "Relationship between a PubChem BioAssay and a date")
            );
        }

        // individual result datapoints
        $assay_data_collection = $result->xpath('./x:PC-AssayResults_data/x:PC-AssayData');
        foreach ($assay_data_collection as $assay_data) {
            $assay_data->registerXPathNamespace('x', $ns);

            // assay data id (what type is it?)
            $atype = (string) $first($assay_data, './x:PC-AssayData_tid');
            $avalue = (string) $first($assay_data, './x:PC-AssayData_value/*');

            $vid = $res . "result_value_" . md5($rid . $avalue);
            $vid_label = "Result value of type " . $atype . " for PubChem BioAssay " . $aid;
            $vid_type = $this->result_type_id($aid, $atype);

            parent::addRDF(parent::describeIndividual($vid, $vid_label, $vid_type));
            parent::addRDF(
                parent::triplify($rid, $voc . "has-result-value", $vid)
                . parent::describeProperty($voc . "has-result-value", "Relationship between a PubChem BioAssay result and its value resource")
            );

            if ($avalue !== "") {
                parent::addRDF(parent::triplifyString($vid, "rdf:value", parent::safeLiteral($avalue)));
            }
        }
    }
}
/**
 * Converts an Affymetrix annotation CSV into RDF, one probeset per data row.
 *
 * The first line is discarded. Lines starting with "#" are read as
 * "#%key=value" dataset attributes and attached to the dataset URI. The first
 * non-comment line is the header; it and every data row must have exactly 41
 * columns, otherwise an E_USER_ERROR is raised and the script exits.
 *
 * For each column of a data row, Map($k) decides the handling: when it yields
 * a registry name the values become "x-<prefix>" links, otherwise the column
 * is handled by name in the switch below. A value of "---" means "no data".
 * Multiple values are separated by " /// " and their parts by " // ".
 *
 * @param mixed $file not read by this method; input comes from getReadFile()
 * @return void
 */
function Parse($file)
{
    $voc = $this->getVoc();

    // Source-database codes used in "Transcript Assignments", mapped to the
    // prefixes used in the generated links.
    $prefixMap = array(
        'gb' => 'genbank',
        'gb_htc' => 'genbank',
        'ncbibacterial' => 'gi',
        'ncbi_bacterial' => 'gi',
        'ens' => 'ensembl',
        'ncbi_mito' => 'refseq',
        'ncbi_organelle' => 'refseq',
        'organelle' => 'refseq',
        'affx' => 'affymetrix',
        'unknown' => 'affymetrix',
        'prop' => 'affymetrix',
        'tigr_2004_08' => 'tigr',
        'tigr-plantta' => 'genbank',
        'newrs.gi' => 'gi',
        'newRS.gi' => 'gi',
        'primate_viral' => 'genbank',
        'jgi-bacterial' => 'ncbigene',
        'tb' => 'tuberculist',
        'pa' => 'pseudomonas',
    );

    $goRelations = array(
        'Gene Ontology Biological Process' => 'go-process',
        'Gene Ontology Cellular Component' => 'go-location',
        'Gene Ontology Molecular Function' => 'go-function',
    );

    parent::getReadFile()->read(); // skip the first comment line
    $line = 1;
    $first = true;

    while ($l = parent::getReadFile()->read(500000)) {
        // Keep the counter in step with the file so error messages point at the right line.
        $line++;

        if ($l[0] == "#") {
            // dataset attributes
            $a = explode('=', trim($l));
            $r = $voc . substr($a[0], 2);
            if (isset($a[1])) {
                $v = $a[1];
                if ($r == "affymetrix_vocabulary:genome-version-create_date") {
                    // a day of "00" is replaced by "01"
                    $x = explode("-", $a[1]);
                    if (isset($x[2]) && $x[2] == "00") {
                        $x[2] = "01";
                    }
                    $v = implode("-", $x);
                }
                parent::addRDF(
                    parent::triplifyString(parent::getDatasetURI(), $r, $v)
                    . parent::describe($r, "{$r}")
                );
            }
            continue;
        }

        if ($first == true) {
            $first = false;
            // header
            $header = explode(",", str_replace('"', '', trim($l)));
            $n = count($header);
            if ($n != 41) {
                trigger_error("Expecting 41 columns, found {$n} in header on line {$line}!", E_USER_ERROR);
                exit;
            }
            continue;
        }

        // Data rows are fully quoted: drop the leading quote and the trailing
        // quote plus line ending, then split on the quote-comma-quote separator.
        $a = explode('","', substr($l, 1, -2));
        $n = count($a);
        if ($n != 41) {
            trigger_error("Expecting 41 columns, found {$n} on line {$line}!", E_USER_ERROR);
            exit;
        }

        parent::writeRDFBufferToWriteFile();

        $id = $a[0];
        $qname = "affymetrix:{$id}";
        $label = "probeset {$a[0]} on GeneChip {$a[1]} ({$a[2]})";
        parent::addRDF(
            parent::describeIndividual($qname, $label, $voc . "Probeset")
            . parent::describeClass($voc . "Probeset", "Affymetrix probeset")
        );
        trigger_error($id, E_USER_NOTICE);

        // now process the entries
        foreach ($a as $k => $v) {
            if (trim($v) == '---') {
                continue;
            }

            // multi-valued entries are separated by " /// "
            $b = explode(" /// ", $v);
            $r = $this->Map($k);

            if (isset($r)) {
                foreach ($b as $c) {
                    $d = explode(" // ", $c);
                    if ($r == 'symbol') {
                        $d[0] = str_replace(" ", "-", $d[0]);
                    }
                    $s = $this->getRegistry()->getPreferredPrefix($r);
                    if ($s == "ec") {
                        // strip a leading "<scheme>:" when there is one
                        $e = explode(":", $d[0]);
                        if (isset($e[1])) {
                            $d[0] = $e[1];
                        }
                    }
                    $this->addRDF(
                        parent::triplify($qname, $voc . "x-{$s}", "{$s}:" . $d[0])
                        . parent::describeProperty($voc . "x-{$s}", "a relation to {$s}")
                    );
                }
                continue;
            }

            // we handle manually
            unset($rel);
            $label = $header[$k];

            switch ($label) {
                case 'GeneChip Array':
                    $array_id = parent::getRes() . str_replace(" ", "-", $v);
                    parent::addRDF(
                        parent::triplify($qname, $voc . "genechip-array", $array_id)
                        . parent::describeIndividual($array_id, "Affymetrix {$v} GeneChip array", $voc . "Genechip-Array")
                        . parent::describeClass($voc . "Genechip-Array", "Affymetrix GeneChip array")
                    );
                    break;

                case 'Gene Ontology Biological Process':
                case 'Gene Ontology Cellular Component':
                case 'Gene Ontology Molecular Function':
                    $rel = $goRelations[$label];
                    $prefix = "go";
                    foreach ($b as $c) {
                        $d = explode(" // ", $c);
                        parent::addRDF(
                            $this->triplify($qname, $voc . $rel, "{$prefix}:" . $d[0])
                            . $this->describeProperty($voc . $rel, "{$rel}")
                        );
                    }
                    break;

                case 'Transcript Assignments':
                    foreach ($b as $c) {
                        $d = explode(" // ", $c);
                        // an assignment needs both an accession (part 0) and a source (part 2)
                        if (!isset($d[2])) {
                            continue;
                        }
                        $accession = $d[0];
                        $prefix = $d[2];
                        if ($prefix == '---' || $accession == '---') {
                            continue;
                        }

                        if (isset($prefixMap[$prefix])) {
                            $prefix = $prefixMap[$prefix];
                        } elseif ($prefix == 'gi|53267') {
                            $prefix = 'gi';
                            $accession = '53267';
                        } elseif ($prefix == 'broad-tcup') {
                            // keep the prefix, use only the part of the id before the first dash
                            $e = explode("-", $accession);
                            $accession = $e[0];
                        }

                        parent::addRDF(
                            parent::triplify($qname, $voc . "transcript-assignment", "{$prefix}:{$accession}")
                            . parent::describeProperty($voc . "transcript-assignment", "transcript assignment")
                        );
                    }
                    break;

                case 'Annotation Transcript Cluster':
                    /* not exported; former implementation kept for reference:
                       $id = substr($v,0,strpos($v,"("));
                       $rel = str_replace(" ","-",strtolower($label));
                       $this->AddRDF($this->triplify($qname,parent::getVoc()."$rel", "refseq:$id"));
                    */
                    break;

                case 'Annotation Date':
                    // e.g. "Jun 9, 2011"
                    $rel = "annotation-date";
                    preg_match("/^([A-Za-z]+) ([0-9]+), ([0-9]{4})\$/", $v, $m);
                    if (count($m) == 4) {
                        // Separate target names: assigning back into the array being
                        // destructured is documented as undefined behaviour.
                        list(, $monthName, $day, $year) = $m;
                        $month = $this->getMonth($monthName);
                        if (!$day || $day == "0") {
                            $day = "01";
                        }
                        $date = $year . "-" . $month . "-" . str_pad($day, 2, "0", STR_PAD_LEFT) . "T00:00:00Z";
                        parent::addRDF(
                            parent::triplifyString($qname, $voc . $rel, $date, "xsd:dateTime")
                            . parent::describeProperty($voc . $rel, "{$rel}")
                        );
                    } else {
                        trigger_error("could not match date from {$v}", E_USER_ERROR);
                    }
                    break;

                case 'Species Scientific Name':
                    break;

                case 'Transcript ID(Array Design)':
                    $rel = 'transcript';
                    // intentional fall-through: emitted like any other literal column
                case 'Sequence type':
                default:
                    if (!isset($rel)) {
                        // derive the relation name from the column header
                        $rel = str_replace(" ", "-", strtolower($label));
                    }
                    foreach ($b as $c) {
                        parent::addRDF(
                            parent::triplifyString($qname, $voc . $rel, stripslashes($c))
                            . parent::describeProperty($voc . $rel, "{$rel}")
                        );
                    }
                    break;
            } // switch
        }

        $this->WriteRDFBufferToWriteFile();
    }
}
function process() { $header = $this->GetReadFile()->Read(200000); $header_arr = explode("\t", $header); $n = 41; $c = count($header_arr); if ($c != $n) { echo PHP_EOL; print_r($header_arr); trigger_error("Expected {$n} columns, found {$c} . please update the script", E_USER_ERROR); exit; } while ($l = $this->GetReadFile()->Read(4096)) { $fields = explode("\t", $l); $id = strtolower($fields[0]); $approved_symbol = $fields[1]; $approved_name = $fields[2]; $status = $fields[3]; $locus_type = $fields[4]; $locus_group = $fields[5]; $previous_symbols = $fields[6]; $previous_names = $fields[7]; $synonyms = $fields[8]; $name_synonyms = $fields[9]; $chromosome = $fields[10]; $date_approved = $fields[11]; $date_modified = $fields[12]; $date_symbol_changed = $fields[13]; $date_name_changed = $fields[14]; $accession_numbers = $fields[15]; $enzyme_ids = $fields[16]; $entrez_gene_id = $fields[17]; $ensembl_gene_id = $fields[18]; $mouse_genome_database_id = $fields[19]; $specialist_database_links = $fields[20]; $specialist_database_ids = $fields[21]; $pubmed_ids = $fields[22]; $refseq_ids = $fields[23]; $gene_family_tag = $fields[24]; $gene_family_description = $fields[25]; $record_type = $fields[26]; $primary_ids = $fields[27]; $secondary_ids = $fields[28]; $ccd_ids = $fields[29]; $vega_ids = $fields[30]; $locus_specific_databases = $fields[31]; $entrez_gene_id_mappeddatasuppliedbyNCBI = $fields[32]; $omim_id_mappeddatasuppliedbyNCBI = $fields[33]; $refseq_mappeddatasuppliedbyNCBI = $fields[34]; $uniprot_id_mappeddatasuppliedbyUniProt = $fields[35]; $ensembl_id_mappeddatasuppliedbyEnsembl = $fields[36]; $vega_id_mappeddatasuppliedbyVega = $fields[37]; $ucsc_id_mappeddatasuppliedbyUCSC = $fields[38]; $mouse_genome_database_id_mappeddatasuppliedbyMGI = $fields[39]; $rat_genome_database_id_mappeddatasuppliedbyRGD = $fields[40]; $id_res = $id; $id_label = "Gene Symbol for " . $approved_symbol; parent::AddRDF(parent::triplify($id_res, "rdf:type", $this->getVoc() . "Gene-Symbol") . 
parent::describeIndividual($id_res, $id_label, $this->getVoc() . "Gene-Symbol") . parent::describeClass($this->getVoc() . "Gene-Symbol", "HGNC Official Gene Symbol")); if (!empty($approved_symbol)) { $s = "hgnc.symbol:" . $approved_symbol; parent::AddRDF(parent::triplifyString($id_res, $this->getVoc() . "approved-symbol", utf8_encode(htmlspecialchars($approved_symbol))) . parent::describeProperty($this->getVoc() . "approved-symbol", "HGNC approved gene symbol", "The official gene symbol that has been approved by the HGNC and is publicly available. Symbols are approved based on specific HGNC nomenclature guidelines. In the HTML results page this ID links to the HGNC Symbol Report for that gene") . parent::describeIndividual($s, $approved_symbol, parent::getVoc() . "Approved-Gene-Symbol") . parent::describeClass(parent::getVoc() . "Approved-Gene-Symbol", "Approved Gene Symbol") . parent::triplify($id_res, parent::getVoc() . "has-approved-symbol", $s) . parent::triplify($s, parent::getVoc() . "is-approved-symbol-of", $id_res)); } if (!empty($approved_name)) { parent::AddRDF(parent::triplifyString($id_res, $this->getVoc() . "approved-name", utf8_encode(htmlspecialchars($approved_name))) . parent::describeProperty($this->getVoc() . "approved-name", "HGNC approved name", "The official gene name that has been approved by the HGNC and is publicly available. Names are approved based on specific HGNC nomenclature guidelines.")); } if (!empty($status)) { $s = $this->getVoc() . str_replace(" ", "-", $status); parent::AddRDF(parent::triplify($id_res, $this->getVoc() . "status", $s) . parent::describeProperty($this->getVoc() . "status", "HGNC status", "Indicates whether the gene is classified as: Approved - these genes have HGNC-approved gene symbols. Entry withdrawn - these previously approved genes are no longer thought to exist. Symbol withdrawn - a previously approved record that has since been merged into a another record.") . 
parent::describeClass($s, $status, $this->getVoc() . "Status")); } if (!empty($locus_id)) { $locus_res = $this->getRes() . $id . "_LOCUS"; parent::AddRDF(parent::triplify($id_res, $this->getVoc() . "locus", $locus_res) . parent::triplifyString($locus_res, $this->getVoc() . "locus-type", utf8_encode(htmlspecialchars($locus_type))) . parent::triplifyString($locus_res, $this->getVoc() . "locus-group", utf8_encode(htmlspecialchars($locus_group))) . parent::describeProperty($this->getVoc() . "locus-type", "locus type", "Specifies the type of locus described by the given entry") . parent::describeProperty($this->getVoc() . "locus-group", "locus group", "Groups locus types together into related sets. Below is a list of groups and the locus types within the group")); } if (!empty($previous_symbols)) { $previous_symbols = explode(", ", $previous_symbols); foreach ($previous_symbols as $previous_symbol) { $previous_symbol_uri = "hgnc.symbol:" . $previous_symbol; parent::AddRDF(parent::describeIndividual($previous_symbol_uri, $previous_symbol, parent::getVoc() . "Previous-Symbol") . parent::describeClass(parent::getVoc() . "Previous-Symbol", "Previous Symbol") . parent::triplify($id_res, $this->getVoc() . "previous-symbol", $previous_symbol_uri) . parent::describeProperty($this->getVoc() . "previous-symbol", "HGNC previous symbol", "Symbols previously approved by the HGNC for this gene")); } } if (!empty($previous_names)) { $previous_names = explode(", ", $previous_names); foreach ($previous_names as $previous_name) { $previous_name = str_replace("\"", "", $previous_name); parent::AddRDF(parent::triplifyString($id_res, $this->getVoc() . "previous-name", utf8_encode(htmlspecialchars($previous_name))) . parent::describeProperty($this->getVoc() . 
"previous-name", "HGNC previous name", "Gene names previously approved by the HGNC for this gene")); } } if (!empty($synonyms)) { $synonyms = explode(", ", $synonyms); foreach ($synonyms as $synonym) { parent::AddRDF(parent::triplifyString($id_res, $this->getVoc() . "synonym", utf8_encode(htmlspecialchars($synonym))) . parent::describeProperty($this->getVoc() . "synonym", "synonym", "Other symbols used to refer to this gene")); } } if (!empty($name_synonyms)) { $name_synonyms = explode(", ", $name_synonyms); foreach ($name_synonyms as $name_synonym) { $name_synonym = str_replace("\"", "", $name_synonym); parent::AddRDF(parent::triplifyString($id_res, $this->getVoc() . "name-synonym", utf8_encode(htmlspecialchars($name_synonym))) . parent::describeProperty($this->getVoc() . "name-synonym", "name synonym", "Other names used to refer to this gene")); } } if (!empty($chromosome)) { parent::AddRDF(parent::triplifyString($id_res, $this->getVoc() . "chromosome", utf8_encode(htmlspecialchars($chromosome))) . parent::describeProperty($this->getVoc() . "chromosome", "chromosome", "Indicates the location of the gene or region on the chromosome")); } if (!empty($date_approved)) { parent::AddRDF(parent::triplifyString($id_res, $this->getVoc() . "date-approved", $date_approved, "xsd:date") . parent::describeProperty($this->getVoc() . "date-approved", "date approved", "Date the gene symbol and name were approved by the HGNC")); } if (!empty($date_modified)) { parent::AddRDF(parent::triplifyString($id_res, $this->getVoc() . "date-modified", $date_modified, "xsd:date") . parent::describeProperty($this->getVoc() . "date-modified", "date modified", "the date the entry was modified by the HGNC")); } if (!empty($date_symbol_changed)) { parent::AddRDF(parent::triplifyString($id_res, $this->getVoc() . "date-symbol-changed", $date_symbol_changed, "xsd:date") . parent::describeProperty($this->getVoc() . 
"date-symbol-changed", "date symbol changed", "The date the gene symbol was last changed by the HGNC from a previously approved symbol. Many genes receive approved symbols and names which are viewed as temporary (eg C2orf#) or are non-ideal when considered in the light of subsequent information. In the case of individual genes a change to the name (and subsequently the symbol) is only made if the original name is seriously misleading")); } if (!empty($date_name_changed)) { parent::AddRDF(parent::triplifyString($id_res, $this->getVoc() . "date-name-changed", $date_name_changed, "xsd:date") . parent::describeProperty($this->getVoc() . "date-name-changed", "date name changed", "The date the gene name was last changed by the HGNC from a previously approved name")); } if (!empty($accession_numbers)) { $accession_numbers = explode(", ", $accession_numbers); foreach ($accession_numbers as $accession_number) { parent::AddRDF(parent::triplifyString($id_res, $this->getVoc() . "accession", utf8_encode(htmlspecialchars($accession_number))) . parent::describeProperty($this->getVoc() . "accession", "accession number", "Accession numbers for each entry selected by the HGNC")); } } if (!empty($enzyme_ids)) { $enzyme_ids = explode(", ", $enzyme_ids); foreach ($enzyme_ids as $enzyme_id) { parent::AddRDF(parent::triplifyString($id_res, $this->getVoc() . "x-ec", utf8_encode(htmlspecialchars($enzyme_id))) . parent::describeProperty($this->getVoc() . "x-ec", "Enzyme Commission (EC) number", "Enzyme entries have Enzyme Commission (EC) numbers associated with them that indicate the hierarchical functional classes to which they belong")); } } if (!empty($entrez_gene_id)) { parent::AddRDF(parent::triplify($id_res, $this->getVoc() . "x-ncbigene", "ncbigene:{$entrez_gene_id}") . parent::describeProperty($this->getVoc() . 
"x-ncbigene", "NCBI Gene", "NCBI Gene provides curated sequence and descriptive information about genetic loci including official nomenclature, synonyms, sequence accessions, phenotypes, EC numbers, MIM numbers, UniGene clusters, homology, map locations, and related web sites")); } if (!empty($ensembl_gene_id)) { parent::AddRDF(parent::triplify($id_res, $this->getVoc() . "x-ensembl", "ensembl:{$ensembl_gene_id}") . parent::describeProperty($this->getVoc() . "x-ensembl", "Ensembl Gene")); } if (!empty($mouse_genome_database_id)) { if (strpos($mouse_genome_database_id, "MGI:") !== FALSE) { $mouse_genome_database_id = substr($mouse_genome_database_id, 4); parent::AddRDF(parent::triplify($id_res, $this->getVoc() . "x-mgi", "mgi:{$mouse_genome_database_id}") . parent::describeProperty($this->getVoc() . "x-mgi", "MGI entry")); } } if (!empty($specialist_database_links)) { $specialist_database_links = explode(", ", $specialist_database_links); foreach ($specialist_database_links as $specialist_database_link) { preg_match('/href="(\\S+)"/', $specialist_database_link, $matches); if (!empty($matches[1])) { parent::AddRDF(parent::QQuadO_URL($id_res, $this->getVoc() . "xref", $matches[1]) . parent::describeProperty($this->getVoc() . "xref", "Specialist database references.")); } } } if (!empty($pubmed_ids)) { $pubmed_ids = explode(", ", $pubmed_ids); foreach ($pubmed_ids as $pubmed_id) { parent::AddRDF(parent::triplify($id_res, $this->getVoc() . "x-pubmed", "pubmed:" . trim($pubmed_id)) . parent::describeProperty($this->getVoc() . "x-pubmed", "NCBI PubMed entry", "Identifier that links to published articles relevant to the entry in the NCBI's PubMed database.")); } } if (!empty($refseq_ids)) { $refseq_ids = explode(", ", $refseq_ids); foreach ($refseq_ids as $refseq_id) { parent::AddRDF(parent::triplify($id_res, $this->getVoc() . "x-refseq", "refseq:" . trim($refseq_id)) . parent::describeProperty($this->getVoc() . 
"x-refseq", "NCBI Refseq entry", "The Reference Sequence (RefSeq) identifier for that entry, provided by the NCBI. As we do not aim to curate all variants of a gene only one selected RefSeq is displayed per gene report. RefSeq aims to provide a comprehensive, integrated, non-redundant set of sequences, including genomic DNA, transcript (RNA), and protein products. RefSeq identifiers are designed to provide a stable reference for gene identification and characterization, mutation analysis, expression studies, polymorphism discovery, and comparative analyses. In the HTML results page this ID links to the RefSeq page for that entry.")); } } if (!empty($gene_family_tag)) { parent::AddRDF(parent::triplifyString($id_res, $this->getVoc() . "gene-family-tag", utf8_encode(htmlspecialchars($gene_family_tag))) . parent::describeProperty($this->getVoc() . "gene-family-tag", "Gene Family Tag", "Tag used to designate a gene family or group the gene has been assigned to, according to either sequence similarity or information from publications, specialist advisors for that family or other databases. Families/groups may be either structural or functional, therefore a gene may belong to more than one family/group. These tags are used to generate gene family or grouping specific pages at genenames.org and do not necessarily reflect an official nomenclature. Each gene family has an associated gene family tag and gene family description. If a particular gene is a member of more than one gene family, the tags and the descriptions will be shown in the same order.")); } if (!empty($gene_family_description)) { $gene_family_description = str_replace("\"", "", $gene_family_description); parent::AddRDF(parent::triplifyString($id_res, $this->getVoc() . "gene-family-description", utf8_encode(htmlspecialchars($gene_family_description))) . parent::describeProperty($this->getVoc() . "gene-family-description", "gene family name", "Name given to a particular gene family. 
The gene family description has an associated gene family tag. Gene families are used to group genes according to either sequence similarity or information from publications, specialist advisors for that family or other databases. Families/groups may be either structural or functional, therefore a gene may belong to more than one family/group.")); } if (!empty($record_type)) { parent::AddRDF(parent::triplifyString($id_res, $this->getVoc() . "record-type", utf8_encode(htmlspecialchars($record_type)))); } if (!empty($primary_ids)) { $primary_ids = explode(", ", $primary_ids); foreach ($primary_ids as $primary_id) { parent::AddRDF(parent::triplifyString($id_res, $this->getVoc() . "primary-id", utf8_encode(htmlspecialchars($primary_id))) . parent::describeProperty($this->getVoc() . "primary-id", "primary identifier")); } } if (!empty($secondary_ids)) { $secondary_ids = explode(", ", $secondary_ids); foreach ($secondary_ids as $secondary_id) { parent::AddRDF(parent::triplifyString($id_res, $this->getVoc() . "secondary-id", utf8_encode(htmlspecialchars($secondary_id))) . parent::describeProperty($this->getVoc() . "secondary-id", "secondary identifier")); } } if (!empty($ccd_ids)) { $ccd_ids = explode(", ", $ccd_ids); foreach ($ccd_ids as $ccd_id) { parent::AddRDF(parent::triplify($id_res, $this->getVoc() . "x-ccds", "ccds:" . trim($ccd_id)) . parent::describeProperty($this->getVoc() . "x-ccds", "consensus CDS entry", "The Consensus CDS (CCDS) project is a collaborative effort to identify a core set of human and mouse protein coding regions that are consistently annotated and of high quality. The long term goal is to support convergence towards a standard set of gene annotations.")); } } if (!empty($vega_ids)) { $vega_ids = explode(", ", $vega_ids); foreach ($vega_ids as $vega_id) { parent::AddRDF(parent::triplify($id_res, $this->getVoc() . "x-vega", "vega:" . trim($vega_id)) . parent::describeProperty($this->getVoc() . 
"x-vega", "VEGA gene entry")); } } if (!empty($locus_specific_databases)) { parent::AddRDF(parent::triplifyString($id_res, $this->getVoc() . "locus-specific-xref", utf8_encode(htmlspecialchars($locus_specific_databases))) . parent::describeProperty($this->getVoc() . "locus-specific-xref", "locus specific xref", "This contains a list of links to databases or database entries pertinent to the gene")); } if (!empty($entrez_gene_id_mappeddatasuppliedbyNCBI)) { $entrez_gene_id_mappeddatasuppliedbyNCBI = explode(", ", $entrez_gene_id_mappeddatasuppliedbyNCBI); foreach ($entrez_gene_id_mappeddatasuppliedbyNCBI as $gene_id) { if (strstr($gene_id, ":") !== FALSE) { $a = explode(":", $gene_id); $gene_id = $a[1]; } parent::AddRDF(parent::triplify($id_res, $this->getVoc() . "x-ncbigene", "ncbigene:" . trim($gene_id)) . parent::describeProperty($this->getVoc() . "x-ncbigene", "NCBI Gene entry")); } } if (!empty($omim_id_mappeddatasuppliedbyNCBI)) { $omim_id_mappeddatasuppliedbyNCBI = explode(", ", $omim_id_mappeddatasuppliedbyNCBI); foreach ($omim_id_mappeddatasuppliedbyNCBI as $omim_id) { parent::AddRDF(parent::triplify($id_res, $this->getVoc() . "x-omim", "omim:" . trim($omim_id)) . parent::describeProperty($this->getVoc() . "x-omim", "OMIM entry", "Identifier provided by Online Mendelian Inheritance in Man (OMIM) at the NCBI. This database is described as a catalog of human genes and genetic disorders containing textual information and links to MEDLINE and sequence records in the Entrez system, and links to additional related resources at NCBI and elsewhere. In the HTML results page this ID links to the OMIM page for that entry.")); } } if (!empty($refseq_mappeddatasuppliedbyNCBI)) { $refseq_mappeddatasuppliedbyNCBI = explode(", ", $refseq_mappeddatasuppliedbyNCBI); foreach ($refseq_mappeddatasuppliedbyNCBI as $refseq_id) { parent::AddRDF(parent::triplify($id_res, $this->getVoc() . "x-refseq", "refseq:" . trim($refseq_id)) . parent::describeProperty($this->getVoc() . 
"x-refseq", "NCBI Refseq entry", "The Reference Sequence (RefSeq) identifier for that entry, provided by the NCBI. As we do not aim to curate all variants of a gene only one selected RefSeq is displayed per gene report. RefSeq aims to provide a comprehensive, integrated, non-redundant set of sequences, including genomic DNA, transcript (RNA), and protein products. RefSeq identifiers are designed to provide a stable reference for gene identification and characterization, mutation analysis, expression studies, polymorphism discovery, and comparative analyses. In the HTML results page this ID links to the RefSeq page for that entry.")); } } if (!empty($uniprot_id_mappeddatasuppliedbyUniProt)) { $uniprot_id_mappeddatasuppliedbyUniProt = explode(", ", $uniprot_id_mappeddatasuppliedbyUniProt); foreach ($uniprot_id_mappeddatasuppliedbyUniProt as $uniprot_id) { parent::AddRDF(parent::triplify($id_res, $this->getVoc() . "x-uniprot", "uniprot:" . trim($uniprot_id)) . parent::describeProperty($this->getVoc() . "x-uniprot", "Uniprot entry", "The UniProt identifier, provided by the EBI. The UniProt Protein Knowledgebase is described as a curated protein sequence database that provides a high level of annotation, a minimal level of redundancy and high level of integration with other databases. In the HTML results page this ID links to the UniProt page for that entry.")); } } if (!empty($ensembl_id_mappeddatasuppliedbyEnsembl)) { $ensembl_id_mappeddatasuppliedbyEnsembl = explode(", ", $ensembl_id_mappeddatasuppliedbyEnsembl); foreach ($ensembl_id_mappeddatasuppliedbyEnsembl as $ensembl_id) { parent::AddRDF(parent::triplify($id_res, $this->getVoc() . "x-ensembl", "ensembl:" . trim($refseq_id)) . parent::describeProperty($this->getVoc() . 
"x-ensembl", "Ensembl entry", "The Ensembl ID is derived from the current build of the Ensembl database and provided by the Ensembl team.")); } } if (!empty($ucsc_id_mappeddatasuppliedbyVega)) { $ucsc_id_mappeddatasuppliedbyVega = explode(", ", $ucsc_id_mappeddatasuppliedbyVega); foreach ($ucsc_id_mappeddatasuppliedbyVega as $vega_id) { parent::AddRDF(parent::triplify($id_res, $this->getVoc() . "x-vega", "vega:" . trim($vega_id)) . parent::describeProperty($this->getVoc() . "x-vega", "Vega entry")); } } if (!empty($ucsc_id_mappeddatasuppliedbyUCSC)) { $ucsc_id_mappeddatasuppliedbyUCSC = explode(", ", $ucsc_id_mappeddatasuppliedbyUCSC); foreach ($ucsc_id_mappeddatasuppliedbyUCSC as $ucsc_id) { parent::AddRDF(parent::triplify($id_res, $this->getVoc() . "x-ucsc", "ucsc:" . trim($ucsc_id)) . parent::describeProperty($this->getVoc() . "x-ucsc", "UCSC entry")); } } if (!empty($mouse_genome_database_id_mappeddatasuppliedbyMGI)) { $mouse_genome_database_id_mappeddatasuppliedbyMGI = explode(", ", $mouse_genome_database_id_mappeddatasuppliedbyMGI); foreach ($mouse_genome_database_id_mappeddatasuppliedbyMGI as $mgi_id) { if (strpos($mgi_id, "MGI:") !== FALSE) { $mgi_id = substr($mgi_id, 4); } parent::AddRDF(parent::triplify($id_res, $this->getVoc() . "x-mgi", "mgi:" . trim($mgi_id)) . parent::describeProperty($this->getVoc() . "x-mgi", "MGI entry")); } } if (!empty($rat_genome_database_id_mappeddatasuppliedbyRGD)) { $rat_genome_database_id_mappeddatasuppliedbyRGD = explode(", ", trim($rat_genome_database_id_mappeddatasuppliedbyRGD)); foreach ($rat_genome_database_id_mappeddatasuppliedbyRGD as $rgd_id) { $rgd_id = trim($rgd_id); if (!empty($rgd_id)) { parent::AddRDF(parent::triplify($id_res, $this->getVoc() . "x-rgd", trim($rgd_id)) . parent::describeProperty($this->getVoc() . "x-rgd", "RGD entry")); } } } //write RDF to file $this->WriteRDFBufferToWriteFile(); } //while }
/**
 * Converts the citations dump into RDF, one record per input line.
 *
 * Each line is split on the "\t|\t" delimiter after stripping the trailing
 * "\t|\n" terminator. Columns used by this method:
 *   0 - citation id (used to mint the citation URI)
 *   1 - citation key (label and "citation-key" literal)
 *   2 - PubMed id ("0" means none)
 *   4 - URL / see-also reference (optional)
 *   5 - free text (optional)
 *   6 - space separated list of taxonomy ids citing this record (optional)
 *
 * Lines lacking column 1 or 2 are skipped. The RDF buffer is flushed to the
 * write file after every record.
 */
private function citations()
{
    while ($l = $this->getReadFile()->read(2000000)) {
        $a = explode("\t|\t", rtrim($l, "\t|\n"));
        if (!isset($a[1]) or !isset($a[2])) {
            continue;
        }

        $c = parent::getRes() . "citation-id-" . $a[0];

        // Build the rdfs:seeAlso triple separately so that a "missing" marker
        // can never be concatenated into the RDF buffer as a raw string.
        // A bare "0" is treated as "no value", like the PubMed column.
        $seealso_rdf = "";
        $seealso = isset($a[4]) ? trim($a[4]) : "";
        if ($seealso !== "" and $seealso !== "0") {
            // repair the malformed prefixes that occur in the source data
            $seealso = str_replace(
                array("lx: DOI ", "http;//"),
                array("http://dx.doi.org/", "http://"),
                $seealso
            );
            if (strlen($seealso) > 2 and strpos($seealso, "http") === false) {
                $seealso = "http://" . $seealso;
            }
            $seealso_rdf = parent::triplify($c, "rdfs:seeAlso", $seealso);
        }

        // An empty PubMed column must not yield a dangling "pubmed:" URI.
        $pubmed_rdf = "";
        $pubmed = trim($a[2]);
        if ($pubmed !== "" and $pubmed != "0") {
            $pubmed_rdf = parent::triplify($c, parent::getVoc() . "x-pubmed", "pubmed:" . $pubmed);
        }

        $text_rdf = "";
        if (isset($a[5]) and $a[5]) {
            $text_rdf = parent::triplifyString($c, parent::getVoc() . "text", str_replace("\"", "", $a[5]));
        }

        parent::addRDF(
            parent::describeIndividual($c, $a[1], $this->getVoc() . "Citation")
            . parent::describeClass($this->getVoc() . "Citation", "Citation")
            . parent::triplifyString($c, parent::getVoc() . "citation-key", $a[1])
            . $pubmed_rdf
            . $seealso_rdf
            . $text_rdf
        );

        if (isset($a[6])) {
            foreach (explode(" ", trim($a[6])) as $taxid) {
                // explode() yields "" for an empty list or repeated spaces;
                // skip those so no "taxonomy:" subject without an id is written
                if ($taxid === "") {
                    continue;
                }
                parent::addRDF(parent::triplify("taxonomy:{$taxid}", $this->getVoc() . "citation", $c));
            }
        }

        $this->writeRDFBufferToWriteFile();
    } //while
}
/**
 * Converts a MEDLINE/PubMed citation set (plain or gzip-compressed XML) into RDF.
 *
 * The read file is loaded in full with SimpleXML and every MedlineCitation
 * element is processed in turn. When $this->id_list is set, only citations
 * whose PMID is a key of that list are converted. For each citation the
 * method emits the record itself, owner/status/version metadata, dates,
 * MeSH headings, chemicals, gene symbols, supplemental MeSH names,
 * publication types, abstracts, authors/investigators/personal name
 * subjects, languages, keywords, other ids, data banks, grants, comments and
 * corrections, general notes, space flight missions, journal details and
 * electronic locations. The RDF buffer is flushed after every citation.
 *
 * Element and attribute names are case-sensitive in SimpleXML; the names used
 * here follow the NLM MedlineCitation DTD.
 */
function pubmed()
{
    $file = $this->getReadFile()->getFileName();
    $ext = strtolower(pathinfo($file, PATHINFO_EXTENSION));
    // Compare, do not assign: plain .xml files are read directly. Every other
    // extension (notably .gz) goes through the zlib stream wrapper, which
    // also reads uncompressed data transparently.
    if ($ext === "xml") {
        $citations = new SimpleXMLElement($file, 0, TRUE);
    } else {
        $citations = new SimpleXMLElement("compress.zlib://" . $file, 0, TRUE);
    }

    foreach ($citations->MedlineCitation as $citation) {
        $this->setCheckPoint('record');
        $pmid = (string) $citation->PMID;
        if (isset($this->id_list)) {
            if (!isset($this->id_list[$pmid])) {
                continue;
            }
            echo "processing {$pmid}" . PHP_EOL;
        }

        $pmid_uri = parent::getNamespace() . $pmid;
        $article = $citation->Article;
        parent::addRDF(
            parent::describeIndividual($pmid_uri, $this->getString($article->ArticleTitle), parent::getVoc() . "PubMedRecord")
            . parent::describeClass(parent::getVoc() . "PubMedRecord", "PubMedRecord")
            . parent::triplify($pmid_uri, "rdfs:seeAlso", "http://www.ncbi.nlm.nih.gov/pubmed/{$pmid}")
        );

        // metadata about the record
        $owner = parent::getRes() . md5((string) $citation['Owner']);
        parent::addRDF(
            parent::describeIndividual($owner, $citation['Owner'], "foaf:Agent")
            . parent::triplify($pmid_uri, parent::getVoc() . "owner", $owner)
        );

        $status = parent::getRes() . md5((string) $citation['Status']);
        parent::addRDF(
            parent::describeIndividual($status, $citation['Status'], parent::getVoc() . "Status")
            . parent::describeClass(parent::getVoc() . "Status", "Status")
            . parent::triplify($pmid_uri, parent::getVoc() . "status", $status)
            . parent::triplifyString($pmid_uri, parent::getVoc() . "version", $citation['VersionID'])
        );

        $this->addDate($pmid_uri, "version-date", $citation['VersionDate']);
        $this->addDate($pmid_uri, "date-created", $citation->DateCreated);
        $this->addDate($pmid_uri, "date-revised", $citation->DateRevised);
        $this->addDate($pmid_uri, "date-completed", $citation->DateCompleted);

        // MeSH headings with their descriptor and qualifiers
        if (!empty($citation->MeshHeadingList)) {
            $i = 0;
            foreach ($citation->MeshHeadingList->MeshHeading as $mh) {
                $id = parent::getRes() . $pmid . "_mh_" . ++$i;
                $did = parent::getRes() . md5((string) $mh->DescriptorName);
                parent::addRDF(
                    parent::describeIndividual($id, $mh->DescriptorName, parent::getVoc() . "MeshHeading")
                    . parent::describeClass(parent::getVoc() . "MeshHeading", "MeSH Heading")
                    . parent::triplify($pmid_uri, parent::getVoc() . "mesh-heading", $id)
                    . parent::triplifyString($id, parent::getVoc() . "descriptor-major-topic", "" . $mh->DescriptorName['MajorTopicYN'])
                    . parent::describeIndividual($did, "" . $mh->DescriptorName, parent::getVoc() . "Mesh-Descriptor")
                    . parent::triplify($id, parent::getVoc() . "mesh-descriptor", $did)
                );
                if (!empty($mh->QualifierName)) {
                    foreach ($mh->QualifierName as $qualifier_name) {
                        $qid = parent::getRes() . md5((string) $qualifier_name);
                        parent::addRDF(
                            parent::describeIndividual($qid, $qualifier_name, parent::getVoc() . "Mesh-Qualifier")
                            . parent::triplify($id, parent::getVoc() . "mesh-qualifier", $qid)
                        );
                    }
                }
            }
        }

        // chemicals, cross-referenced to EC or CAS through the registry number
        if (!empty($citation->ChemicalList)) {
            $i = 0;
            foreach ($citation->ChemicalList->Chemical as $chemical) {
                $id = parent::getRes() . $pmid . "_ch_" . ++$i;
                parent::addRDF(
                    parent::describeIndividual($id, $chemical->NameOfSubstance, parent::getVoc() . "Chemical")
                    . parent::describeClass(parent::getVoc() . "Chemical", "Chemical")
                    . parent::triplify($pmid_uri, parent::getVoc() . "chemical", $id)
                );
                $registry = trim((string) $chemical->RegistryNumber);
                // "0" marks "no registry number"; an empty value is skipped too
                // so that no bare "cas:" URI is produced
                if ($registry !== "" && $registry != "0") {
                    if (substr($registry, 0, 2) == "EC") {
                        // values look like "EC 1.2.3.4": drop the "EC " prefix
                        $ec = substr($registry, 3);
                        parent::addRDF(parent::triplify($id, parent::getVoc() . "x-ec", "ec:" . $ec));
                    } else {
                        parent::addRDF(parent::triplify($id, parent::getVoc() . "x-cas", "cas:" . $registry));
                    }
                }
            }
        }

        if (!empty($citation->GeneSymbolList)) {
            foreach ($citation->GeneSymbolList->GeneSymbol as $geneSymbol) {
                parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "gene-symbol", $geneSymbol));
            }
        }

        if (!empty($citation->SupplMeshList)) {
            foreach ($citation->SupplMeshList->SupplMeshName as $supplMeshName) {
                $id = parent::getRes() . md5((string) $supplMeshName);
                parent::addRDF(
                    parent::describeIndividual($id, $supplMeshName, parent::getVoc() . "MeshHeading")
                    . parent::triplify($pmid_uri, parent::getVoc() . "supplemental-mesh-heading", $id)
                );
            }
        }

        foreach ($article->PublicationTypeList->PublicationType as $publicationType) {
            $id = parent::getRes() . md5((string) $publicationType);
            parent::addRDF(
                parent::triplify($pmid_uri, parent::getVoc() . "publication-type", $id)
                . parent::describeClass($id, $publicationType)
            );
        }

        // main abstract: labelled sections plus the concatenated full text
        if (!empty($article->Abstract)) {
            $id = parent::getRes() . $pmid . "_ABSTRACT";
            $label = "Abstract for PMID:{$pmid}";
            $abstract = $article->Abstract;
            parent::addRDF(
                parent::describeIndividual($id, $label, parent::getVoc() . "Article-Abstract")
                . parent::describeClass(parent::getVoc() . "Article-Abstract", "Article Abstract")
                . parent::triplify($pmid_uri, "dc:abstract", $id)
                . parent::triplifyString($id, parent::getVoc() . "copyright", $abstract->CopyrightInformation)
            );
            $section = 0;
            $abstractText = "";
            foreach ($abstract->AbstractText as $text) {
                $abstractText .= " " . $text;
                // cast before comparing: a SimpleXMLElement is never identical
                // (!==) to a string, so UNLABELLED could never be filtered out
                if (!empty($text['Label']) && (string) $text['Label'] !== "UNLABELLED") {
                    $section_id = parent::getRes() . $pmid . "_ABSTRACT_SECTION_" . ++$section;
                    parent::addRDF(
                        parent::triplify($id, parent::getVoc() . "section", $section_id)
                        . parent::triplifyString($section_id, parent::getVoc() . "order", $section)
                        . parent::triplifyString($section_id, parent::getVoc() . "nlm-section-type", $text['NlmCategory'])
                        . parent::triplifyString($section_id, parent::getVoc() . "label", $text['Label'])
                        . parent::triplifyString($section_id, parent::getVoc() . "text", $text)
                    );
                }
            }
            parent::addRDF(parent::triplifyString($id, parent::getVoc() . "abstract-text", $abstractText));
        }

        if (!empty($citation->OtherAbstract)) {
            $i = 0;
            foreach ($citation->OtherAbstract as $ab) {
                $id = parent::getRes() . $pmid . "_oa_" . ++$i;
                parent::addRDF(
                    parent::describeIndividual($id, "", parent::getVoc() . "Article-Abstract")
                    . parent::describeClass(parent::getVoc() . "Article-Abstract", "Article Abstract")
                    . parent::triplify($pmid_uri, "dc:abstract", $id)
                );
                $abstractText = "";
                foreach ($ab->AbstractText as $text) {
                    $abstractText .= " " . $text;
                    if (!empty($text['Label']) && (string) $text['Label'] !== "UNLABELLED") {
                        // AbstractText carries "NlmCategory" (there is no
                        // "Category" attribute), as used for the main abstract
                        parent::addRDF(parent::triplifyString(
                            $id,
                            parent::getVoc() . "abstract_" . strtolower((string) $text['NlmCategory']),
                            $text
                        ));
                    }
                }
                parent::addRDF(parent::triplifyString($id, parent::getVoc() . "abstract-text", $abstractText));
            }
        }

        // People attached to the record. AuthorList is a child of Article,
        // while InvestigatorList and PersonalNameSubjectList are children of
        // MedlineCitation, so both parents are searched for every list.
        $author_types = array("Investigator", "Author", "PersonalNameSubject");
        foreach ($author_types as $author_type) {
            $listname = $author_type . "List";
            // "_AUTHOR_" for authors; a distinct tag per type keeps e.g.
            // investigator 1 from sharing a URI with author 1
            $uri_tag = "_" . strtoupper($author_type) . "_";
            $i = 0;
            foreach (array($article, $citation) as $holder) {
                foreach ($holder->{$listname} as $list) {
                    foreach ($list->{$author_type} as $author) {
                        $id = parent::getRes() . $pmid . $uri_tag . ++$i;
                        $author_label = $author->LastName . ($author->Initials ? ", " . $author->Initials : "");
                        parent::addRDF(
                            parent::describeIndividual($id, $author_label, parent::getVoc() . $author_type)
                            . parent::describeClass(parent::getVoc() . $author_type, $author_type)
                            . parent::triplifyString($id, parent::getVoc() . "list-position", $i)
                            . parent::triplify($pmid_uri, parent::getVoc() . strtolower($author_type), $id)
                            . parent::triplifyString($id, parent::getVoc() . "last-name", $author->LastName)
                            . parent::triplifyString($id, parent::getVoc() . "fore-name", $author->ForeName)
                            . parent::triplifyString($id, parent::getVoc() . "initials", $author->Initials)
                            . parent::triplifyString($id, parent::getVoc() . "collective-name", $author->CollectiveName)
                            . parent::triplifyString($id, parent::getVoc() . "suffix", $author->Suffix)
                        );
                        if ($author->Affiliation) {
                            // hash the affiliation text itself so that each
                            // organization gets its own resource
                            $affiliation = parent::getRes() . md5((string) $author->Affiliation);
                            parent::addRDF(
                                parent::describeIndividual($affiliation, $author->Affiliation, parent::getVoc() . "Organization")
                                . parent::describeClass(parent::getVoc() . "Organization", "Organization")
                                // the object is the organization resource, not a literal
                                . parent::triplify($id, parent::getVoc() . "affiliation", $affiliation)
                            );
                        }
                        foreach ($author->NameID as $authorNameId) {
                            if (trim((string) $authorNameId) !== "") {
                                parent::addRDF(parent::triplifyString($id, parent::getVoc() . "name-id", $authorNameId));
                            }
                        }
                    }
                }
            }
        }

        if (!empty($article->ArticleDate)) {
            $this->addDate($pmid_uri, "article-date", $article->ArticleDate);
        }

        foreach ($article->Language as $language) {
            parent::addRDF(parent::triplifyString($pmid_uri, "dc:language", $language));
        }

        if (!empty($citation->KeywordList)) {
            foreach ($citation->KeywordList->Keyword as $keyword) {
                parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "keyword", $keyword));
            }
        }

        // other identifiers; PubMed Central ids additionally get an x-pmc link
        foreach ($citation->OtherID as $otherID) {
            $other_id = trim((string) $otherID);
            if ($other_id === "") {
                continue;
            }
            parent::addRDF(
                parent::triplifyString($pmid_uri, parent::getVoc() . "other-id", $other_id)
                . parent::triplifyString($pmid_uri, parent::getVoc() . "other-id-source", $otherID['Source'])
            );
            if (strpos($other_id, "PMC") !== false) {
                parent::addRDF(parent::triplify($pmid_uri, parent::getVoc() . "x-pmc", "pmc:" . $other_id));
            }
        }

        if (!empty($article->DataBankList)) {
            foreach ($article->DataBankList->DataBank as $dataBank) {
                $dataBankName = (string) $dataBank->DataBankName;
                parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "databank", $dataBankName));
                if (!empty($dataBank->AccessionNumberList)) {
                    foreach ($dataBank->AccessionNumberList->AccessionNumber as $acc) {
                        // the predicate is named after the data bank, e.g. "x-genbank"
                        parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "x-" . strtolower($dataBankName), $acc));
                    }
                }
            }
        }

        if (!empty($article->GrantList)) {
            $i = 0;
            foreach ($article->GrantList->Grant as $grant) {
                $id = parent::getRes() . $pmid . "_GRANT_" . ++$i;
                $grant_label = "Grant " . $grant->GrantID . " for " . parent::getNamespace() . $pmid;
                parent::addRDF(
                    parent::describeIndividual($id, $grant_label, parent::getVoc() . "Grant")
                    . parent::describeClass(parent::getVoc() . "Grant", "Grant")
                    . parent::triplify($pmid_uri, parent::getVoc() . "grant", $id)
                    . parent::triplifyString($id, parent::getVoc() . "grant-identifier", $grant->GrantID)
                    . parent::triplifyString($id, parent::getVoc() . "grant-acronym", $grant->Acronym)
                    . parent::triplifyString($id, parent::getVoc() . "grant-agency", $grant->Agency)
                    . parent::triplifyString($id, parent::getVoc() . "grant-country", $grant->Country)
                );
            }
        }

        if (!empty($citation->NumberOfReferences)) {
            parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "number-of-references", $citation->NumberOfReferences));
        }

        if (!empty($article->VernacularTitle)) {
            parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "vernacular-title", $article->VernacularTitle));
        }

        foreach ($citation->CitationSubset as $citationSubset) {
            if (!empty($citationSubset)) {
                parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "citation-subset", $citationSubset));
            }
        }

        // comments and corrections (errata, retractions, "comment on", ...)
        if (!empty($citation->CommentsCorrectionsList)) {
            $i = 0;
            foreach ($citation->CommentsCorrectionsList->CommentsCorrections as $commentCorrection) {
                $id = parent::getRes() . $pmid . "_COMMENT_CORRECTION_" . ++$i;
                $ccRefType = $commentCorrection['RefType'];
                $ccPmid = trim((string) $commentCorrection->PMID); //optional
                $ccNote = $commentCorrection->Note; //optional
                $cc_label = "Comment or correction " . $i . " for " . parent::getNamespace() . $pmid;
                parent::addRDF(
                    parent::describeIndividual($id, $cc_label, parent::getVoc() . "CommentCorrection")
                    . parent::describeClass(parent::getVoc() . "CommentCorrection", "CommentCorrection")
                    . parent::triplify($pmid_uri, parent::getVoc() . "comment-correction", $id)
                    . parent::triplify($id, "rdf:type", parent::getVoc() . $ccRefType)
                    . parent::triplifyString($id, parent::getVoc() . "ref-source", $commentCorrection->RefSource)
                    . parent::triplifyString($id, parent::getVoc() . "note", $ccNote)
                );
                if ($ccPmid !== "") {
                    // link to the record that this comment/correction refers to
                    parent::addRDF(parent::triplify($id, parent::getVoc() . "x-pubmed", "pubmed:" . $ccPmid));
                }
            }
        }

        foreach ($citation->GeneralNote as $generalNote) {
            if (trim((string) $generalNote) !== "") {
                parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "general-note", $generalNote));
            }
        }

        foreach ($citation->SpaceFlightMission as $spaceFlightMission) {
            if (trim((string) $spaceFlightMission) !== "") {
                parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "space-flight-mission", $spaceFlightMission));
            }
        }

        // journal in which the article appeared
        $journal = $article->Journal;
        $journalId = parent::getRes() . $pmid . "_JOURNAL";
        $journal_label = "Journal for " . parent::getNamespace() . $pmid;
        $issn = trim((string) $journal->ISSN);
        parent::addRDF(
            parent::describeIndividual($journalId, $journal_label, parent::getVoc() . "Journal")
            . parent::describeClass(parent::getVoc() . "Journal", "Journal")
            . parent::triplify($pmid_uri, parent::getVoc() . "journal", $journalId)
            // ISSN is optional: do not emit a bare "issn:" URI when it is absent
            . ($issn !== "" ? parent::triplify($journalId, parent::getVoc() . "x-issn", "issn:" . $issn) : "")
            . parent::triplifyString($journalId, parent::getVoc() . "journal-nlm-identifier", $citation->MedlineJournalInfo->NlmUniqueID)
            . parent::triplifyString($journalId, parent::getVoc() . "journal-title", $journal->Title)
            . parent::triplifyString($journalId, parent::getVoc() . "journal-abbreviation", $journal->ISOAbbreviation)
            . parent::triplifyString($journalId, parent::getVoc() . "volume", $journal->JournalIssue->Volume)
            . parent::triplifyString($journalId, parent::getVoc() . "issue", $journal->JournalIssue->Issue)
            . parent::triplifyString($journalId, parent::getVoc() . "pages", "" . $article->Pagination->MedlinePgn)
        );

        $journalPubDate = $journal->JournalIssue->PubDate;
        if (!empty($journalPubDate)) {
            $journalYear = $journalPubDate->Year;
            $journalMonth = trim((string) $journalPubDate->Month); //optional
            $monthResolved = true;
            if ($journalMonth and !is_numeric($journalMonth[0])) {
                $mo = array("jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec");
                $monthIndex = array_search(strtolower($journalMonth), $mo, true);
                if ($monthIndex === false) {
                    // unknown month name: keep the raw text and do not build an
                    // xsd:date from it (false + 1 would silently become "01")
                    $monthResolved = false;
                } else {
                    $journalMonth = str_pad((string) ($monthIndex + 1), 2, "0", STR_PAD_LEFT);
                }
            }
            $journalDay = trim((string) $journalPubDate->Day); //optional
            if ($journalDay) {
                $journalDay = str_pad($journalDay, 2, "0", STR_PAD_LEFT);
            }
            parent::addRDF(
                parent::triplifyString($journalId, parent::getVoc() . "publication-year", $journalYear)
                . parent::triplifyString($journalId, parent::getVoc() . "publication-month", $journalMonth)
                . parent::triplifyString($journalId, parent::getVoc() . "publication-day", $journalDay)
                . parent::triplifyString($journalId, parent::getVoc() . "publication-season", $journalPubDate->Season)
                . parent::triplifyString($journalId, parent::getVoc() . "publication-date", $journalPubDate->MedlineDate)
            );
            if ($monthResolved and !empty($journalYear) and !empty($journalMonth) and !empty($journalDay)) {
                parent::addRDF(parent::triplifyString(
                    $journalId,
                    parent::getVoc() . "publication-date",
                    "{$journalYear}-{$journalMonth}-{$journalDay}",
                    "xsd:date"
                ));
            }
        }

        // electronic locations (DOI, PII, ...); the element is named ELocationID
        foreach ($article->ELocationID as $eLocation) {
            if (trim((string) $eLocation) !== "") {
                parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "elocation", $eLocation));
            }
        }

        $this->writeRDFBufferToWriteFile();
    }
}
/**
 * Convert one DrugBank <drug> record into RDF and flush it to the write file.
 *
 * The record is read from $xml->GetXMLRoot(). When $this->id_list is set, only
 * drugs whose primary DrugBank id is a key of that list are processed, and the
 * key is removed once the drug has been seen.
 *
 * @param object $xml Parser object exposing GetXMLRoot() for the current drug
 *                    element (taken by reference, not modified here).
 *
 * @return void Returns early (null) when the drug is filtered out by id_list.
 */
function parseDrugEntry(&$xml)
{
    // Packagers already described during this call (avoids re-describing them).
    $defined = array();
    // Running number used to mint unique property URIs for this drug.
    $counter = 1;

    $x = $xml->GetXMLRoot();
    $dbid = (string) $x->{"drugbank-id"};
    $did = "drugbank:" . $dbid;
    $name = (string) $x->name;
    $type = ucfirst(str_replace(" ", "-", (string) $x->attributes()->type));
    $type_label = ucfirst((string) $x->attributes()->type);
    $description = null;

    if (isset($this->id_list)) {
        if (!isset($this->id_list[$dbid])) {
            return;
        }
        unset($this->id_list[$dbid]);
    }
    echo "Processing {$dbid}" . PHP_EOL;

    if (isset($x->description) && $x->description != '') {
        $description = trim((string) $x->description);
    }

    parent::addRDF(
        parent::describeIndividual($did, $name, parent::getVoc() . "Drug", $name, $description) .
        parent::describeClass(parent::getVoc() . "Drug", "Drug") .
        parent::triplify($did, "owl:sameAs", "http://identifiers.org/drugbank/" . $dbid) .
        parent::triplify($did, "rdfs:seeAlso", "http://www.drugbank.ca/drugs/" . $dbid) .
        parent::triplify($did, "rdf:type", parent::getVoc() . $type) .
        parent::describeClass(parent::getVoc() . $type, $type_label)
    );

    foreach ($x->{'drugbank-id'} as $id) {
        parent::addRDF(parent::triplifyString($did, parent::getVoc() . "drugbank-id", $id));
    }

    // isset() is true for an empty <cas-number/> element, which would yield a
    // bare "cas:" reference; only emit the xref when a number is present.
    if (isset($x->{'cas-number'}) && (string) $x->{'cas-number'} !== '') {
        parent::addRDF(parent::triplify($did, parent::getVoc() . "x-cas", "cas:" . $x->{'cas-number'}));
    }

    // Free-text sections: each becomes its own resource linked from the drug.
    $literals = array("indication", "pharmacodynamics", "mechanism-of-action", "toxicity", "biotransformation", "absorption", "half-life", "protein-binding", "route-of-elimination", "volume-of-distribution", "clearance");
    foreach ($literals as $l) {
        if (isset($x->{$l}) and $x->{$l} != '') {
            $lid = parent::getRes() . md5($l . $x->{$l});
            parent::addRDF(
                parent::describeIndividual($lid, "{$l} for {$did}", parent::getVoc() . ucfirst($l), "{$l} for {$did}", $x->{$l}) .
                parent::describeClass(parent::getVoc() . ucfirst($l), ucfirst(str_replace("-", " ", $l))) .
                parent::triplify($did, parent::getVoc() . $l, $lid)
            );
        }
    }

    // TODO:: Replace the next two lines
    $this->AddList($x, $did, "groups", "group", parent::getVoc() . "group");
    $this->AddList($x, $did, "categories", "category", parent::getVoc() . "category");

    if (isset($x->classification)) {
        foreach ($x->classification->children() as $k => $v) {
            $cid = parent::getRes() . md5($v);
            parent::addRDF(
                parent::describeIndividual($cid, $v, parent::getVoc() . "Drug-Classification-Category") .
                parent::describeClass(parent::getVoc() . "Drug-Classification-Category", "Drug Classification Category") .
                parent::triplify($did, parent::getVoc() . "drug-classification-category", $cid)
            );
        }
    }

    $this->addLinkedResource($x, $did, 'atc-codes', 'atc-code', 'atc');
    $this->addLinkedResource($x, $did, 'ahfs-codes', 'ahfs-code', 'ahfs');

    // taxonomy
    $this->AddText($x, $did, "taxonomy", "kingdom", parent::getVoc() . "kingdom");
    // substructures
    $this->AddText($x, $did, "taxonomy", "substructures", parent::getVoc() . "substructure", "substructure");
    // synonyms
    $this->AddCategory($x, $did, "synonyms", "synonym", parent::getVoc() . "synonym");
    // brand names
    $this->AddCategory($x, $did, "international-brands", "international-brand", parent::getVoc() . "brand");

    // salt
    if (isset($x->salts->salt)) {
        foreach ($x->salts->salt as $s) {
            $sid = parent::getPrefix() . ':' . $s->{'drugbank-id'};
            parent::addRDF(
                parent::describeIndividual($sid, $s->name, parent::getVoc() . "Salt") .
                parent::describeClass(parent::getVoc() . "Salt", "Salt") .
                parent::triplify($did, parent::getVoc() . "salt", $sid)
            );
            // Skip identifiers that are absent/empty rather than emitting "cas:" or "inchikey:".
            if ((string) $s->{'cas-number'} !== '') {
                parent::addRDF(parent::triplify($sid, parent::getVoc() . "x-cas", "cas:" . $s->{'cas-number'}));
            }
            if ((string) $s->{'inchikey'} !== '') {
                parent::addRDF(parent::triplify($sid, parent::getVoc() . "x-inchikey", "inchikey:" . $s->{'inchikey'}));
            }
        }
    }

    // mixtures
    // <mixtures><mixture><name>Cauterex</name><ingredients>dornase alfa + fibrinolysin + gentamicin sulfate</ingredients></mixture>
    if (isset($x->mixtures)) {
        foreach ($x->mixtures->mixture as $o) {
            $mid = parent::getRes() . str_replace(" ", "-", $o->name[0]);
            parent::addRDF(
                parent::triplify($did, parent::getVoc() . "mixture", $mid) .
                parent::describeIndividual($mid, $o->name[0], parent::getVoc() . "Mixture") .
                parent::describeClass(parent::getVoc() . "Mixture", "mixture") .
                parent::triplifyString($mid, $this->getVoc() . "ingredients", "" . $o->ingredients[0])
            );
            // The ingredient list is a single " + "-separated string.
            $a = explode(" + ", (string) $o->ingredients[0]);
            foreach ($a as $b) {
                $b = trim($b);
                $iid = parent::getRes() . str_replace(" ", "-", $b);
                parent::addRDF(
                    parent::describeClass($iid, $b, parent::getVoc() . "Ingredient") .
                    parent::describeClass(parent::getVoc() . "Ingredient", "Ingredient") .
                    parent::triplify($mid, parent::getVoc() . "ingredient", $iid)
                );
            }
        }
    }

    // packagers
    // <packagers><packager><name>Cardinal Health</name><url>http://www.cardinal.com</url></packager>
    if (isset($x->packagers)) {
        foreach ($x->packagers as $items) {
            if (isset($items->packager)) {
                foreach ($items->packager as $item) {
                    $pid = parent::getRes() . md5($item->name);
                    parent::addRDF(parent::triplify($did, parent::getVoc() . "packager", $pid));
                    if (!isset($defined[$pid])) {
                        $defined[$pid] = '';
                        parent::addRDF(parent::describe($pid, "" . $item->name[0]));
                        // One known record carries a company name in place of a URL; skip it.
                        if (strstr($item->url, "http://") && $item->url != "http://BASF Corp.") {
                            parent::addRDF($this->triplify($pid, "rdfs:seeAlso", "" . $item->url[0]));
                        }
                    }
                }
            }
        }
    }

    // manufacturers
    $this->AddText($x, $did, "manufacturers", "manufacturer", parent::getVoc() . "manufacturer"); // @TODO RESOURCE

    // prices
    if (isset($x->prices->price)) {
        foreach ($x->prices->price as $product) {
            $pid = parent::getRes() . md5($product->description);
            parent::addRDF(
                parent::describeIndividual($pid, $product->description, parent::getVoc() . "Pharmaceutical", $product->description) .
                parent::describeClass(parent::getVoc() . "Pharmaceutical", "pharmaceutical") .
                parent::triplifyString($pid, parent::getVoc() . "price", "" . $product->cost, "xsd:float") .
                parent::triplify($did, parent::getVoc() . "product", $pid)
            );
            $uid = parent::getVoc() . md5($product->unit);
            parent::addRDF(
                parent::describeIndividual($uid, $product->unit, parent::getVoc() . "Unit", $product->unit) .
                parent::describeClass(parent::getVoc() . "Unit", "unit") .
                parent::triplify($pid, parent::getVoc() . "form", $uid)
            );
        }
    }

    // dosages <dosages><dosage><form>Powder, for solution</form><route>Intravenous</route><strength></strength></dosage>
    if (isset($x->dosages->dosage)) {
        foreach ($x->dosages->dosage as $dosage) {
            $id = parent::getRes() . md5($dosage->strength . $dosage->form . $dosage->route);
            $label = ($dosage->strength != '' ? $dosage->strength . " " : "") . $dosage->form . " form with " . $dosage->route . " route";
            parent::addRDF(
                parent::describeIndividual($id, $label, parent::getVoc() . "Dosage") .
                parent::describeClass(parent::getVoc() . "Dosage", "Dosage") .
                parent::triplify($did, parent::getVoc() . "dosage", $id)
            );
            $rid = parent::getVoc() . md5($dosage->route);
            $this->typify($id, $rid, "Route", "" . $dosage->route);
            $fid = parent::getVoc() . md5($dosage->form);
            $this->typify($id, $fid, "Form", "" . $dosage->form);
            if ($dosage->strength != '') {
                parent::addRDF(parent::triplifyString($id, parent::getVoc() . "strength", $dosage->strength));
            }
        }
    }

    // experimental-properties / calculated-properties
    $props = array("experimental-properties", "calculated-properties");
    foreach ($props as $prop) {
        if (!isset($x->{$prop})) {
            continue;
        }
        foreach ($x->{$prop} as $properties) {
            foreach ($properties as $property) {
                $kind = (string) $property->kind;
                $value = (string) $property->value;
                $type_uri = parent::getVoc() . ucfirst(str_replace(" ", "-", $kind));
                $id = parent::getRes() . $prop . "-" . $dbid . "-" . $counter++;
                $label = $property->kind . ": {$value}" . ($property->source == '' ? '' : " from " . $property->source);
                parent::addRDF(
                    parent::describeIndividual($id, $label, $type_uri) .
                    parent::describeClass($type_uri, $kind, parent::getVoc() . ucfirst($prop)) .
                    parent::describeClass(parent::getVoc() . ucfirst($prop), str_replace("-", " ", $prop)) .
                    parent::triplifyString($id, $this->getVoc() . "value", $value) .
                    parent::triplify($did, $this->getVoc() . $prop, $id)
                );
                // Source
                if (isset($property->source)) {
                    foreach ($property->source as $source) {
                        $s = (string) $source;
                        if ($s == '') {
                            continue;
                        }
                        $sid = parent::getRes() . md5($s);
                        parent::addRDF(
                            parent::describeIndividual($sid, $s, parent::getVoc() . "Source") .
                            parent::describeClass(parent::getVoc() . "Source", "Source") .
                            parent::triplify($id, parent::getVoc() . "source", $sid)
                        );
                    }
                }
            }
        }
    }

    // identifiers
    // <patents><patent><number>RE40183</number><country>United States</country><approved>1996-04-09</approved> <expires>2016-04-09</expires>
    if (isset($x->patents->patent)) {
        foreach ($x->patents->patent as $patent) {
            $id = "uspto:" . $patent->number;
            parent::addRDF(
                parent::triplify($did, $this->getVoc() . "patent", $id) .
                parent::describeIndividual($id, $patent->country . " patent " . $patent->number, $this->getVoc() . "Patent") .
                parent::describeClass(parent::getVoc() . "Patent", "patent") .
                parent::triplifyString($id, $this->getVoc() . "approved", "" . $patent->approved) .
                parent::triplifyString($id, $this->getVoc() . "expires", "" . $patent->expires)
            );
            $cid = parent::getRes() . md5($patent->country);
            $this->typify($id, $cid, "Country", "" . $patent->country);
        }
    }

    // partners
    $partners = array('target', 'enzyme', 'transporter', 'carrier');
    foreach ($partners as $partner) {
        $plural = $partner . 's';
        if (isset($x->{$plural})) {
            foreach ($x->{$plural} as $list) {
                foreach ($list->{$partner} as $item) {
                    $this->parsePartnerRelation($did, $item, $partner);
                    parent::writeRDFBufferToWriteFile();
                }
            }
        }
    }

    // drug-interactions
    if (isset($x->{"drug-interactions"})) {
        foreach ($x->{"drug-interactions"} as $ddis) {
            foreach ($ddis->{"drug-interaction"} as $ddi) {
                $dbid2 = (string) $ddi->{'drugbank-id'};
                // Only emit when this drug's id sorts first, so each pair is
                // described once rather than from both drug records.
                if ($dbid < $dbid2) {
                    $ddi_id = parent::getRes() . $dbid . "_" . $dbid2;
                    parent::addRDF(
                        parent::triplify("drugbank:" . $dbid, parent::getVoc() . "ddi-interactor-in", "" . $ddi_id) .
                        parent::triplify("drugbank:" . $dbid2, parent::getVoc() . "ddi-interactor-in", "" . $ddi_id) .
                        parent::describeIndividual($ddi_id, "DDI between {$name} and " . $ddi->name . " - " . $ddi->description, parent::getVoc() . "Drug-Drug-Interaction") .
                        parent::describeClass(parent::getVoc() . "Drug-Drug-Interaction", "drug-drug interaction")
                    );
                }
            }
        }
    }

    // food-interactions
    $this->AddText($x, $did, "food-interactions", "food-interaction", parent::getVoc() . "food-interaction");
    // affected-organisms
    $this->AddCategory($x, $did, "affected-organisms", "affected-organism", parent::getVoc() . "affected-organism");

    // <external-identifiers>
    if (isset($x->{"external-identifiers"})) {
        foreach ($x->{"external-identifiers"} as $objs) {
            foreach ($objs as $obj) {
                $ns = $this->NSMap($obj->resource);
                $id = $obj->identifier;
                if ($ns == "genecards") {
                    $id = str_replace(array(" "), array("_"), $id);
                }
                parent::addRDF(parent::triplify($did, parent::getVoc() . "x-{$ns}", "{$ns}:{$id}"));
                if ($ns == "pubchemcompound") {
                    parent::addRDF(parent::triplify("{$ns}:{$id}", "skos:exactMatch", "http://rdf.ncbi.nlm.nih.gov/pubchem/compound/{$id}"));
                }
            }
        }
    }

    // <external-links>
    if (isset($x->{"external-links"})) {
        foreach ($x->{"external-links"}->{'external-link'} as $el) {
            if (strpos($el->url, 'http') !== false) {
                parent::addRDF(parent::triplify($did, "rdfs:seeAlso", "" . $el->url));
            }
        }
    }

    parent::writeRDFBufferToWriteFile();
}
/**
 * Convert tab-delimited PSI-BLAST alignment rows from the read file into RDF.
 *
 * Columns read per row (0-based): 0 query id, 1 query start, 2 query stop,
 * 3 target start, 4 target stop, 5 percent aligned, 6 score, 7 target id,
 * 8 taxon id. The buffer is flushed to the write file after every row.
 *
 * @return bool TRUE once the read file is exhausted.
 */
function psiblast()
{
    while ($l = $this->GetReadFile()->Read(2048)) {
        $a = explode("\t", trim($l));
        $id1 = $a[0];
        $id2 = $a[7];
        // The alignment id must contain BOTH sequence ids. The previous
        // "aln_{$id1_}{$id2}" interpolated an undefined variable ($id1_), so the
        // query id was dropped and alignments sharing a target collided.
        $id = "aln_{$id1}_{$id2}";
        $aln = $this->getRes() . $id;

        $this->AddRDF(
            parent::describeIndividual($aln, "psiblast alignment between {$id1} and {$id2}", $this->getVoc() . "PSI-BLAST-Alignment") .
            parent::describeClass($this->getVoc() . "PSI-BLAST-Alignment", "PSI-Blast Alignment") .
            parent::triplify($aln, $this->getVoc() . "query", $this->getNamespace() . $id1) .
            parent::triplify($aln, $this->getVoc() . "target", $this->getNamespace() . $id2) .
            parent::triplifyString($aln, $this->getVoc() . "query-start", $a[1]) .
            parent::triplifyString($aln, $this->getVoc() . "query-stop", $a[2]) .
            parent::triplifyString($aln, $this->getVoc() . "target-start", $a[3]) .
            parent::triplifyString($aln, $this->getVoc() . "target-stop", $a[4]) .
            parent::triplifyString($aln, $this->getVoc() . "percent-aligned", $a[5]) .
            parent::triplifyString($aln, $this->getVoc() . "score", $a[6]) .
            parent::triplifyString($aln, $this->getVoc() . "is-encoded-by", "taxon:" . $a[8]) .
            parent::describeProperty($this->getVoc() . "target-start", "Relationship between an SGD sequence alignment and its target sequence start position") .
            parent::describeProperty($this->getVoc() . "target-stop", "Relationship between an SGD sequence alignment and its target sequence stop position") .
            parent::describeProperty($this->getVoc() . "score", "Relationship between an SGD sequence alignment and its score") .
            parent::describeProperty($this->getVoc() . "percent-aligned", "Relationship between an SGD sequence alignment and its percent-aligned value") .
            parent::describeProperty($this->getVoc() . "is-encoded-by", "Relationship between an SGD sequence alignment and the taxon the aligned sequences are encoded by")
        );
        parent::writeRDFBufferToWriteFile();
    } //while

    return TRUE;
}
/**
 * Convert a disorder–gene association XML file into RDF.
 *
 * For every Disorder in each parsed "DisorderList", and for every
 * DisorderGeneAssociation beneath it, this emits the gene (label, symbol,
 * synonyms, external references) and an association resource that links the
 * disorder, the gene, the association type and the association status.
 * The buffer is flushed to the write file once per disorder.
 *
 * @param string $file Path handed to the CXML reader.
 *
 * @return void
 */
function genes($file)
{
    $xml = new CXML($file);
    while ($xml->parse("DisorderList") == TRUE) {
        $x = $xml->GetXMLRoot();
        foreach ($x->Disorder as $d) {
            $orphanet_id = parent::getNamespace() . (string) $d->OrphaNumber;
            $disorder_name = (string) $d->Name;

            foreach ($d->DisorderGeneAssociationList->DisorderGeneAssociation as $dga) {
                // gene
                $gene = $dga->Gene;
                $gene_id = parent::getNamespace() . (string) $gene->OrphaNumber;
                $gene_label = (string) $gene->Name;
                $gene_symbol = (string) $gene->Symbol;
                parent::addRDF(
                    parent::describeIndividual($gene_id, $gene_label, parent::getVoc() . "Gene") .
                    parent::describeClass(parent::getVoc() . "Gene", "orphanet gene") .
                    parent::triplifyString($gene_id, parent::getVoc() . "symbol", $gene_symbol)
                );

                // Iterate the <Synonym> children, not the <SynonymList> container:
                // looping over the container and reading ->Synonym only ever
                // yields the first synonym (and an empty one for empty lists).
                foreach ($gene->SynonymList as $synonym_list) {
                    foreach ($synonym_list->Synonym as $synonym_node) {
                        parent::addRDF(parent::triplifyString($gene_id, parent::getVoc() . "synonym", (string) $synonym_node));
                    }
                }

                // Same for cross-references: visit every <ExternalReference>.
                foreach ($gene->ExternalReferenceList as $erl) {
                    foreach ($erl->ExternalReference as $er) {
                        $db = (string) $er->Source;
                        $db = parent::getRegistry()->getPreferredPrefix($db);
                        $id = (string) $er->Reference;
                        $xref = "{$db}:{$id}";
                        parent::addRDF(parent::triplify($gene_id, parent::getVoc() . "x-{$db}", $xref));
                    }
                }

                // The association URI is derived from the disorder number plus a
                // hash of the association's XML.
                $dga_id = parent::getRes() . (string) $d->OrphaNumber . "_" . md5($dga->asXML());

                $ga = $dga->DisorderGeneAssociationType;
                $ga_id = parent::getNamespace() . (string) $ga->attributes()->id;
                $ga_label = (string) $ga->Name;

                $status = $dga->DisorderGeneAssociationStatus;
                $s_id = parent::getNamespace() . (string) $status->attributes()->id;
                $s_label = (string) $status->Name;

                parent::addRDF(
                    parent::describeIndividual($dga_id, "{$ga_label} {$gene_label} in {$disorder_name} ({$s_label})", $ga_id) .
                    parent::describeClass($ga_id, $ga_label, parent::getVoc() . "Disorder-Gene-Association") .
                    parent::triplify($dga_id, parent::getVoc() . "status", $s_id) .
                    parent::describeClass($s_id, $s_label, parent::getVoc() . "Disorder-Gene-Association-Status") .
                    parent::triplify($dga_id, parent::getVoc() . "disorder", $orphanet_id) .
                    parent::describeIndividual($orphanet_id, $disorder_name, parent::getVoc() . "Disorder") .
                    parent::triplify($dga_id, parent::getVoc() . "gene", $gene_id)
                );
            }
            parent::writeRDFBufferToWriteFile();
        }
    }
    unset($xml);
}
/**
 * Convert the tab-delimited side-effect frequency file into RDF.
 *
 * Every row must have exactly 10 columns, otherwise an E_USER_ERROR is raised
 * and the method returns false. Rows whose concept type is "LLT" are skipped.
 * Each remaining row yields a drug-effect-frequency resource plus a frequency
 * resource typed as exact, qualitative or range. A 'record' checkpoint is set
 * after each row and a 'file' checkpoint before and after the whole file.
 *
 * @return false|null false on a malformed row; nothing otherwise.
 */
function freq()
{
    $cols = 10;
    parent::setCheckpoint('file');

    while ($line = parent::getReadFile()->read()) {
        // Percent signs are stripped before splitting so numeric checks work.
        $a = explode("\t", str_replace("%", "", $line));
        if (count($a) != $cols) {
            trigger_error("Expecting {$cols}, but found " . count($a) . " instead... skipping file!", E_USER_ERROR);
            return false;
        }

        $concept_type = $a[7];
        if ($concept_type == "LLT") {
            continue;
        }

        $placebo = $a[3];
        $freq = $a[4];
        $freq_lower = $a[5];
        $freq_upper = $a[6];
        $meddra_concept_id = $a[8];
        $meddra_concept_label = trim($a[9]);

        // The record id hashes the raw, unmodified input line.
        $id = "stitch_resource:" . md5("se_freq" . $line);
        $drug = "stitch:{$a[0]}";
        $label = "{$meddra_concept_label} frequency for {$drug}";

        parent::addRDF(
            parent::describeIndividual($id, $label, parent::getVoc() . "Drug-Effect-Frequency") .
            parent::describeClass(parent::getVoc() . "Drug-Effect-Frequency", "SIDER Drug-Effect and Frequency") .
            parent::triplify($id, parent::getVoc() . "drug", $drug) .
            parent::triplify($id, parent::getVoc() . "effect", "umls:" . $meddra_concept_id)
        );
        if ($placebo) {
            parent::addRDF(parent::triplifyString($id, parent::getVoc() . "placebo", "true", "xsd:boolean"));
        }

        // Classify the frequency: numeric => exact, otherwise qualitative;
        // differing bounds override either with a range.
        $is_number = is_numeric($freq);
        if ($is_number) {
            $flabel = $freq . "%";
            $ftype_label = "Exact-Frequency";
        } else {
            $flabel = $freq;
            $ftype_label = "Qualitative-Frequency";
        }
        if ($freq_lower != $freq_upper) {
            $flabel .= "({$freq_lower}-{$freq_upper})";
            $ftype_label = "Range-Frequency";
        }
        $ftype = parent::getVoc() . $ftype_label;

        $fid = $id . md5($freq_lower . $freq_upper . $meddra_concept_id);
        parent::addRDF(
            parent::triplify($id, parent::getVoc() . "frequency", $fid) .
            parent::describeIndividual($fid, $flabel, $ftype) .
            parent::describeClass($ftype, $ftype_label)
        );

        // Numeric frequencies are stored as fractions; textual ones verbatim.
        $frequency_value = $is_number ? $freq / 100 : $freq;
        parent::addRDF(parent::triplifyString($fid, parent::getVoc() . "frequency-value", $frequency_value));

        parent::addRDF(
            parent::triplifyString($fid, parent::getVoc() . "lower-frequency", sprintf("%.3f", $freq_lower)) .
            parent::triplifyString($fid, parent::getVoc() . "upper-frequency", sprintf("%.3f", $freq_upper))
        );
        parent::setCheckpoint('record');
    }

    parent::setCheckpoint('file');
}
/**
 * Convert an XML file of SNP records into RDF.
 *
 * Each child of the parsed root is treated as one SNP: its rsId, class,
 * molecule type and taxon are described, followed by validation flags, HGVS
 * names and, for a reference assembly, build / contig / chromosome details
 * and one "Fxnset" resource per child of the first MapLoc.
 *
 * @param string $file Path handed to the CXML reader.
 *
 * @return false|null false when the document has no usable root element.
 */
function parse($file)
{
    $xml = new CXML($file);
    $xml->parse();
    $entry = $xml->getXMLRoot();
    if (!isset($entry) or !$entry) {
        return false;
    }

    // Fxnset attribute => array(predicate suffix, object prefix). A null prefix
    // means the value is emitted as a string literal instead of a resource.
    $fxn_attributes = array(
        'geneId'       => array("gene", "ncbigene:"),
        'symbol'       => array("gene-symbol", null),
        'mrnaAcc'      => array("mrna", "refseq:"),
        'protAcc'      => array("protein", "refseq:"),
        'fxnClass'     => array("fxn-class", null),
        'allele'       => array("allele", null),
        'residue'      => array("residue", null),
        'readingFrame' => array("reading-frame", null),
        'aaPosition'   => array("position", null),
    );

    foreach ($entry->children() as $o) {
        $rsid = "rs" . $o->attributes()->rsId;
        $id = parent::getNamespace() . $rsid;
        $type = parent::getVoc() . ucfirst(str_replace(" ", "-", (string) $o->attributes()->snpClass));
        $moltype = parent::getVoc() . (string) $o->attributes()->molType;

        // attributes
        parent::addRDF(
            parent::describeIndividual($id, $rsid, $type) .
            parent::describeClass($type, ucfirst("" . $o->attributes()->snpClass)) .
            parent::triplify($id, parent::getVoc() . "mol-type", $moltype) .
            parent::describeClass($moltype, (string) $o->attributes()->molType, parent::getVoc() . "Moltype") .
            parent::describeClass(parent::getVoc() . "Moltype", "Moltype") .
            parent::triplify($id, parent::getVoc() . "taxid", "taxonomy:" . (string) $o->attributes()->taxId)
        );

        // The genotype flag is a boolean literal: emit the raw attribute value
        // with the real XSD datatype. It used to be prefixed with the vocabulary
        // namespace and typed "xsd:bool", which is not an XSD datatype.
        $genotype = (string) $o->attributes()->genoType;
        if ($genotype !== '') {
            parent::addRDF(parent::triplifyString($id, parent::getVoc() . "genotype", $genotype, "xsd:boolean"));
        }

        // frequency
        // create/update
        /*
        if(!isset($o->Update)) $a = $o->Create;
        else $a = $o->Update;
        parent::addRDF(parent::triplifyString($id,parent::getVoc()."build",(string) $a->attributes()->build));
        */

        // validation
        $a = $o->Validation;
        parent::addRDF(
            parent::triplifyString($id, parent::getVoc() . "validation-by-cluster", (string) $a->attributes()->byCluster) .
            parent::triplifyString($id, parent::getVoc() . "validation-by-frequency", (string) $a->attributes()->byFrequency) .
            parent::triplifyString($id, parent::getVoc() . "validation-by-2hit2allele", (string) $a->attributes()->by2Hit2Allele) .
            parent::triplifyString($id, parent::getVoc() . "validation-by-1000G", (string) $a->attributes()->by1000G)
        );

        // hgvs names
        foreach ($o->hgvs as $name) {
            parent::addRDF(parent::triplifyString($id, parent::getVoc() . "hgvs-name", (string) $name));
        }

        // assembly: only an assembly flagged reference="true" is described
        $assembly = $o->Assembly;
        if (!($assembly and $assembly->attributes()->reference == "true")) {
            continue;
        }
        parent::addRDF(
            parent::triplifyString($id, parent::getVoc() . "dbsnp-build", (string) $assembly->attributes()->dbSnpBuild) .
            parent::triplifyString($id, parent::getVoc() . "genome-build", (string) $assembly->attributes()->genomeBuild)
        );

        $component = $assembly->Component;
        if (!$component) {
            continue;
        }
        parent::addRDF(
            parent::triplify($id, parent::getVoc() . "contig-accession", "genbank:" . (string) $component->attributes()->accession) .
            parent::triplify($id, parent::getVoc() . "contig-gi", "gi:" . (string) $component->attributes()->gi) .
            parent::triplifyString($id, parent::getVoc() . "chromosome", (string) $component->attributes()->chromosome)
        );

        $maploc = $component->MapLoc;
        if (!$maploc) {
            continue;
        }
        foreach ($maploc->children() as $fxnset) {
            // Each function set gets a URI derived from a hash of its own XML.
            $fxnset_id = parent::getRes() . md5($fxnset->asXML());
            parent::addRDF(
                parent::triplify($id, parent::getVoc() . "maps-to", $fxnset_id) .
                parent::triplify($fxnset_id, "rdf:type", parent::getVoc() . "Fxnset") .
                parent::describeClass(parent::getVoc() . "Fxnset", "Fxnset")
            );

            foreach ($fxn_attributes as $attribute => $spec) {
                if (!isset($fxnset->attributes()->{$attribute})) {
                    continue;
                }
                list($predicate, $prefix) = $spec;
                $value = (string) $fxnset->attributes()->{$attribute};
                if ($prefix !== null) {
                    parent::addRDF(parent::triplify($fxnset_id, parent::getVoc() . $predicate, $prefix . $value));
                } else {
                    parent::addRDF(parent::triplifyString($fxnset_id, parent::getVoc() . $predicate, $value));
                }
            }
        }
    }
    unset($xml);
}
/**
 * Convert a tab-delimited, 17-column annotation file into RDF.
 *
 * Lines starting with "!" are skipped. A line with any other column count
 * raises an E_USER_ERROR and aborts with false. For each accepted line the
 * annotated entity is described and linked to its GO term, and a separate
 * annotation resource records the term, evidence code, category, assigner,
 * entry date and any PMID references. The buffer is flushed after each line.
 *
 * @param string $file Used as part of each annotation resource identifier.
 *
 * @return false|null false on a malformed line; nothing otherwise.
 */
function process($file)
{
    // Relation to use for each aspect when the qualifier is exactly 'NOT'.
    $negated_relations = array(
        'process'   => 'not-in-process',
        'function'  => 'not-has-function',
        'component' => 'not-in-component',
    );

    $z = 1;
    while ($line = parent::getReadFile()->read(100000)) {
        if ($z % 100000 == 0) {
            parent::clear();
        }
        if ($line[0] == "!") {
            continue;
        }

        $fields = explode("\t", $line);
        if (count($fields) != 17) {
            trigger_error("Expected 17 columns, but found " . count($fields), E_USER_ERROR);
            return false;
        }

        // get the Go id (column 4 without its first three characters)
        $db = $fields[0];
        $id = $fields[1];
        $symbol = $fields[2];
        $qualifier = $fields[3];
        $goid = substr($fields[4], 3);
        $refs = $this->getDbReferences($fields[5]);
        $eco = $this->getEvidenceCodeLabelArr($fields[6]);
        $aspect = $this->getAspect($fields[8]);
        $label = $fields[9];
        $synonyms = explode("|", $fields[10]);
        $taxid = $fields[12];
        $date = $this->parseDate($fields[13]);
        $assignedBy = $fields[14];

        // entity id; lines whose database cannot be resolved are dumped and skipped
        $eid = $this->getdbURI($db, $id);
        if (!$eid) {
            print_r($fields);
            continue;
        }

        parent::addRDF(
            parent::describeIndividual($eid, $label, parent::getVoc() . "GO-Annotation") .
            parent::describeClass(parent::getVoc() . "GO-Annotation", "GO Annotation") .
            parent::triplifyString($eid, parent::getVoc() . "symbol", $symbol)
        );
        parent::addRDF(parent::triplify($eid, parent::getVoc() . "x-taxonomy", $taxid));

        foreach ($synonyms as $synonym) {
            if (empty($synonym)) {
                continue;
            }
            parent::addRDF(parent::triplifyString($eid, parent::getVoc() . "synonym", $synonym));
        }

        // The relation defaults to the aspect itself and is swapped for its
        // negated form when the qualifier is 'NOT'.
        $rel = $aspect;
        if ($qualifier == 'NOT') {
            foreach ($negated_relations as $aspect_name => $negated) {
                if ($aspect == $aspect_name) {
                    $rel = $negated;
                }
            }
        }
        parent::addRDF(
            parent::describeObjectProperty(parent::getVoc() . $rel, str_replace("-", " ", $rel)) .
            parent::triplify($eid, parent::getVoc() . $rel, "go:" . $goid)
        );

        $type = key($eco);
        $aid = parent::getRes() . $file . "_" . $z++;
        parent::addRDF(
            parent::describeObjectProperty(parent::getVoc() . "go-annotation", "GO annotation") .
            parent::triplify($eid, parent::getVoc() . "go-annotation", $aid)
        );

        $cat = parent::getRes() . md5($aspect);
        parent::addRDF(
            parent::describeIndividual($aid, "{$id}-go:{$goid} association", parent::getVoc() . "GO-Annotation") .
            parent::triplify($aid, parent::getVoc() . "target", $eid) .
            parent::triplify($aid, parent::getVoc() . "go-term", "go:" . $goid) .
            parent::triplify($aid, parent::getVoc() . "evidence", "eco:" . $eco[$type][1]) .
            parent::triplify($aid, parent::getVoc() . "go-category", $cat) .
            parent::describeClass($cat, $aspect) .
            parent::triplifyString($aid, parent::getVoc() . "assigned-by", $assignedBy)
        );

        if ($date != '') {
            parent::addRDF(parent::triplifyString($aid, parent::getVoc() . "entry-date", $date . "T00:00:00Z", "xsd:dateTime"));
        }

        // Only PubMed references are turned into article links.
        foreach ($refs as $ref) {
            $parts = explode(":", $ref);
            if ($parts[0] == 'PMID') {
                parent::addRDF(parent::triplify($aid, parent::getVoc() . "article", "pubmed:" . $parts[1]));
            }
        }

        // write RDF to file
        parent::writeRDFBufferToWriteFile();
    }
}
/**
 * Convert one decoded OMIM entry into RDF and add it to the RDF buffer.
 *
 * Depending on which keys the entry carries, this emits: links to omim.org and
 * identifiers.org, titles, text sections (with "refers-to" links for six-digit
 * numbers written in braces), allelic variants, the clinical synopsis, the
 * gene map, phenotype maps, literature references and external identifiers.
 * The buffer is not flushed here.
 *
 * @param array  $obj  decoded response; the entry is read from
 *                     $obj["omim"]["entryList"][0]["entry"]
 * @param string $type entry type; used verbatim as the class label and, with
 *                     spaces and slashes replaced by dashes, as the class name
 */
function ParseEntry($obj, $type) {
    $o = $obj["omim"]["entryList"][0]["entry"];
    $omim_id = $o['mimNumber'];
    $omim_uri = parent::getNamespace() . $o['mimNumber'];
    if (isset($o['version'])) {
        parent::setDatasetVersion($o['version']);
    }

    // add the links
    parent::addRDF($this->QQuadO_URL($omim_uri, "rdfs:seeAlso", "http://omim.org/entry/" . $omim_id));
    parent::addRDF($this->QQuadO_URL($omim_uri, "owl:sameAs", "http://identifiers.org/omim/" . $omim_id));

    // parse titles
    $titles = $o['titles'];
    $type_uri = parent::getVoc() . str_replace(array(" ", "/"), "-", ucfirst($type));
    parent::addRDF(
        parent::describeIndividual($omim_uri, $titles['preferredTitle'], $type_uri) .
        parent::describeClass($type_uri, $type)
    );
    if (isset($titles['preferredTitle'])) {
        parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "preferred-title", $titles['preferredTitle']));
    }
    if (isset($titles['alternativeTitles'])) {
        foreach (explode(";;", $titles['alternativeTitles']) as $title) {
            parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "alternative-title", trim($title)));
        }
    }

    // parse text sections
    if (isset($o['textSectionList'])) {
        foreach ($o['textSectionList'] as $section) {
            $section_title = $section['textSection']['textSectionTitle'];
            $section_content = $section['textSection']['textSectionContent'];
            if ($section_title == "Description") {
                parent::addRDF(parent::triplifyString($omim_uri, "dc:description", $section_content));
            } else {
                // every other section becomes its own predicate, named after the section title
                $p = str_replace(" ", "-", strtolower($section_title));
                parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . $p, $section_content));
            }

            // parse the omim references: six-digit numbers written in braces
            preg_match_all("/\\{([0-9]{6})\\}/", $section_content, $m);
            if (isset($m[1][0])) {
                foreach ($m[1] as $oid) {
                    parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "refers-to", "omim:{$oid}"));
                }
            }
        }
    }

    // allelic variants
    if (isset($o['allelicVariantList'])) {
        foreach ($o['allelicVariantList'] as $i => $v) {
            $v = $v['allelicVariant'];
            $uri = parent::getRes() . $omim_id . "_allele_" . $i;
            $label = str_replace("\n", " ", $v['name']);
            parent::addRDF(
                parent::describeIndividual($uri, $label, parent::getVoc() . "Allelic-Variant") .
                parent::describeClass(parent::getVoc() . "Allelic-Variant", "Allelic Variant")
            );
            if (isset($v['alternativeNames'])) {
                foreach (explode(";;", $v['alternativeNames']) as $name) {
                    $name = str_replace("\n", " ", $name);
                    parent::addRDF(parent::triplifyString($uri, parent::getVoc() . "alternative-names", $name));
                }
            }
            if (isset($v['text'])) {
                parent::addRDF(parent::triplifyString($uri, "dc:description", $v['text']));
            }
            if (isset($v['mutations'])) {
                parent::addRDF(parent::triplifyString($uri, parent::getVoc() . "mutation", $v['mutations']));
            }
            if (isset($v['dbSnps'])) {
                foreach (explode(",", $v['dbSnps']) as $snp) {
                    parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-dbsnp", "dbsnp:" . $snp));
                }
            }
            parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "variant", $uri));
        }
    }

    // clinical synopsis
    if (isset($o['clinicalSynopsis'])) {
        $cs = $o['clinicalSynopsis'];
        $cs_uri = parent::getRes() . $omim_id . "_cs";
        parent::addRDF(
            parent::describeIndividual($cs_uri, "Clinical synopsis for omim {$omim_id}", parent::getVoc() . "Clinical-Synopsis") .
            parent::describeClass(parent::getVoc() . "Clinical-Synopsis", "Clinical Synopsis") .
            parent::triplify($omim_uri, parent::getVoc() . "clinical-synopsis", $cs_uri)
        );
        $provenance_keys = array('contributors', 'creationDate', 'editHistory', 'epochCreated', 'dateCreated', 'epochUpdated', 'dateUpdated');
        foreach ($cs as $k => $v) {
            // ignore the boolean "...Exists" assertions
            if (strstr($k, "Exists")) {
                continue;
            }
            // @todo ignore provenance for now
            if (in_array($k, $provenance_keys)) {
                continue;
            }
            if (!is_array($v)) {
                $v = array($k => $v);
            }
            foreach ($v as $k1 => $v1) {
                foreach (explode(";", $v1) as $coded_phenotype) {
                    $coded_phenotype = trim($coded_phenotype);
                    if (!$coded_phenotype) {
                        continue;
                    }
                    // the phenotype text is the entry with its {...} code block removed
                    $phenotype = preg_replace("/\\{.*\\}/", "", $coded_phenotype);
                    $phenotype_id = parent::getRes() . md5(strtolower($phenotype));
                    $entity_id = parent::getRes() . $k1;
                    parent::addRDF(
                        parent::describeIndividual($phenotype_id, $phenotype, parent::getVoc() . 'Characteristic') .
                        parent::describeClass(parent::getVoc() . 'Characteristic', 'Characteristic') .
                        parent::triplify($cs_uri, parent::getVoc() . "feature", $phenotype_id) .
                        parent::describeIndividual($entity_id, $k1, parent::getVoc() . "Entity") .
                        parent::describeClass(parent::getVoc() . "Entity", "Entity") .
                        parent::triplify($phenotype_id, parent::getVoc() . "characteristic-of", $entity_id)
                    );

                    // parse out the vocab references held inside the braces
                    preg_match_all("/\\{([0-9A-Za-z \\:\\-\\.]+)\\}|;/", $coded_phenotype, $codes);
                    if (!isset($codes[1][0])) {
                        continue;
                    }
                    foreach ($codes[1] as $entry) {
                        foreach (explode(" ", trim($entry)) as $e) {
                            // bare vocabulary words carry no identifier of their own
                            if ($e == "HPO" || $e == "EOM") {
                                continue;
                            }
                            $this->getRegistry()->parseQName($e, $ns, $id);
                            if (!isset($ns) || $ns == '') {
                                // no prefix: treated as an OMIM number, keeping the part before the first dot
                                $b = explode(".", $id);
                                $ns = "omim";
                                $id = $b[0];
                            } else {
                                $ns = str_replace(
                                    array("hpo", "id", "icd10cm", "icd9cm", "snomedct"),
                                    array("hp", "eom", "icd10", "icd9", "snomed"),
                                    $ns
                                );
                            }
                            parent::addRDF(parent::triplify($phenotype_id, parent::getVoc() . "x-{$ns}", "{$ns}:{$id}"));
                        }
                    }
                }
            }
        }
    }

    // genemap
    if (isset($o['geneMap'])) {
        $map = $o['geneMap'];
        if (isset($map['chromosome'])) {
            parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "chromosome", (string) $map['chromosome']));
        }
        if (isset($map['cytoLocation'])) {
            parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "cytolocation", (string) $map['cytoLocation']));
        }
        if (isset($map['geneSymbols'])) {
            foreach (preg_split("/[,;\\. ]+/", $map['geneSymbols']) as $symbol) {
                parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "gene-symbol", "symbol:" . trim($symbol)));
            }
        }
        if (isset($map['geneName'])) {
            foreach (explode(",", $map['geneName']) as $name) {
                parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "gene-name", trim($name)));
            }
        }
        if (isset($map['mappingMethod'])) {
            foreach (explode(",", $map['mappingMethod']) as $c) {
                $method_uri = $this->get_method_type(trim($c));
                if ($method_uri !== false) {
                    parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "mapping-method", $method_uri));
                }
            }
        }
        if (isset($map['mouseGeneSymbol'])) {
            foreach (explode(",", $map['mouseGeneSymbol']) as $c) {
                parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "mouse-gene-symbol", "symbol:" . strtoupper($c)));
            }
        }
        if (isset($map['mouseMgiID'])) {
            foreach (explode(",", $map['mouseMgiID']) as $c) {
                parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-mgi", $c));
            }
        }
        if (isset($map['geneInheritance']) && $map['geneInheritance'] != '') {
            parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "gene-inheritance", $map['geneInheritance']));
        }
    }

    // phenotype maps
    if (isset($o['phenotypeMapList'])) {
        foreach ($o['phenotypeMapList'] as $i => $phenotypeMap) {
            $phenotypeMap = $phenotypeMap['phenotypeMap'];
            $pm_uri = parent::getRes() . $omim_id . "_pm_" . ($i + 1);
            parent::addRDF(
                parent::describeIndividual($pm_uri, "phenotype mapping for {$omim_id}", parent::getVoc() . "Phenotype-Map") .
                parent::describeClass(parent::getVoc() . "Phenotype-Map", "OMIM Phenotype-Map") .
                parent::triplify($omim_uri, parent::getVoc() . "phenotype-map", $pm_uri)
            );
            foreach (array_keys($phenotypeMap) as $k) {
                if (in_array($k, array("mimNumber", "phenotypeMimNumber", "phenotypicSeriesMimNumber"))) {
                    parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . $k, "omim:" . $phenotypeMap[$k]));
                } else if ($k == "geneSymbols") {
                    foreach (explode(", ", $phenotypeMap[$k]) as $gene) {
                        parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . "gene-symbol", "hgnc.symbol:" . $gene));
                    }
                } else if ($k == "phenotypeMappingKey") {
                    $l = $this->get_phenotype_mapping_method_type($phenotypeMap[$k]);
                    parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . "mapping-method", $l));
                } else {
                    // any other key is stored as a literal under a predicate of the same name
                    parent::addRDF(parent::triplifyString($pm_uri, parent::getVoc() . $k, $phenotypeMap[$k]));
                }
            }
        }
    }

    // references
    if (isset($o['referenceList'])) {
        foreach ($o['referenceList'] as $r) {
            $r = $r['reference'];
            if (!isset($r['pubmedID'])) {
                continue;
            }
            $pubmed_uri = "pubmed:" . $r['pubmedID'];
            parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "article", $pubmed_uri));

            // Use the generic label when the reference carries no title, instead
            // of reading a missing 'title' key.
            $title = isset($r['title']) ? $r['title'] : 'article';
            parent::addRDF(parent::describe($pubmed_uri, addslashes($title)));

            if (isset($r['articleUrl'])) {
                parent::addRDF($this->QQuadO_URL($pubmed_uri, "rdfs:seeAlso", htmlentities($r['articleUrl'])));
            }
        }
    }

    // external ids
    if (isset($o['externalLinks'])) {
        // external link key => namespace used for the "x-<ns>" predicate and the id prefix
        $xref_namespaces = array(
            'approvedGeneSymbols' => 'symbol',
            'geneIDs' => 'ncbigene',
            'ncbiReferenceSequences' => 'gi',
            'genbankNucleotideSequences' => 'gi',
            'proteinSequences' => 'gi',
            'uniGenes' => 'unigene',
            'ensemblIDs' => 'ensembl',
            'swissProtIDs' => 'uniprot',
            'mgiIDs' => 'mgi',
            'flybaseIDs' => 'flybase',
            'zfinIDs' => 'zfin',
            'hprdIDs' => 'hprd',
            'orphanetDiseases' => 'orphanet',
            'refSeqAccessionIDs' => 'refseq',
            'ordrDiseases' => 'ordr',
            'snomedctIDs' => 'snomed',
            'icd10cmIDs' => 'icd10',
            'icd9cmIDs' => 'icd9',
            'umlsIDs' => 'umls',
            'wormbaseIDs' => 'wormbase',
            'diseaseOntologyIDs' => 'do',
        );
        // specifically ignoring these keys
        $ignored_links = array(
            'geneTests', 'cmgGene', 'geneticAllianceIDs', 'nextGxDx',
            'nbkIDs', // NBK1207;;Alport Syndrome and Thin Basement Membrane Nephropathy
            'newbornScreeningUrls', 'decipherUrls', 'geneReviewShortNames', 'locusSpecificDBs',
            'geneticsHomeReferenceIDs', 'omiaIDs', 'coriellDiseases', 'clinicalDiseaseIDs',
            'possumSyndromes', 'keggPathways', 'gtr', 'gwasCatalog', 'mgiHumanDisease',
            'wormbaseDO',
            'dermAtlas', // true/false
        );

        foreach ($o['externalLinks'] as $k => $id) {
            if ($id === false) {
                continue;
            }
            if (!isset($xref_namespaces[$k])) {
                if (!in_array((string) $k, $ignored_links, true)) {
                    echo "unhandled external link {$k} {$id}" . PHP_EOL;
                }
                // without a namespace nothing is emitted for this key
                continue;
            }
            $ns = $xref_namespaces[$k];

            if ($k === 'mgiIDs') {
                // Drop the prefix (text up to the first colon) from every
                // comma-separated id, not just from the list as a whole.
                $parts = array();
                foreach (explode(",", $id) as $part) {
                    $pos = strpos($part, ":");
                    $parts[] = ($pos === false) ? $part : (string) substr($part, $pos + 1);
                }
                $id = implode(",", $parts);
            } else if ($k === 'ordrDiseases') {
                // keep only what precedes the first ";;"
                $b = explode(";;", $id);
                $id = $b[0];
            }

            foreach (explode(",", $id) as $xref) {
                if (strstr($xref, ";;") === FALSE) {
                    parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-{$ns}", $ns . ':' . $xref));
                    continue;
                }
                // multiple ids//names: parts containing a lowercase letter are skipped
                foreach (explode(";;", $xref) as $c) {
                    preg_match("/([a-z])/", $c, $m);
                    if (!isset($m[1])) {
                        parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-{$ns}", $ns . ':' . $c));
                    }
                }
            }
        }
    } //external links
}
/**
 * Add an RDF representation of one qualifier record to the model.
 *
 * The record is described as a "Qualifier-Descriptor" individual labelled with
 * its first SH value. Every key of the record must be present in the map
 * returned by getQualifierDataElements(); an unknown key raises E_USER_ERROR.
 * For the keys listed in the table below, each value becomes a literal on the
 * predicate named by that map, and the predicate itself is described. Keys
 * that are in the map but not in the table produce no triples. The RDF buffer
 * is written out after every key and once more at the end.
 *
 * @param array $qual_record_arr assoc array with the contents of one qualifier
 *                               record; each value is iterated as a list
 */
private function makeQualifierRecordRDF($qual_record_arr) {
    //get the UI of the qualifier record
    $qr_ui = $qual_record_arr["UI"][0];
    $qr_res = $this->getNamespace() . $qr_ui;
    $qr_label = $qual_record_arr['SH'][0];
    parent::AddRDF(
        parent::describeIndividual($qr_res, $qr_label, $this->getVoc() . "Qualifier-Descriptor", $qr_label) .
        parent::describeClass($this->getVoc() . "Qualifier-Descriptor", "MeSH Qualifier Descriptor")
    );

    //now get the descriptor_data_elements
    $qde = $this->getQualifierDataElements();

    // record key => description attached to the corresponding property
    $property_descriptions = array(
        "AN" => "Relationship between a qualifier record and its annotation",
        "DA" => "Relationship between a qualifier record and its date of entry",
        "DQ" => "Relationship between a qualifier record and its date qualifier established",
        "GM" => "Relationship between a qualifier record and its grateful med note",
        "HN" => "Relationship between a qualifier record and its history note",
        "MED" => "Relationship between a qualifier record and its backfile postings",
        "M94" => "Relationship between a qualifier record and its backfile postings",
        "M90" => "Relationship between a qualifier record and its backfile postings",
        "M85" => "Relationship between a qualifier record and its backfile postings",
        "M80" => "Relationship between a qualifier record and its backfile postings",
        "M75" => "Relationship between a qualifier record and its backfile postings",
        "M66" => "Relationship between a qualifier record and its backfile postings",
        "MR" => "Relationship between a qualifier record and its major revision date",
        "MS" => "Relationship between a qualifier record and its MeSH scope note",
        "OL" => "Relationship between a qualifier record and its online note",
        "QA" => "Relationship between a qualifier record and its toplical qualifier abbreviation",
        "QE" => "Relationship between a qualifier record and its qualifier entry version",
        "QS" => "Relationship between a qualifier record and its qualifier sort version",
        "QT" => "Relationship between a qualifier record and its qualifier type",
        "QX" => "Relationship between a qualifier record and its qualifier cross reference",
        "RECTYPE" => "Relationship between a qualifier record and its record type",
        "SH" => "Relationship between a qualifier record and its subheading",
        "TN" => "Relationship between a qualifier record and its tree node allowed",
    );
    // keys whose values go through formatDate() and are typed xsd:date
    $date_keys = array("DA", "DQ", "MR");

    //iterate over the properties
    foreach ($qual_record_arr as $k => $v) {
        if (!array_key_exists($k, $qde)) {
            trigger_error("Please add key to qualifier record map: " . $k . PHP_EOL, E_USER_ERROR);
        } else if (isset($property_descriptions[(string) $k])) {
            $key = (string) $k;
            $predicate = $this->getVoc() . $qde[$key];
            $description = $property_descriptions[$key];
            foreach ($v as $vv) {
                if ($key === "AN") {
                    // an annotation value holds several notes separated by semicolons
                    foreach (explode(";", $vv) as $anAn) {
                        parent::AddRDF(
                            parent::triplifyString($qr_res, $predicate, $anAn) .
                            parent::describeProperty($predicate, $description)
                        );
                    }
                } else if (in_array($key, $date_keys, true)) {
                    parent::AddRDF(
                        parent::triplifyString($qr_res, $predicate, $this->formatDate($vv), "xsd:date") .
                        parent::describeProperty($predicate, $description)
                    );
                } else {
                    parent::AddRDF(
                        parent::triplifyString($qr_res, $predicate, utf8_encode(htmlspecialchars($vv))) .
                        parent::describeProperty($predicate, $description)
                    );
                }
            }
        }
        $this->WriteRDFBufferToWriteFile();
    } //foreach
    $this->WriteRDFBufferToWriteFile();
}
/**
 * Convert the tab-separated interaction file into RDF, one row at a time.
 *
 * The first line is the header (its first character is dropped before
 * splitting) and must have 54 columns. Every following row describes one
 * interaction: its type, detection method, the two interactors with their
 * roles and types, the members of both sequence groups, articles, source,
 * confidence scores, expansion method, host organism, creation date and the
 * identical/canonical interaction groups. The RDF buffer is written out after
 * each row.
 *
 * The script exits when column 14 of a row does not hold exactly three
 * "|"-separated entries. Rows with an edge type other than X, C or Y are
 * skipped with a warning.
 *
 * @return bool|null FALSE when the header does not have 54 columns; nothing
 *                   is returned otherwise
 */
function Parse() {
    $l = parent::getReadFile()->read(100000);
    $header = explode("\t", trim(substr($l, 1)));
    // check # of columns
    if (($c = count($header)) != 54) {
        trigger_error("Expecting 54 columns, found {$c}!");
        return FALSE;
    }

    // MI interaction types that already received an rdfs:label
    $defined = array();

    while ($l = parent::getReadFile()->read(500000)) {
        $a = explode("\t", trim($l));

        // irefindex identifiers
        $rigid = "irefindex." . $a[34];            // checksum for interaction
        $rogida = "irefindex." . $a[32];           // checksum for A
        $rogidb = "irefindex." . $a[33];           // checksum for B
        $irigid = "irefindex.irigid:" . $a[44];    // integer id for interaction
        $irogida = "irefindex.irogid:" . $a[42];   // integer id for A
        $irogidb = "irefindex.irogid:" . $a[43];   // integer id for B
        $crigid = "irefindex.crigid:" . $a[47];    // checksum for canonical interaction
        $icrigid = "irefindex.icrigid:" . $a[50];  // integer id for canonical interaction
        $crogida = "irefindex.crogid:" . $a[45];   // checksum for A's canonical group
        $crogidb = "irefindex.crogid:" . $a[46];   // checksum for B's canonical group
        $icrogida = "irefindex.icrogid:" . $a[48]; // integer for A's canonical group
        $icrogidb = "irefindex.icrogid:" . $a[49]; // integer for B's canonical group

        // 13 contains the original identifier, the rigid, and the edgetype
        $ids = explode("|", $a[13]);
        if (count($ids) != 3) {
            trigger_error("Expecting 3 entries in column 14");
            print_r($ids);
            exit;
        }
        parent::getRegistry()->parseQName($ids[0], $ns, $id);
        if ($id == '-') {
            // this happens with hprd
            $iid = "hprd:" . substr($ids[1], 6);
        } else {
            $iid = $ns . ":" . $id;
        }

        // get the type
        if ($a[52] == "X") {
            $label = "{$a[0]} - {$a[1]} Interaction";
            $type = "Pairwise-Interaction";
        } else if ($a[52] == "C") {
            $label = $a[53] . " component complex"; // num of participants
            $type = "Multimeric-Complex";
        } else if ($a[52] == "Y") {
            $label = "{$a[0]} homomeric complex";
            $type = "Homopolymeric-Complex";
        } else {
            // Without this branch the label and type of the previous row would be reused.
            trigger_error("Unknown edge type '{$a[52]}' for {$iid}; skipping row", E_USER_WARNING);
            continue;
        }
        parent::addRDF(
            parent::describeIndividual($iid, $label, parent::getVoc() . $type) .
            parent::describeClass(parent::getVoc() . $type, str_replace("-", " ", $type))
        );

        // interaction detection method [6]
        if ($a[6] != '-') {
            $data = $this->ParseStringArray($a[6]);
            $qname = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(
                parent::triplify($iid, parent::getVoc() . "method", $qname) .
                parent::describeClass($qname, $data['label'])
            );
        }
        parent::addRDF(parent::triplify($iid, "rdfs:seeAlso", "http://wodaklab.org/iRefWeb/interaction/show/" . $a[50]));

        // set the interactors: columns 0/1, with roles in 16-19 and types in 20/21
        for ($i = 0; $i <= 1; $i++) {
            $p = ($i == 1) ? 'b' : 'a';
            $data = $this->ParseStringArray($a[$i]);
            $interactor = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(parent::triplify($iid, parent::getVoc() . "interactor_{$p}", $interactor));

            // biological role
            $role = $a[16 + $i];
            if ($role != '-') {
                $data = $this->ParseStringArray($role);
                $qname = trim($data["ns"]) . ":" . trim($data["id"]);
                if ($qname != "mi:0000") {
                    parent::addRDF(
                        parent::triplify($iid, parent::getVoc() . "interactor_{$p}" . "_biological_role", $qname) .
                        parent::describeClass($qname, $data['label'])
                    );
                }
            }

            // experimental role
            $role = $a[18 + $i];
            if ($role != '-') {
                $data = $this->ParseStringArray($role);
                $qname = trim($data["ns"]) . ":" . trim($data["id"]);
                if ($qname != "mi:0000") {
                    parent::addRDF(
                        parent::triplify($iid, parent::getVoc() . "interactor_{$p}" . "_experimental_role", $qname) .
                        parent::describeClass($qname, $data['label'])
                    );
                }
            }

            // interactor type
            $interactor_type = $a[20 + $i];
            if ($interactor_type != '-') {
                $data = $this->ParseStringArray($interactor_type);
                $qname = trim($data["ns"]) . ":" . trim($data["id"]);
                parent::addRDF(
                    parent::triplify($interactor, "rdf:type", $qname) .
                    parent::describeClass($qname, $data['label'])
                );
            }
        }

        // add the alternatives through the taxon + seq redundant group
        for ($j = 0; $j <= 1; $j++) {
            $taxid = '';
            $rogid = "irefindex." . $a[32 + $j];
            parent::addRDF(
                parent::describeIndividual($rogid, "", parent::getVoc() . "Taxon-Sequence-Identical-Group") .
                parent::describeClass(parent::getVoc() . "Taxon-Sequence-Identical-Group", "Taxon + Sequence Identical Group")
            );

            $tax = $a[9 + $j];
            if ($tax && $tax != '-' && $tax != '-1') {
                $data = $this->ParseStringArray($tax);
                $taxid = trim($data["ns"]) . ":" . trim($data["id"]);
                parent::addRDF(parent::triplify($rogid, parent::getVoc() . "x-taxonomy", $taxid));
            }

            foreach (explode("|", $a[3 + $j]) as $item) {
                $data = $this->ParseStringArray($item);
                $ns = trim($data["ns"]);
                $id = trim($data["id"]);
                $qname = $ns . ":" . $id;
                // the group's own rogid/irogid entries are not members
                if ($ns && $ns != 'rogid' && $ns != 'irogid' && $id != '-') {
                    parent::addRDF(parent::triplify($rogid, parent::getVoc() . "has-member", $qname));
                    if ($taxid && $taxid != '-' && $taxid != '-1') {
                        parent::addRDF(parent::triplify($qname, parent::getVoc() . "x-taxonomy", $taxid));
                    }
                }
            }
        }

        // publications
        foreach (explode("|", $a[8]) as $item) {
            // Skip the placeholder and the null PubMed id. The previous test
            // ($item == '-' && $item != 'pubmed:0') could never skip 'pubmed:0'.
            if ($item == '-' || $item == 'pubmed:0') {
                continue;
            }
            $data = $this->ParseStringArray($item);
            $qname = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(parent::triplify($iid, parent::getVoc() . "article", $qname));
        }

        // MI interaction type
        if ($a[11] != '-' && $a[11] != 'NA') {
            $data = $this->ParseStringArray($a[11]);
            $qname = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(parent::triplify($iid, "rdf:type", $qname));
            if (!isset($defined[$qname])) {
                // label each interaction type only once per run
                $defined[$qname] = '';
                parent::addRDF(parent::triplifyString($qname, "rdfs:label", $data['label']));
            }
        }

        // source
        if ($a[12] != '-') {
            $data = $this->ParseStringArray($a[12]);
            $qname = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(parent::triplify($iid, parent::getVoc() . "source", $qname));
        }

        // confidence
        foreach (explode("|", $a[14]) as $item) {
            $data = $this->ParseStringArray($item);
            $ns = trim($data["ns"]);
            $id = trim($data["id"]);
            if ($ns == 'lpr') {
                // lowest number of distinct interactions that any one article reported
                parent::addRDF(parent::triplifyString($iid, parent::getVoc() . "minimum-number-interactions-reported", $id));
            } else if ($ns == "hpr") {
                // higher number of distinct interactions that any one article reports
                parent::addRDF(parent::triplifyString($iid, parent::getVoc() . "maximum-number-interactions-reported", $id));
            } else if ($ns == 'np' || $ns == 'hp') {
                // total number of unique PMIDs used to support the interaction.
                // NOTE(review): this used to be the assignment `$ns = 'hp'`, which
                // matched every remaining score. 'np' is assumed to be the prefix
                // used in the data (confirm against the iRefIndex README); 'hp'
                // is kept because the code named it.
                parent::addRDF(parent::triplifyString($iid, parent::getVoc() . "number-supporting-articles", $id));
            }
        }

        // expansion method
        if ($a[15]) {
            $id = parent::getRes() . md5($a[15]);
            parent::addRDF(
                parent::describeIndividual($id, $a[15], parent::getVoc() . "Expansion-Method") .
                parent::describeClass(parent::getVoc() . "Expansion-Method", "Expansion Method") .
                parent::triplify($iid, parent::getVoc() . "expansion-method", $id)
            );
        }

        // host organism
        if ($a[28] != '-') {
            $data = $this->ParseStringArray($a[28]);
            $qname = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(parent::triplify($iid, parent::getVoc() . "host-organism", $qname));
        }

        // @todo add to record
        // created 2010/05/18
        $date = str_replace("/", "-", $a[30]) . "T00:00:00Z";
        parent::addRDF(parent::triplifyString($iid, "dc:created", $date, "xsd:dateTime"));

        // taxon-sequence identical interaction group and its canonical counterpart
        parent::addRDF(
            parent::triplify($iid, parent::getVoc() . "taxon-sequence-identical-interaction", $rigid) .
            parent::triplify($rigid, "rdf:type", parent::getVoc() . "Taxon-Sequence-Identical-Interaction") .
            parent::describeClass(parent::getVoc() . "Taxon-Sequence-Identical-Interaction", "Taxon + Sequence Identical Interaction") .
            parent::triplify($rigid, parent::getVoc() . "irigid", $irigid) .
            parent::triplify($rigid, parent::getVoc() . "interactor-a", $rogida) .
            parent::triplify($rogida, parent::getVoc() . "irogid", $irogida) .
            parent::triplify($rigid, parent::getVoc() . "interactor-b", $rogidb) .
            parent::triplify($rogidb, parent::getVoc() . "irogid", $irogidb) .
            parent::triplify($rogida, parent::getVoc() . "canonical-group", $crogida) .
            parent::triplify($rogidb, parent::getVoc() . "canonical-group", $crogidb) .
            parent::triplify($rigid, parent::getVoc() . "taxon-sequence-similar-interaction", $crigid) .
            parent::triplify($crigid, "rdf:type", parent::getVoc() . "Taxon-Sequence-Canonical-Interaction") .
            parent::describeClass(parent::getVoc() . "Taxon-Sequence-Canonical-Interaction", "Taxon + Sequence Canonical Interaction") .
            parent::triplify($crigid, parent::getVoc() . "icrigid", $icrigid) .
            parent::triplify($crigid, parent::getVoc() . "interactor-a-canonical-group", $crogida) .
            parent::triplify($crogida, "rdf:type", parent::getVoc() . "Taxon-Sequence-Similar-Group") .
            parent::triplify($crogida, parent::getVoc() . "icrogid", $icrogida) .
            parent::triplify($crigid, parent::getVoc() . "interactor-b-canonical-group", $crogidb) .
            parent::triplify($crogidb, "rdf:type", parent::getVoc() . "Taxon-Sequence-Similar-Group") .
            parent::triplify($crogidb, parent::getVoc() . "icrogid", $icrogidb) .
            parent::describeClass(parent::getVoc() . "Taxon-Sequence-Similar-Group", "Taxon + Sequence Similar Group")
        );
        parent::writeRDFBufferToWriteFile();
    }
}
/**
 * Convert the comma-separated model-organism gene file into RDF.
 *
 * The header line must split into 8 comma-separated fields. Each following
 * row is parsed as CSV and described as an "Aging-Related-Gene" individual
 * with its symbol, taxon, NCBI gene id, lifespan change, lifespan effect and
 * longevity influence. The RDF buffer is written out after every row.
 *
 * Rows with fewer than 8 fields are skipped with a warning. The taxon triple
 * is omitted, with a warning, when the organism has no entry in the taxonomy
 * table below.
 *
 * @return bool|null false when the header does not have 8 columns; nothing is
 *                   returned otherwise
 */
function models() {
    // organism name as written in the file => NCBI taxonomy id
    $tax_ids = array(
        "Caenorhabditis elegans" => "6239",
        "Mus musculus" => "10090",
        "Saccharomyces cerevisiae" => "4932",
        "Drosophila melanogaster" => "7227",
        "Podospora anserina" => "5145",
        "Mesocricetus auratus" => "10036",
        "Schizosaccharomyces pombe" => "4896",
        "Danio rerio" => "7955",
    );

    $h = explode(",", parent::getReadFile()->read());
    $expected_columns = 8;
    if (($n = count($h)) != $expected_columns) {
        trigger_error("Found {$n} columns in gene file - expecting {$expected_columns}!", E_USER_WARNING);
        return false;
    }

    /*
      [0] GenAge ID
      [1] symbol
      [2] name
      [3] organism
      [4] entrez gene id
      [5] avg lifespan change (max obsv)
      [6] lifespan effect
      [7] longevity influence
    */
    while ($l = parent::getReadFile()->read(200000)) {
        $data = str_getcsv($l);
        if (count($data) < $expected_columns) {
            // e.g. a blank line: there is nothing sensible to describe
            trigger_error("Skipping row with " . count($data) . " columns - expecting {$expected_columns}!", E_USER_WARNING);
            continue;
        }

        $genage = str_pad($data[0], 4, "0", STR_PAD_LEFT);
        $gene_symbol = $data[1];
        $name = $data[2];
        $organism = $data[3];
        $ncbi_gene_id = $data[4];
        $max_percent_obsv_avg_lifespan_change = $data[5];
        $lifespan_effect = $data[6];
        $longevity_influence = $data[7];

        $genage_id = parent::getNamespace() . $genage;
        parent::addRDF(
            parent::describeIndividual($genage_id, $name, parent::getVoc() . "Aging-Related-Gene") .
            parent::describeClass(parent::getVoc() . "Aging-Related-Gene", "Aging Related Gene")
        );
        parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "gene-symbol", parent::safeLiteral($gene_symbol)));

        if (isset($tax_ids[$organism])) {
            parent::addRDF(parent::triplify($genage_id, parent::getVoc() . "taxon", "ncbitaxon:" . $tax_ids[$organism]));
        } else {
            // an unknown organism would otherwise produce a taxon link with no id
            trigger_error("No NCBI taxonomy id known for organism '{$organism}' ({$genage_id})", E_USER_WARNING);
        }

        if ($ncbi_gene_id !== "") {
            parent::addRDF(parent::triplify($genage_id, parent::getVoc() . "x-ncbigene", "ncbigene:" . $ncbi_gene_id));
        }
        if ($max_percent_obsv_avg_lifespan_change !== "") {
            parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "maximum-percent-observed-average-lifespan-change", parent::safeLiteral($max_percent_obsv_avg_lifespan_change)));
        }

        if ($lifespan_effect == "Increase and Decrease") {
            // the combined value is split into two separate statements
            parent::addRDF(
                parent::triplifyString($genage_id, parent::getVoc() . "lifespan-effect", "increase") .
                parent::triplifyString($genage_id, parent::getVoc() . "lifespan-effect", "decrease")
            );
        } else {
            parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "lifespan-effect", strtolower($lifespan_effect)));
        }
        parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "longevity-influence", strtolower($longevity_influence)));

        parent::WriteRDFBufferToWriteFile();
    }
}
/**
 * Parse one KGML pathway file and add its RDF description to the buffer.
 *
 * The pathway, each of its <entry> elements (with one "member" link per
 * space-separated name), each <relation> and each <reaction> are described.
 * On an XML load failure a message is echoed and nothing is added. The RDF
 * buffer is not flushed here.
 *
 * @param string $lfile path of the KGML file, handed to simplexml_load_file()
 */
function parseKGML($lfile) {
    $pathway = simplexml_load_file($lfile);
    if ($pathway === false) {
        echo "Error in parsing {$lfile}" . PHP_EOL;
        return;
    }

    $pathway_id = str_replace("path", "kegg", $pathway['name']);
    // prefix shared by every resource minted for this pathway
    $base_id = str_replace("kegg", "kegg_resource", $pathway_id) . ".";
    parent::addRDF(
        parent::describeIndividual($pathway_id, $pathway['title'], parent::getVoc() . "Pathway") .
        parent::triplify($pathway_id, "rdfs:seeAlso", $pathway['link']) .
        parent::triplify($pathway_id, "foaf:depiction", $pathway['image'])
    );

    // get the entries; entry id => entry name, used later to label relations
    $entries = array();
    foreach ($pathway->children() as $type => $item) {
        if ($type != "entry") {
            continue;
        }
        $eid = $base_id . $item['id'];
        $entries["" . $item['id']] = "" . $item['name'];
        parent::addRDF(
            parent::describeIndividual($eid, $item['name'], parent::getVoc() . "Ortholog-Group") .
            parent::describeClass(parent::getVoc() . "Ortholog-Group", "KEGG Ortholog Group")
        );
        foreach (explode(" ", $item['name']) as $mid) {
            if ($item['type'] == 'path') {
                // keep the prefix, turning ":" into "_". The arguments were
                // previously in the wrong order, so every such member became "_".
                $mid = str_replace(":", "_", $mid);
            } else {
                // drop the prefix up to and including the first colon
                $mid = substr($mid, strpos($mid, ":") + 1);
            }
            parent::addRDF(parent::triplify($eid, parent::getVoc() . "member", "kegg:" . $mid));
        }
    }

    // iterate over the relations, reactions
    foreach ($pathway->children() as $type => $item) {
        if ($type == "relation") {
            /*
              <relation entry1="70" entry2="73" type="ECrel">
                <subtype name="compound" value="86"/>
              </relation>
              <relation entry1="26" entry2="25" type="PPrel">
                <subtype name="compound" value="17"/>
                <subtype name="activation" value="-->"/>
              </relation>
            */
            $id1 = "" . $item['entry1'];
            $id2 = "" . $item['entry2'];
            $type = "" . $type;
            $relation_id = $base_id . $id1 . "." . $id2 . "." . $type;
            // an entry id that was never declared contributes an empty name
            $name1 = isset($entries[$id1]) ? $entries[$id1] : '';
            $name2 = isset($entries[$id2]) ? $entries[$id2] : '';
            $label = $type . " relation between " . $name1 . " and " . $name2;
            parent::addRDF(
                parent::describeIndividual($relation_id, $label, parent::getVoc() . "Pathway-Relation") .
                parent::describeClass(parent::getVoc() . "Pathway-Relation", "KEGG Pathway Relation") .
                parent::triplify($relation_id, parent::getVoc() . "source", $base_id . $id1) .
                parent::triplify($relation_id, parent::getVoc() . "target", $base_id . $id2) .
                parent::triplify($relation_id, parent::getVoc() . "pathway", $pathway_id) .
                parent::triplifyString($relation_id, parent::getVoc() . "type", $item['type'])
            );
            foreach ($item->children() as $subtype) {
                parent::addRDF(parent::triplifyString($relation_id, parent::getVoc() . "subtype", '' . $subtype['name']));
            }
        } else if ($type == "reaction") {
            /*
              <reaction id="133" name="rn:R09085" type="irreversible">
                <substrate id="86" name="cpd:C00267"/>
                <product id="90" name="cpd:C00668"/>
              </reaction>
            */
            $reaction_name = (string) $item['name'];
            $reaction_id = $base_id . substr($reaction_name, strpos($reaction_name, ":") + 1);
            $reaction_type = parent::getVoc() . ucfirst($item['type']) . "-Reaction";
            parent::addRDF(
                parent::describeIndividual($reaction_id, $item['name'], parent::getVoc() . "Reaction") .
                parent::describeClass(parent::getVoc() . "Reaction", "KEGG Reaction") .
                parent::triplify($reaction_id, "rdf:type", $reaction_type)
            );
            // the child element name (e.g. substrate, product) becomes the predicate
            foreach ($item->children() as $k => $v) {
                $cid = str_replace("cpd:", "kegg:", $v['name']);
                parent::addRDF(parent::triplify($reaction_id, parent::getVoc() . $k, $cid));
            }
        }
    }
    return;
}
/**
 * Convert the tab-separated drug-drug interaction file into RDF.
 *
 * The first line is read and discarded. Each following row is numbered from 1
 * and becomes a "Drug-Drug-Association" linking two chemicals (columns 0/1,
 * names in columns 2/3) to an event (columns 4/5) with the value of column 7
 * as p-value. Chemicals and events are described only the first time they
 * are seen. The buffer is written out before every 10000th row is added and
 * once more after the last row.
 */
function twosides() {
    // qnames of chemicals and events that were already described
    $described = array();
    $row_number = 0;

    // skip the first line
    $this->GetReadFile()->Read();

    while ($line = $this->GetReadFile()->Read()) {
        $fields = explode("\t", $line);

        $row_number++;
        if ($row_number % 10000 == 0) {
            $this->WriteRDFBufferToWriteFile();
        }

        $association = "twosides:{$row_number}";
        // the numeric part of the compound id starts at the fifth character
        $drug_a = "pubchemcompound:" . (int) sprintf("%d", substr($fields[0], 4));
        $drug_a_label = $fields[2];
        $drug_b = "pubchemcompound:" . (int) sprintf("%d", substr($fields[1], 4));
        $drug_b_label = $fields[3];
        $event = "umls:" . $fields[4];
        $event_label = strtolower($fields[5]);
        $association_label = "DDI between {$drug_a_label} and {$drug_b_label} leading to {$event_label}";

        // describe each chemical on first sight only
        $drugs = array(
            array($drug_a, $drug_a_label),
            array($drug_b, $drug_b_label),
        );
        foreach ($drugs as $drug) {
            if (isset($described[$drug[0]])) {
                continue;
            }
            parent::addRDF(
                parent::describeIndividual($drug[0], $drug[1], parent::getVoc() . "Chemical") .
                parent::describeClass(parent::getVoc() . "Chemical", "PharmGKB Chemical")
            );
            $described[$drug[0]] = '';
        }

        // same for the event
        if (!isset($described[$event])) {
            parent::addRDF(
                parent::describeIndividual($event, $event_label, parent::getVoc() . "Event") .
                parent::describeClass(parent::getVoc() . "Event", "PharmGKB side effect event")
            );
            $described[$event] = '';
        }

        parent::addRDF(
            parent::describeIndividual($association, $association_label, parent::getVoc() . "Drug-Drug-Association") .
            parent::describeClass(parent::getVoc() . "Drug-Drug-Association", "PharmGKB Twosides Drug-Drug Association") .
            parent::triplify($association, parent::getVoc() . "chemical", $drug_a) .
            parent::triplify($association, parent::getVoc() . "chemical", $drug_b) .
            parent::triplify($association, parent::getVoc() . "event", $event) .
            parent::triplifyString($association, parent::getVoc() . "p-value", $fields[7])
        );
    }

    parent::writeRDFBufferToWriteFile();
}
/**
 * Convert the tab-delimited genotype / non-disease file into RDF.
 *
 * Each line must have exactly 8 columns; other lines raise an E_USER_WARNING
 * and are skipped. For every (disease, allele) pair found in a row, where
 * diseases are the comma-separated values of column 7 and alleles are the
 * lower-cased, "|"-separated values of column 2, one negated
 * "Allele-Disease-Non-Association" individual is emitted. Every
 * comma-separated value of column 5 is attached to it as an "x-pubmed" link.
 * The RDF buffer is written after each accepted row.
 */
function MGI_Geno_NotDisease()
{
    while ($row = $this->getReadFile()->read(248000)) {
        $fields = explode("\t", $row);
        if (count($fields) != 8) {
            trigger_error("Incorrect number of columns", E_USER_WARNING);
            continue;
        }

        $genotype = $fields[0];
        $allele_list = explode("|", strtolower($fields[2]));
        $disease_list = explode(",", $fields[7]);
        // the publication list is the same for every pair of this row
        $pubmed_list = $fields[5] ? explode(",", $fields[5]) : array();

        foreach ($disease_list as $disease_no) {
            $disease = "omim:{$disease_no}";

            foreach ($allele_list as $allele) {
                // the identifier is a hash of the allele/disease pair
                $uri = parent::getRes() . md5($allele . $disease);
                $uri_label = "{$allele} {$disease} absent association";

                parent::addRDF(
                    parent::describeIndividual($uri, $uri_label, $this->getVoc() . "Allele-Disease-Non-Association")
                    . parent::describeClass($this->getVoc() . "Allele-Disease-Non-Association", "MGI Allele-Disease Non-Association")
                    . parent::triplify($uri, $this->getVoc() . "allele", $allele)
                    . parent::triplifyString($uri, $this->getVoc() . "genotype-string", $genotype)
                    . parent::triplify($uri, $this->getVoc() . "disease", $disease)
                    . parent::triplifyString($uri, $this->getVoc() . "is-negated", "true")
                );

                foreach ($pubmed_list as $pubmed_no) {
                    parent::addRDF(
                        parent::triplify($uri, $this->getVoc() . "x-pubmed", "pubmed:" . $pubmed_no)
                    );
                }
            }
        }

        $this->writeRDFBufferToWriteFile();
    }
}
/**
 * Convert the tab-delimited NDC product file into RDF.
 *
 * The first line is treated as a header and discarded. Every following line
 * must have exactly 18 tab-separated columns; other lines are reported with
 * trigger_error() and skipped. For each accepted row the product, its type,
 * names, dosage forms, routes, marketing dates, marketing category,
 * application number, labeller, ingredients/substances/units and
 * pharmacological classes are emitted, and the RDF buffer is written.
 *
 * @param resource $fpin open, readable handle of the product file
 */
function product($fpin)
{
    fgets($fpin); // header
    while ($l = fgets($fpin, 100000)) {
        $a = explode("\t", $l);
        if (count($a) != 18) {
            trigger_error("Expected 18 columns, instead found " . count($a));
            continue;
        }

        $product_id = parent::getNamespace() . $a[0];
        $product_label = $a[3];
        $product_type_label = ucfirst(strtolower($a[2]));
        // The type URI must come from the product type (column 2); deriving it
        // from the proprietary name would mint one "type" per product.
        $product_type = parent::getVoc() . str_replace(" ", "-", $product_type_label);

        parent::addRDF(
            parent::describeIndividual($product_id, $product_label, parent::getVoc() . "Product")
            . parent::describeClass(parent::getVoc() . "Product", "NDC Product")
            . parent::triplify($product_id, parent::getVoc() . "product-type", $product_type)
            . parent::describeIndividual($product_type, $product_type_label, parent::getVoc() . "Product-Type")
            . parent::describeClass(parent::getVoc() . "Product-Type", "Product Type")
            . parent::triplifyString($product_id, parent::getVoc() . "product-id", $a[1])
            . parent::triplifyString($product_id, parent::getVoc() . "proprietary-name", $a[3])
            . parent::triplifyString($product_id, parent::getVoc() . "trade-name-suffix", $a[4])
        );

        // non-proprietary names, ";"-separated
        if ($a[5]) {
            $b = explode(";", $a[5]);
            foreach ($b as $c) {
                parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "non-proprietary-name", trim($c)));
            }
        }

        // dosage forms, ","-separated
        if ($a[6]) {
            $b = explode(",", $a[6]);
            foreach ($b as $c) {
                $dosageform = strtolower($c);
                $dosageform_id = parent::getVoc() . str_replace(" ", "-", ucfirst(strtolower($c)));
                parent::addRDF(
                    parent::describeIndividual($dosageform_id, $dosageform, parent::getVoc() . "Dosage-Form")
                    . parent::describeClass(parent::getVoc() . "Dosage-Form", "NDC Dosage Form")
                    . parent::triplify($product_id, parent::getVoc() . "dosage-form", $dosageform_id)
                );
            }
        }

        // routes, "; "-separated
        if ($a[7]) {
            $b = explode("; ", $a[7]);
            foreach ($b as $c) {
                $route = strtolower(trim($c));
                $route_id = parent::getVoc() . str_replace(" ", "-", ucfirst(strtolower($c)));
                parent::addRDF(
                    parent::describeIndividual($route_id, $route, parent::getVoc() . "Route")
                    . parent::describeClass(parent::getVoc() . "Route", "NDC Drug Route")
                    . parent::triplify($product_id, parent::getVoc() . "route", $route_id)
                );
            }
        }

        // Marketing dates: the value is cut into 4 + 2 + 2 characters and joined
        // with dashes (presumably YYYYMMDD in the source file - confirm).
        // substr() takes the string first, then offset and length.
        $date_columns = array(8 => "start-marketing-date", 9 => "end-marketing-date");
        foreach ($date_columns as $col => $predicate) {
            if ($a[$col]) {
                $date = substr($a[$col], 0, 4) . "-" . substr($a[$col], 4, 2) . "-" . substr($a[$col], 6, 2);
                parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . $predicate, $date));
            }
        }

        if ($a[10]) {
            parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "marketing-category", $a[10]));
        }
        if ($a[11]) {
            parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "application-number", $a[11]));
        }

        // create a labeller node
        if ($a[12]) {
            $labeller_id = parent::getRes() . md5($a[12]);
            $label = addslashes($a[12]);
            parent::addRDF(
                parent::describeIndividual($labeller_id, $label, parent::getVoc() . "Labeller")
                . parent::describeClass(parent::getVoc() . "Labeller", "NDC Labeller")
                . parent::triplify($product_id, parent::getVoc() . "labeller", $labeller_id)
            );
        }

        // columns 13-15 are parallel ";"-separated lists: substance, strength, unit
        if ($a[13]) {
            $substances = explode(";", $a[13]);
            $strengths = explode(";", $a[14]);
            $units = explode(";", $a[15]);

            foreach ($substances as $i => $substance) {
                // list the active ingredient
                $ingredient_label = strtolower($substance);
                // the strength and unit lists may be shorter than the substance list
                $strength = isset($strengths[$i]) ? $strengths[$i] : '';
                $unit = isset($units[$i]) ? $units[$i] : '';

                $ingredient_id = parent::getRes() . md5($ingredient_label);
                parent::addRDF(
                    parent::describeIndividual($ingredient_id, $ingredient_label, parent::getVoc() . "Ingredient")
                    . parent::describeClass(parent::getVoc() . "Ingredient", "NDC Ingredient")
                    . parent::triplify($product_id, parent::getVoc() . "ingredient", $ingredient_id)
                );

                // describe the substance composition
                $substance_label = "{$strength} {$unit} {$ingredient_label}";
                $substance_id = parent::getRes() . md5($substance_label);
                parent::addRDF(
                    parent::describeIndividual($substance_id, $substance_label, parent::getVoc() . "Substance")
                    . parent::triplifyString($substance_id, parent::getVoc() . "amount", $strength)
                    . parent::describeClass(parent::getVoc() . "Substance", "NDC Substance")
                );

                $unit_id = parent::getVoc() . md5($unit);
                parent::addRDF(
                    parent::describeIndividual($unit_id, $unit, parent::getVoc() . "Unit")
                    . parent::describeClass(parent::getVoc() . "Unit", "NDC Unit")
                    . parent::triplify($substance_id, parent::getVoc() . "amount_unit", $unit_id)
                    . parent::triplify($product_id, parent::getVoc() . "has-part", $substance_id)
                );
            }
        }

        // pharmacological classes, ","-separated
        if ($a[16]) {
            $b = explode(",", $a[16]);
            foreach ($b as $c) {
                $cat_id = parent::getVoc() . md5($c);
                parent::addRDF(
                    parent::describeIndividual($cat_id, $c, parent::getVoc() . "Pharmacological-Class")
                    . parent::describeClass(parent::getVoc() . "Pharmacological-Class", "NDC Pharmacological Class")
                    . parent::triplify($product_id, parent::getVoc() . "pharmacological-class", $cat_id)
                );
            }
        }

        parent::WriteRDFBufferToWriteFile();
    }
}
/**
 * Convert every line of the read file into RDF about a homologene group.
 *
 * Each line is handed to parse_homologene_tab_line(); the resulting "hid"
 * names the group, which is then linked to the taxon, gene, gene symbol, GI
 * and RefSeq values of that line. Each predicate is described alongside its
 * statement, and the RDF buffer is written after every line.
 */
function process()
{
    while ($row = $this->GetReadFile()->Read(200000)) {
        $fields = $this->parse_homologene_tab_line($row);

        $group = "homologene:" . $fields["hid"];
        $group_label = "homologene group " . $fields['hid'];

        parent::AddRDF(
            parent::describeIndividual($group, $group_label, $this->getVoc() . "Homologene-Group")
            . parent::describeClass($this->getVoc() . "Homologene-Group", "Homologene Group")
        );

        $gene = "ncbigene:" . $fields["geneid"];
        $taxon = "taxid:" . $fields["taxid"];
        $gi_ref = "gi:" . $fields["gi"];
        // backslashes are removed from the symbol before it is escaped
        $symbol = str_replace("\\", "", $fields["genesymbol"]);
        $refseq_ref = "refseq:" . $fields["refseq"];

        parent::AddRDF(
            parent::triplify($group, $this->getVoc() . "x-taxid", $taxon)
            . parent::describeProperty($this->getVoc() . "x-taxid", "Link to NCBI taxonomy")
        );

        parent::AddRDF(
            parent::triplify($group, $this->getVoc() . "x-ncbigene", $gene)
            . parent::describeProperty($this->getVoc() . "x-ncbigene", "Link to NCBI GeneId")
        );

        $symbol_literal = utf8_encode(htmlspecialchars($symbol));
        parent::AddRDF(
            parent::triplifyString($group, $this->getVoc() . "gene-symbol", $symbol_literal, "xsd:string")
            . parent::describeProperty($this->getVoc() . "gene-symbol", "Link to gene symbol")
        );

        parent::AddRDF(
            parent::triplify($group, $this->getVoc() . "x-gi", $gi_ref)
            . parent::describeProperty($this->getVoc() . "x-gi", "Link to NCBI GI")
        );

        parent::AddRDF(
            parent::triplify($group, $this->getVoc() . "x-refseq", $refseq_ref)
            . parent::describeProperty($this->getVoc() . "x-refseq", "Link to NCBI Refseq")
        );

        $this->WriteRDFBufferToWriteFile();
    }
}
/**
 * Read the file record by record and emit RDF for each RefSeq record.
 *
 * Lines are accumulated until a line consisting of "//" is read; lines that
 * contain only a newline are dropped. The accumulated text is then split into
 * sections (LOCUS, DEFINITION, ACCESSION, VERSION, KEYWORDS, REFERENCE,
 * SOURCE, FEATURES), each section is parsed by its dedicated helper, and the
 * record, its features, accessions, keywords and references are emitted.
 * The RDF buffer is written after every record.
 */
function process()
{
    $record_text = "";

    while ($line = $this->getReadFile()->Read(40960)) {
        // not the record terminator: keep collecting
        if (!preg_match("/^\\/\\/\$/", $line)) {
            if (!preg_match("/^\n\$/", $line)) {
                $record_text .= $line . PHP_EOL;
            }
            continue;
        }

        // now remove the header if it is there
        $record_text = $this->removeHeader($record_text);
        $sections = $this->parseGenbankRaw($record_text);

        // parse the individual sections
        $locus = $this->parseLocus($this->retrieveSections("LOCUS", $sections));
        $definition = $this->parseDefinition($this->retrieveSections("DEFINITION", $sections));
        $accessions = $this->parseAccession($this->retrieveSections("ACCESSION", $sections));
        $version = $this->parseVersion($this->retrieveSections("VERSION", $sections));
        $keywords = $this->parseKeywords($this->retrieveSections("KEYWORDS", $sections));
        $references = $this->parseReferences($this->retrieveSections("REFERENCE", $sections));
        $source = $this->parseSource($this->retrieveSections("SOURCE", $sections));
        $features = $this->parseFeatures($this->retrieveSections("FEATURES", $sections));

        // the record itself
        $record_uri = $this->getNamespace() . $version['versioned_accession'];
        $record_label = utf8_encode(htmlspecialchars($definition[0]));
        $fasta_url = 'https://www.ncbi.nlm.nih.gov/sviewer/viewer.cgi?sendto=on&db=nucest&dopt=fasta&val=' . $version['gi'];

        parent::AddRDF(
            parent::describeIndividual($record_uri, $record_label, $this->getVoc() . 'refseq-record')
            . parent::triplifyString($record_uri, $this->getVoc() . 'sequence-length', $locus[0]['sequence_length'])
            . parent::triplifyString($record_uri, $this->getVoc() . 'chromosome-shape', $locus[0]['chromosome_shape'])
            . parent::triplifyString($record_uri, $this->getVoc() . 'date-of-entry', $locus[0]['date'])
            . parent::triplifyString($record_uri, $this->getVoc() . 'source', utf8_encode($source[0]))
            . parent::triplify($record_uri, $this->getVoc() . 'fasta-seq', $fasta_url)
            . parent::triplify($fasta_url, "rdf:type", $this->getVoc() . 'fasta-sequence')
        );

        // the features
        foreach ($features as $feature) {
            $kind = $feature['type'];
            $description = $this->getFeatures($kind);

            // class label: the feature type, followed by its comment when there is one
            $class_label = $kind;
            $class_definition = isset($description['definition'])
                ? preg_replace('/\\s\\s*/', ' ', $description['definition'])
                : '';

            // "UniProtKB/Swiss-Prot" contains the qualifier separator, so it is
            // renamed before splitting (imperfect solution)
            $qualifiers = explode("/", str_replace("UniProtKB/Swiss-Prot", "UniProt", $feature['value']));
            $location = preg_replace('/\\n/', '', $qualifiers[0]);

            $class_uri = parent::getVoc() . md5($kind);
            $feature_uri = parent::getRes() . md5($kind . $location . $record_uri);
            $feature_label = utf8_encode($kind . " " . $location . " for " . $record_uri);

            if (isset($description['comment'])) {
                $class_label .= " " . preg_replace('/\\s\\s*/', ' ', $description['comment']);
            }

            parent::AddRDF(
                parent::describeClass($class_uri, $class_label, parent::getVoc() . "Feature", $class_label, $class_definition)
                . parent::describeIndividual($feature_uri, $feature_label, $class_uri)
                . parent::triplify($record_uri, $this->getVoc() . "has-feature", $feature_uri)
            );

            // every "key=value" chunk becomes a statement about the feature
            foreach ($qualifiers as $qualifier) {
                if (!preg_match("/(\\S+)\\=(.*)/", $qualifier, $pair)) {
                    continue;
                }
                $unquoted = str_replace("\"", "", $pair[2]);
                if ($pair[1] == "db_xref") {
                    parent::AddRDF(parent::triplify($feature_uri, "rdfs:seeAlso", $unquoted));
                } else {
                    parent::AddRDF(parent::triplifyString($feature_uri, $this->getVoc() . $pair[1], utf8_encode($unquoted)));
                }
            }
        }

        // accessions
        foreach ($accessions[0] as $accession) {
            parent::AddRDF(parent::triplifyString($record_uri, $this->getVoc() . "accession", $accession));
        }

        // versioned accession
        if (isset($version['versioned_accession'])) {
            parent::AddRDF(parent::triplifyString($record_uri, $this->getVoc() . "versioned-accession", $version['versioned_accession']));
        }

        // keywords
        foreach ($keywords as $keyword) {
            parent::AddRDF(parent::triplifyString($record_uri, $this->getVoc() . "keyword", $keyword));
        }

        // references; each one gets an identifier hashed from a random number
        foreach ($references as $reference) {
            $reference_uri = $this->getRes() . md5(rand());
            $reference_label = "reference for " . $record_uri;

            if (isset($reference['TITLE'])) {
                parent::AddRDF(
                    parent::describeIndividual($reference_uri, $reference_label, $this->getVoc() . "reference")
                    . parent::triplifyString($reference_uri, $this->getVoc() . "title", $reference['TITLE'])
                );
            }
            if (isset($reference['PUBMED'])) {
                parent::AddRDF(parent::triplify($reference_uri, $this->getVoc() . "x-pubmed", 'pubmed:' . $reference['PUBMED']));
            }
            if (isset($reference['AUTHORS'])) {
                parent::AddRDF(parent::triplifyString($reference_uri, $this->getVoc() . "authors", $reference['AUTHORS']));
            }

            // link + optional coordinates + citation, emitted as one chunk
            $reference_rdf = parent::triplify($record_uri, $this->getVoc() . "reference", $reference_uri);
            if (isset($reference['COORDINATES'])) {
                $reference_rdf .= parent::triplifyString($reference_uri, $this->getVoc() . "coordinates", $reference['COORDINATES']);
            }
            $reference_rdf .= parent::triplifyString($reference_uri, $this->getVoc() . "citation", $reference['JOURNAL']);
            parent::AddRDF($reference_rdf);
        }

        // start collecting the next record
        $record_text = "";
        $this->WriteRDFBufferToWriteFile();
    } //while
}
/**
 * Read the file record by record and emit RDF for each GenBank record.
 *
 * Lines are accumulated until a line consisting of "//" is read; lines that
 * contain only a newline are dropped. The accumulated text is split into
 * sections (LOCUS, DEFINITION, ACCESSION, VERSION, KEYWORDS, SEGMENT,
 * FEATURES, SOURCE, CONTIG, REFERENCE), each parsed by its dedicated helper.
 * The record, its features, accessions, contigs, keywords, segments and
 * references are then emitted and the RDF buffer is written.
 *
 * SEGMENT and CONTIG are optional: their parsed values are reset for every
 * record so that a record without them does not repeat the values of an
 * earlier record.
 */
function process()
{
    $gb_record_str = "";
    while ($aLine = $this->getReadFile()->Read(4096)) {
        preg_match("/^\\/\\/\$/", $aLine, $matches);
        if (count($matches)) {
            //now remove the header if it is there
            $gb_record_str = $this->removeHeader($gb_record_str);
            $sectionsRaw = $this->parseGenbankRaw($gb_record_str);

            //get locus section(s)
            $locus = $this->retrieveSections("LOCUS", $sectionsRaw);
            $parsed_locus_arr = $this->parseLocus($locus);
            //get the definition section
            $definition = $this->retrieveSections("DEFINITION", $sectionsRaw);
            $parsed_definition_arr = $this->parseDefinition($definition);
            //get the accession
            $accessions = $this->retrieveSections("ACCESSION", $sectionsRaw);
            $parsed_accession_arr = $this->parseAccession($accessions);
            //get the version
            $versions = $this->retrieveSections("VERSION", $sectionsRaw);
            $parsed_version_arr = $this->parseVersion($versions);
            //get the keywords
            $keywords = $this->retrieveSections("KEYWORDS", $sectionsRaw);
            $parsed_keyword_arr = $this->parseKeywords($keywords);

            //may not be any segment section; reset so that the previous
            //record's segments are not emitted again
            $parsed_segments_arr = null;
            $segments = $this->retrieveSections("SEGMENT", $sectionsRaw);
            if (!empty($segments)) {
                $parsed_segments_arr = $this->parseSegment($segments);
            }

            //get the features
            $features = $this->retrieveSections("FEATURES", $sectionsRaw);
            $parsed_features_arr = $this->parseFeatures($features);
            //get the source section
            $source = $this->retrieveSections("SOURCE", $sectionsRaw);
            $parsed_source_arr = $this->parseSource($source);

            //may not be any contig section; reset for the same reason as above
            $parsed_contig_arr = null;
            $contig = $this->retrieveSections("CONTIG", $sectionsRaw);
            if (!empty($contig)) {
                $parsed_contig_arr = $this->parseContig($contig);
            }

            //get the reference section
            $references = $this->retrieveSections("REFERENCE", $sectionsRaw);
            $parsed_refs_arr = $this->parseReferences($references);

            //the record itself
            $gb_res = "gi:" . $parsed_version_arr['gi'];
            $gb_label = utf8_encode(htmlspecialchars($parsed_definition_arr[0]));
            parent::AddRDF(
                parent::describeIndividual($gb_res, $gb_label, $this->getVoc() . "genbank-record")
                . parent::triplifyString($gb_res, $this->getVoc() . 'sequence-length', $parsed_locus_arr[0]['sequence_length'])
                . parent::triplifyString($gb_res, $this->getVoc() . 'strandedness', $parsed_locus_arr[0]['strandedness'])
                . parent::triplify($gb_res, "rdf:type", $this->getRes() . $parsed_locus_arr[0]['mol_type'])
                . parent::triplifyString($gb_res, $this->getVoc() . 'chromosome-shape', $parsed_locus_arr[0]['chromosome_shape'])
                . parent::triplifyString($gb_res, $this->getVoc() . 'division-name', $parsed_locus_arr[0]['division_name'])
                . parent::triplifyString($gb_res, $this->getVoc() . 'date-of-entry', $parsed_locus_arr[0]['date'])
                . parent::triplifyString($gb_res, $this->getVoc() . 'source', utf8_encode($parsed_source_arr[0]))
                . parent::QQuadO_URL($gb_res, $this->getVoc() . 'fasta-seq', 'https://www.ncbi.nlm.nih.gov/sviewer/viewer.cgi?sendto=on&db=nucest&dopt=fasta&val=' . $parsed_version_arr['gi'])
            );

            //the features
            foreach ($parsed_features_arr as $aFeature) {
                $type = $aFeature['type'];
                $feat_desc = $this->getFeatures($type);

                //the class label is the feature definition; fall back to the
                //feature type when no definition is known for it
                $label = $type;
                if (isset($feat_desc['definition'])) {
                    $label = preg_replace('/\\s\\s*/', ' ', $feat_desc['definition']);
                }

                $value = $aFeature['value'];
                $value_arr = explode("/", $value);
                $location = preg_replace('/\\n/', '', $value_arr[0]);
                $class_id = parent::getVoc() . md5($type);
                $feat_res = parent::getRes() . md5($type . $location . $gb_res);
                $feat_label = utf8_encode($type . " " . $location . " for " . $gb_res);

                if (isset($feat_desc['comment'])) {
                    $comment = preg_replace('/\\s\\s*/', ' ', $feat_desc['comment']);
                    $label .= " " . $comment;
                }

                parent::AddRDF(
                    parent::describeClass($class_id, $label, parent::getVoc() . "Feature")
                    . parent::describeIndividual($feat_res, $feat_label, $class_id)
                    . parent::triplify($gb_res, $this->getVoc() . "has-feature", $feat_res)
                );

                foreach ($value_arr as $aL) {
                    //check if aL has an equals in it
                    $p = "/(\\S+)\\=(.*)/";
                    preg_match($p, $aL, $m);
                    if (count($m)) {
                        if ($m[1] == "db_xref") {
                            parent::AddRDF(parent::triplify($feat_res, "rdfs:seeAlso", str_replace("\"", "", $m[2])));
                        } else {
                            parent::AddRDF(parent::triplifyString($feat_res, $this->getVoc() . $m[1], utf8_encode(str_replace("\"", "", $m[2]))));
                        }
                    }
                }
            }

            //accessions
            foreach ($parsed_accession_arr[0] as $acc) {
                parent::AddRDF(parent::triplifyString($gb_res, $this->getVoc() . "accession", $acc));
            }

            //versioned accession
            if (isset($parsed_version_arr['versioned_accession'])) {
                parent::AddRDF(parent::triplifyString($gb_res, $this->getVoc() . "versioned-accession", $parsed_version_arr['versioned_accession']));
            }

            //contigs (only when this record has a CONTIG section)
            if (isset($parsed_contig_arr)) {
                foreach ($parsed_contig_arr as $aContig) {
                    parent::AddRDF(parent::triplifyString($gb_res, $this->getVoc() . "contig", parent::safeLiteral($aContig)));
                }
            }

            //keywords
            foreach ($parsed_keyword_arr as $akw) {
                parent::AddRDF(parent::triplifyString($gb_res, $this->getVoc() . "keyword", $akw));
            }

            //segments (only when this record has a SEGMENT section)
            if (isset($parsed_segments_arr)) {
                foreach ($parsed_segments_arr as $aSeg) {
                    parent::AddRDF(
                        parent::triplifyString($gb_res, $this->getVoc() . "segment-number", $aSeg['segment_number'])
                        . parent::triplifyString($gb_res, $this->getVoc() . "total-segments", $aSeg['total_segments'])
                    );
                }
            }

            //references; each one gets an identifier hashed from a random number
            foreach ($parsed_refs_arr as $aRef) {
                $r = rand();
                $ref_res = $this->getRes() . md5($r);
                $ref_label = "reference for " . $gb_res;
                if (isset($aRef['TITLE'])) {
                    parent::AddRDF(
                        parent::describeIndividual($ref_res, $ref_label, $this->getVoc() . "reference")
                        . parent::triplifyString($ref_res, $this->getVoc() . "title", $aRef['TITLE'])
                    );
                }
                if (isset($aRef['PUBMED'])) {
                    parent::AddRDF(parent::triplify($ref_res, $this->getVoc() . "x-pubmed", 'pubmed:' . $aRef['PUBMED']));
                }
                if (isset($aRef['AUTHORS'])) {
                    parent::AddRDF(parent::triplifyString($ref_res, $this->getVoc() . "authors", $aRef['AUTHORS']));
                }

                //coordinates are optional, so only emit them when present
                $ref_rdf = parent::triplify($gb_res, $this->getVoc() . "reference", $ref_res);
                if (isset($aRef['COORDINATES'])) {
                    $ref_rdf .= parent::triplifyString($ref_res, $this->getVoc() . "coordinates", $aRef['COORDINATES']);
                }
                $ref_rdf .= parent::triplifyString($ref_res, $this->getVoc() . "citation", $aRef['JOURNAL']);
                parent::AddRDF($ref_rdf);
            }

            $gb_record_str = "";
            $this->WriteRDFBufferToWriteFile();
            continue;
        }

        //not a terminator: keep the line unless it is only a newline
        preg_match("/^\n\$/", $aLine, $matches);
        if (count($matches) == 0) {
            $gb_record_str .= $aLine;
        }
    } //while
}
/**
 * Convert the tab-delimited gene interaction file into RDF.
 *
 * Lines starting with "#" are skipped. Every other line must have exactly 11
 * columns (otherwise it is reported and skipped): column 0 is the interaction
 * identifier, column 1 its type, column 2 additional information, columns 5
 * and 8 the two genes.
 *
 * Depending on column 2 the row is emitted as
 *  - a non-interaction plus a negative property assertion ("No_interaction"),
 *  - a plain interaction of the given type ("N/A" or "Genetic_interaction"),
 *  - or an interaction whose class is a subclass named after column 2.
 *
 * The direct gene-to-gene predicate is chosen from the interaction type. For
 * a type without a known predicate a warning is raised and the statements
 * that need the predicate are left out; the interaction itself is still
 * described. The RDF buffer is written after every accepted row.
 */
function gene_interactions()
{
    // interaction type => local name of the direct gene-to-gene predicate
    $predicate_names = array(
        "Genetic" => "genetically-interacts-with",
        "Physical" => "physically-interacts-with",
        "Predicted" => "predicted-to-interact-with",
        "Regulatory" => "regulates",
    );

    while ($l = parent::getReadFile()->Read()) {
        if ($l[0] == '#') {
            continue;
        }
        $data = explode("\t", $l);
        if (count($data) != 11) {
            trigger_error("Found " . count($data) . " columns, expecting 11");
            continue;
        }

        $interaction = $data[0];
        $interaction_type = str_replace("_", "-", $data[1]);
        $interaction_type_label = str_replace("_", " ", $data[1]);
        $int_additional_info = $data[2];
        $gene1 = $data[5];
        $gene2 = $data[8];
        $interaction_id = parent::getNamespace() . $interaction;

        // Decide the predicate afresh for every row so that an unknown type can
        // never pick up the predicate of the previous row.
        $int_pred = null;
        if (isset($predicate_names[$interaction_type])) {
            $int_pred = parent::getVoc() . $predicate_names[$interaction_type];
        } else {
            trigger_error("Unknown interaction type '" . $interaction_type . "' for interaction " . $interaction . "; predicate statements omitted", E_USER_WARNING);
        }

        if ($int_additional_info == "No_interaction") {
            $interaction_label = "No " . strtolower($interaction_type) . " interaction between " . $gene1 . " and " . $gene2;
            parent::addRDF(
                parent::describeIndividual($interaction_id, $interaction_label, parent::getVoc() . $interaction_type . "-Non-Interaction")
                . parent::describeClass(parent::getVoc() . $interaction_type . "-Non-Interaction", $interaction_type_label . " non-interaction")
                . parent::triplify($interaction_id, parent::getVoc() . "involves", parent::getNamespace() . $gene1)
                . parent::triplify($interaction_id, parent::getVoc() . "involves", parent::getNamespace() . $gene2)
            );

            // the negative assertion is only meaningful with a predicate to negate
            if ($int_pred !== null) {
                $npa_id = parent::getRes() . md5($interaction_id . "negative property assertion");
                $npa_label = "Negative property assertion stating that " . $gene1 . " and " . $gene2 . " do not have a " . $interaction_type_label . " interaction";
                parent::addRDF(
                    parent::describeIndividual($npa_id, $npa_label, "owl:NegativeObjectPropertyAssertion")
                    . parent::triplify($npa_id, "owl:sourceIndividual", parent::getNamespace() . $gene1)
                    . parent::triplify($npa_id, "owl:targetIndividual", parent::getNamespace() . $gene2)
                    . parent::triplify($npa_id, "owl:assertionProperty", $int_pred)
                );
            }
        } elseif ($int_additional_info == "N/A" || $int_additional_info == "Genetic_interaction") {
            $interaction_label = $interaction_type . " interaction between " . $gene1 . " and " . $gene2;
            $rdf = parent::describeIndividual($interaction_id, $interaction_label, parent::getVoc() . $interaction_type . "-Interaction")
                . parent::describeClass(parent::getVoc() . $interaction_type . "-Interaction", $interaction_type_label . " Interaction")
                . parent::triplify($interaction_id, parent::getVoc() . "involves", parent::getNamespace() . $gene1)
                . parent::triplify($interaction_id, parent::getVoc() . "involves", parent::getNamespace() . $gene2);
            if ($int_pred !== null) {
                $rdf .= parent::triplify(parent::getNamespace() . $gene1, $int_pred, parent::getNamespace() . $gene2);
            }
            parent::addRDF($rdf);
        } else {
            // the additional information names a more specific interaction class
            $prefix_label = ($int_additional_info != "" ? $int_additional_info . " " : "");
            $interaction_label = $prefix_label . strtolower($interaction_type) . " interaction between " . $gene1 . " and " . $gene2;
            $type = parent::getVoc() . ($int_additional_info != "" ? $int_additional_info . "-" : "") . $interaction_type . "-Interaction";
            $type_label = $prefix_label . $interaction_type_label . " Interaction";

            // the parent class gets the same label as in the branch above
            $rdf = parent::describeIndividual($interaction_id, $interaction_label, $type)
                . parent::describeClass($type, $type_label, parent::getVoc() . $interaction_type . "-Interaction")
                . parent::describeClass(parent::getVoc() . $interaction_type . "-Interaction", $interaction_type_label . " Interaction")
                . parent::triplify($interaction_id, parent::getVoc() . "involves", parent::getNamespace() . $gene1)
                . parent::triplify($interaction_id, parent::getVoc() . "involves", parent::getNamespace() . $gene2);
            if ($int_pred !== null) {
                $rdf .= parent::triplify(parent::getNamespace() . $gene1, $int_pred, parent::getNamespace() . $gene2);
            }
            parent::addRDF($rdf);
        } //else

        parent::WriteRDFBufferToWriteFile();
    } //while
}
/**
 * Convert the comma-separated gene expression file into RDF.
 *
 * The first line is the header and must split into 8 comma-separated columns;
 * otherwise a warning is raised and false is returned. Every following line
 * is parsed as CSV. Lines with fewer than 8 fields (for example blank lines)
 * are reported and skipped instead of being read past their end.
 *
 * Each accepted row yields one expression individual, identified by a hash of
 * the gene id, dataset counts, p-value and expression direction, plus a
 * separate evidence node carrying the dataset counts and the p-value. The RDF
 * buffer is written after every accepted row.
 *
 * @return false|null false when the header does not have the expected width
 */
function gene_expression()
{
    $h = explode(",", parent::getReadFile()->read());
    $expected_columns = 8;
    if (($n = count($h)) != $expected_columns) {
        trigger_error("Found {$n} columns in gene file - expecting {$expected_columns}!", E_USER_WARNING);
        return false;
    }

    while ($l = parent::getReadFile()->read(200000)) {
        $data = str_getcsv($l);
        // indexes 0..7 are read below, so shorter rows cannot be converted
        if (($n = count($data)) < $expected_columns) {
            trigger_error("Found {$n} columns in gene row - expecting {$expected_columns}!", E_USER_WARNING);
            continue;
        }

        $mgi_symbol = $data[0];
        $mgi_description = $data[1];
        $geneid = $data[2];
        $total_datasets = $data[3];
        $total_ovexp = $data[4];
        $total_underexp = $data[5];
        $p_value = $data[6];
        $expression = $data[7];

        // the evidence node shares the hash input of the individual plus a suffix
        $signature = $geneid . $total_datasets . $total_ovexp . $total_underexp . $p_value . $expression;
        $id = parent::getRes() . md5($signature);
        $evidence_id = parent::getRes() . md5($signature . "_evidence");

        $label = "Dietary restriction induced " . $expression . "-expression of " . $mgi_symbol . " based on microarray results from " . $total_datasets . " datasets, with p-value " . $p_value;
        $type_label = "Gene " . ucfirst($expression) . " Expression";
        $type = parent::getVoc() . str_replace(" ", "-", $type_label);

        parent::addRDF(
            parent::describeIndividual($id, $label, $type)
            . parent::describeClass($type, $type_label)
            . parent::triplify($id, parent::getVoc() . "gene", "ncbigene:" . $geneid)
            . parent::triplifyString("ncbigene:" . $geneid, parent::getVoc() . "mgi-gene-symbol", $mgi_symbol)
            . parent::triplifyString("ncbigene:" . $geneid, parent::getVoc() . "mgi-gene-description", $mgi_description)
            . parent::triplify($id, parent::getVoc() . "evidence", $evidence_id)
            . parent::triplifyString($id, parent::getVoc() . "perturbation-context", "dietary restriction")
            . parent::triplifyString($evidence_id, parent::getVoc() . "total-number-datasets", $total_datasets)
            . parent::triplifyString($evidence_id, parent::getVoc() . "total-number-datasets-overexpressed", $total_ovexp)
            . parent::triplifyString($evidence_id, parent::getVoc() . "total-number-datasets-underexpressed", $total_underexp)
            . parent::triplifyString($evidence_id, parent::getVoc() . "p-value", $p_value)
        );
        parent::writeRDFBufferToWriteFile();
    } //while
}
/**
 * Describe a titled resource and return its identifier.
 *
 * The identifier is the resource prefix followed by the md5 hash of the
 * title; the class identifier is the vocabulary prefix followed by the type
 * with spaces replaced by dashes. Both the individual and its class are
 * added to the RDF buffer.
 *
 * @param string $title label of the resource; a falsy value emits nothing
 * @param string $type  human-readable name of the resource's class
 * @return string|null the identifier, or null when $title is falsy
 */
public function makeDescription($title, $type)
{
    if ($title) {
        $resource = parent::getRes() . md5($title);
        $class = parent::getVoc() . str_replace(" ", "-", $type);

        $rdf = parent::describeIndividual($resource, $title, $class);
        $rdf .= parent::describeClass($class, $type);
        parent::addRDF($rdf);

        return $resource;
    }

    return null;
}
function OBO2RDF($abbv) { $abbv = strtolower($abbv); if ($abbv == "doid") { $abbv = "do"; } $minimal = parent::getParameterValue('detail') == 'min' ? true : false; $minimalp = parent::getParameterValue('detail') == 'min+' ? true : false; $version = parent::getParameterValue("bio2rdf_release"); $tid = ''; $first = true; $is_a = false; $is_deprecated = false; $min = $buf = ''; $ouri = "http://bio2rdf.org/lsr:" . $abbv; $dataset_uri = $abbv . "_resource:bio2rdf.dataset.{$abbv}.R" . $version; parent::setGraphURI($dataset_uri); $buf = parent::triplify($ouri, "rdf:type", "owl:Ontology"); $graph_uri = '<' . parent::getRegistry()->getFQURI(parent::getGraphURI()) . '>'; $bid = 1; while ($l = parent::getReadFile()->read()) { $lt = trim($l); if (strlen($lt) == 0) { continue; } if ($lt[0] == '!') { continue; } if (strstr($l, "[Term]")) { // first node? if ($first == true) { // ignore the first case $first = false; } else { if ($tid != '' && $is_a == false && $is_deprecated == false) { $t = parent::triplify($tid, "rdfs:subClassOf", "obo_vocabulary:Entity"); $buf .= $t; $min .= $t; } } $is_a = false; $is_deprecated = false; unset($typedef); $term = ''; $tid = ''; continue; } else { if (strstr($l, "[Typedef]")) { $is_a = false; $is_deprecated = false; unset($term); $tid = ''; $typedef = ''; continue; } } //echo "LINE: $l".PHP_EOL; // to fix error in obo generator $lt = str_replace("synonym ", "synonym: ", $lt); $lt = preg_replace("/\\{.*\\} !/", " !", $lt); $a = explode(" !", $lt); if (isset($a[1])) { $exc = trim($a[1]); } $a = explode(": ", trim($a[0]), 2); // let's go if (isset($intersection_of)) { if ($a[0] != "intersection_of") { // $intersection_of .= ")].".PHP_EOL; //$buf .= $intersection_of; if ($minimalp) { $min .= $intersection_of; } unset($intersection_of); } } if (isset($relationship)) { if ($a[0] != "relationship") { // $relationship .= ")].".PHP_EOL; //$buf .= $relationship; if ($minimalp) { $min .= $relationship; } unset($relationship); } } if (isset($typedef)) { if 
($a[0] == "id") { $c = explode(":", $a[1]); if (count($c) == 1) { $ns = "obo"; $id = $c[0]; } else { $ns = strtolower($c[0]); $id = $c[1]; } $id = str_replace(array("(", ")"), array("_", ""), $id); $tid = $ns . ":" . $id; } else { if ($a[0] == "name") { $buf .= parent::describeClass($tid, addslashes(stripslashes($a[1]))); } else { if ($a[0] == "is_a") { if (FALSE !== ($pos = strpos($a[1], "!"))) { $a[1] = substr($a[1], 0, $pos - 1); } $buf .= parent::triplify($tid, "rdfs:subPropertyOf", "obo_vocabulary:" . strtolower($a[1])); } else { if ($a[0] == "is_obsolete") { $buf .= parent::triplify($tid, "rdf:type", "owl:DeprecatedClass"); $is_deprecated = true; } else { if ($a[0][0] == "!") { $a[0] = substr($a[0], 1); } $buf .= parent::triplifyString($tid, "obo_vocabulary:{$a['0']}", str_replace('"', '', stripslashes($a[1]))); } } } } } else { if (isset($term)) { if ($a[0] == "is_obsolete" && $a[1] == "true") { $t = parent::triplify($tid, "rdf:type", "owl:DeprecatedClass"); $t .= parent::triplify($tid, "rdfs:subClassOf", "owl:DeprecatedClass"); $min .= $t; $buf .= $t; $is_deprecated = true; } else { if ($a[0] == "id") { parent::getRegistry()->parseQName($a[1], $ns, $id); $tid = "{$ns}:{$id}"; // $buf .= parent::describeClass($tid,null,"owl:Class"); // $buf .= parent::triplify($tid,"rdfs:isDefinedBy",$ouri); } else { if ($a[0] == "name") { // $t = parent::triplifyString($tid,"rdfs:label",str_replace(array("\"", "'"), array("","\\\'"), stripslashes($a[1]))." 
[$tid]"); $label = str_replace(array("\"", "'"), array("", "\\\\'"), stripslashes($a[1])); $t = parent::describeIndividual($tid, $label, "owl:Class"); $t .= parent::triplify($tid, "rdfs:isDefinedBy", $ouri); $min .= $t; $buf .= $t; } else { if ($a[0] == "def") { $t = str_replace(array("'", "\"", "\\", "\\\\'"), array("\\\\'", "", "", ""), $a[1]); $min .= parent::triplifyString($tid, "dc:description", $t); $buf .= parent::triplifyString($tid, "dc:description", $t); } else { if ($a[0] == "property_value") { $b = explode(" ", $a[1]); $buf .= parent::triplifyString($tid, "obo_vocabulary:" . strtolower($b[0]), str_replace("\"", "", strtolower($b[1]))); } else { if ($a[0] == "xref") { // http://upload.wikimedia.org/wikipedia/commons/3/34/Anatomical_Directions_and_Axes.JPG // Medical Dictionary:http\://www.medterms.com/ // KEGG COMPOUND:C02788 "KEGG COMPOUND" // id-validation-regexp:\"REACT_[0-9\]\{1\,4}\\.[0-9\]\{1\,3}|[0-9\]+\" //$a[1] = 'id-validation-regexp:\"REACT_[0-9\]\{1\,4}\\.[0-9\]\{1\,3}|[0-9\]+\"'; if (substr($a[1], 0, 4) == "http") { $buf .= parent::triplify($tid, "rdfs:seeAlso", str_replace(array(" ", '"wiki"', "\\"), array("+", "", ""), $a[1])); } else { $b = explode(":", $a[1], 2); if (substr($b[1], 0, 4) == "http") { $buf .= parent::triplify($tid, "rdfs:seeAlso", stripslashes($b[1])); } else { $ns = str_replace(array(" ", "\\"), "", strtolower($b[0])); $id = trim($b[1]); // there may be a comment to remove if (FALSE !== ($pos = strrpos($id, ' "'))) { $comment = substr($id, $pos + 1, -1); $id = substr($id, 0, $pos); } $id = stripslashes($id); // there may be a source statement to remove $id = preg_replace("/{.*\\}/", "", $id); if ($ns == "pmid") { $ns = "pubmed"; $y = explode(" ", $id); $id = $y[0]; } if ($ns == "xx") { continue; } if ($ns == "icd9cm") { $y = explode(" ", $id); $id = $y[0]; } if ($ns == "xref; umls_cui") { continue; } if ($ns == "submitter") { $ns = "chebi.submitter"; } if ($ns == "wikipedia" || $ns == "mesh") { $id = str_replace(" ", "+", 
$id); } if ($ns == "id-validation-regexp") { $buf .= parent::triplifyString($tid, "obo_vocabulary:{$ns}", addslashes($id)); } else { $buf .= parent::triplify($tid, "obo_vocabulary:x-{$ns}", "{$ns}:" . str_replace(" ", "-", $id)); } } } } else { if ($a[0] == "synonym") { // synonym: "entidades moleculares" RELATED [IUPAC:] // synonym: "molecular entity" EXACT IUPAC_NAME [IUPAC:] // synonym: "Chondrococcus macrosporus" RELATED synonym [NCBITaxonRef:Krzemieniewska_and_Krzemieniewski_1926] //grab string inside double quotes preg_match('/"(.*)"(.*)/', $a[1], $matches); if (!empty($matches)) { $a[1] = str_replace(array("\\", "\"", "'"), array("", "", "\\\\'"), $matches[1] . $matches[2]); } else { $a[1] = str_replace(array("\"", "'"), array("", "\\\\'"), $a[1]); } $rel = "SYNONYM"; $list = array("EXACT", "BROAD", "RELATED", "NARROW"); $found = false; foreach ($list as $keyword) { // get everything after the keyword up until the bracket [ if (FALSE !== ($k_pos = strpos($a[1], $keyword))) { $str_len = strlen($a[1]); $keyword_len = strlen($keyword); $keyword_end_pos = $k_pos + $keyword_len; $b1_pos = strrpos($a[1], "["); $b2_pos = strrpos($a[1], "]"); $b_text = substr($a[1], $b1_pos + 1, $b2_pos - $b1_pos - 1); $diff = $b1_pos - $keyword_end_pos - 1; if ($diff != 0) { // then there is more stuff here $k = substr($a[1], $keyword_end_pos + 1, $diff); $rel = trim($k); } else { // create the long predicate $rel = $keyword . "_SYNONYM"; } $found = true; $str = substr($a[1], 0, $k_pos - 1); break; } } // check to see if we still haven't found anything if ($found === false) { // we didn't find one of the keywords // so take from the start to the bracket $b1_pos = strrpos($a[1], "["); $str = substr($a[1], 0, $b1_pos - 1); } $rel = str_replace(" ", "_", $rel); // $lit = addslashes($str.($b_text?" [".$b_text."]":"")); $l = parent::triplifyString($tid, "obo_vocabulary:" . 
strtolower($rel), $str); $buf .= $l; } else { if ($a[0] == "alt_id") { parent::getRegistry()->parseQname($a[1], $ns, $id); if ($id != 'curators') { $buf .= parent::triplify("{$ns}:{$id}", "rdfs:seeAlso", stripslashes($tid)); } } else { if ($a[0] == "is_a") { // do subclassing parent::getRegistry()->parseQName($a[1], $ns, $id); $t = parent::triplify($tid, "rdfs:subClassOf", "{$ns}:{$id}"); $buf .= $t; $min .= $t; $is_a = true; } else { if ($a[0] == "intersection_of") { if (!isset($intersection_of)) { // $intersection_of = '<'.parent::getRegistry()->getFQURI($tid).'> <'.parent::getRegistry()->getFQURI('owl:equivalentClass').'> [<'.parent::getRegistry()->getFQURI('rdf:type').'> <'.parent::getRegistry()->getFQURI('owl:Class').'>; <'.parent::getRegistry()->getFQURI('owl:intersectionOf').'> ('; $intersection_of = '<' . parent::getRegistry()->getFQURI($tid) . '> <' . parent::getRegistry()->getFQURI('owl:equivalentClass') . '> _:b' . ++$bid . " {$graph_uri} ." . PHP_EOL; $intersection_of .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('rdf:type') . '> <' . parent::getRegistry()->getFQURI('owl:Class') . "> {$graph_uri} ." . PHP_EOL; $intersection_of .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('owl:intersectionOf') . '> _:b' . ++$bid . " {$graph_uri} ." . PHP_EOL; } /* intersection_of: ECO:0000206 ! BLAST evidence intersection_of: develops_from VAO:0000092 ! chondrogenic condensation intersection_of: OBO_REL:has_part VAO:0000040 ! cartilage tissue */ $c = explode(" ", $a[1]); if (count($c) == 1) { // just a class parent::getRegistry()->parseQName($c[0], $ns, $id); $intersection_of .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('rdfs:subClassOf') . '> <' . parent::getRegistry()->getFQURI("{$ns}:{$id}") . "> {$graph_uri} ." . 
PHP_EOL; $buf .= parent::triplify($tid, "rdfs:subClassOf", "{$ns}:{$id}"); } else { if (count($c) == 2) { // an expression parent::getRegistry()->parseQName($c[0], $pred_ns, $pred_id); parent::getRegistry()->parseQName($c[1], $obj_ns, $obj_id); $intersection_of .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('owl:onProperty') . '> <' . parent::getRegistry()->getFQURI("obo_vocabulary:" . $pred_id) . "> {$graph_uri} ." . PHP_EOL; $intersection_of .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('owl:someValuesFrom') . '> <' . parent::getRegistry()->getFQURI("{$obj_ns}:{$obj_id}") . "> {$graph_uri} ." . PHP_EOL; $buf .= parent::triplify($tid, "obo_vocabulary:{$pred_id}", "{$obj_ns}:{$obj_id}"); } } } else { if ($a[0] == "relationship") { if (!isset($relationship)) { $relationship = '<' . parent::getRegistry()->getFQURI($tid) . '> <' . parent::getRegistry()->getFQURI('rdfs:subClassOf') . '> _:b' . ++$bid . " {$graph_uri} ." . PHP_EOL; $relationship .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('rdf:type') . '> <' . parent::getRegistry()->getFQURI('owl:Class') . "> {$graph_uri} ." . PHP_EOL; $relationship .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('owl:intersectionOf') . '> _:b' . ++$bid . " {$graph_uri} ." . PHP_EOL; } /* relationship: develops_from VAO:0000092 ! chondrogenic condensation relationship: OBO_REL:has_part VAO:0000040 ! cartilage tissue */ $c = explode(" ", $a[1]); if (count($c) == 1) { // just a class parent::getRegistry()->parseQName($c[0], $ns, $id); $relationship .= parent::getRegistry()->getFQURI("{$ns}:{$id}"); $buf .= parent::triplify($tid, "rdfs:subClassOf", "{$ns}:{$id}"); } else { if (count($c) == 2) { // an expression parent::getRegistry()->parseQName($c[0], $pred_ns, $pred_id); parent::getRegistry()->parseQName($c[1], $obj_ns, $obj_id); $relationship .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('owl:onProperty') . '> <' . parent::getRegistry()->getFQURI("obo_vocabulary:" . $pred_id) . 
"> {$graph_uri} ." . PHP_EOL; $relationship .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('owl:someValuesFrom') . '> <' . parent::getRegistry()->getFQURI("{$obj_ns}:{$obj_id}") . "> {$graph_uri} ." . PHP_EOL; $buf .= parent::triplify($tid, "obo_vocabulary:{$pred_id}", "{$obj_ns}:{$obj_id}"); } } } else { // default handler if (isset($a[1])) { $buf .= parent::triplifyString($tid, "obo_vocabulary:{$a['0']}", str_replace(array("\"", "'"), array("", "\\\\'"), stripslashes($a[1]))); } } } } } } } } } } } } } else { //header //format-version: 1.0 $buf .= parent::triplifyString($ouri, "obo_vocabulary:{$a['0']}", str_replace(array('"', '\\:'), array('\\"', ':'), isset($a[1]) ? $a[1] : "")); } } if ($minimal || $minimalp) { parent::getWriteFile()->write($min); } else { parent::getWriteFile()->write($buf); } $min = ''; $buf = ''; $header = ''; } //if(isset($intersection_of)) $buf .= $intersection_of.")].".PHP_EOL; //if(isset($relationship)) $buf .= $relationship.")].".PHP_EOL; if ($minimal || $minimalp) { parent::getWriteFile()->Write($min); } else { parent::getWriteFile()->write($buf); } }