/**
 * Convert a parsed InterPro XML document into RDF.
 *
 * First records one "contains" statement per <dbinfo> element of the release
 * (and uses the INTERPRO one to set the dataset version), then walks every
 * <interpro> entry and emits its label/type, PubMed citations, abstract,
 * example entries, relations to other entries, secondary accessions,
 * database cross-references and taxonomic distribution.
 *
 * If the "id_list" parameter is non-empty it is treated as a comma-separated
 * list of InterPro ids and all other entries are skipped.
 *
 * @param object $xml Parsed document; presumably a SimpleXMLElement, since the
 *                    code relies on attributes() and asXML() - TODO confirm
 *                    against the caller.
 */
function Parse($xml)
{
    // state the dataset info
    foreach ($xml->release->dbinfo as $info) {
        $attrs = $info->attributes();
        $db = $attrs->dbname . " v" . $attrs->version . " (" . $attrs->entry_count . " entries) [" . $attrs->file_date . "]";
        parent::addRDF(parent::triplifyString(parent::getDatasetURI(), parent::getVoc() . "contains", $db));
        if ((string) $attrs->dbname === "INTERPRO") {
            parent::setDatasetVersion((string) $attrs->version);
        }
    }

    // get a potential id list; whitespace around the ids is ignored
    $id_list = null;
    $id_param = parent::getParameterValue("id_list");
    if ($id_param != '') {
        $id_list = array_map('trim', explode(",", $id_param));
    }

    // element name => predicate suffix for relations to other InterPro entries.
    // NOTE(review): the original only read <child>; the sibling element is
    // <parent_list>, so <child_list> is accepted as well - confirm against the
    // InterPro DTD.
    $relations = array(
        array('parent_list', 'parent'),
        array('child_list', 'child'),
        array('child', 'child'),
        array('contains', 'contains'),
        array('found_in', 'found-in'),
    );
    // elements whose <db_xref> children become "x-<db>" cross-references
    $xref_sections = array('member_list', 'external_doc_list', 'structure_db_links');

    // now iterate over the entries
    foreach ($xml->interpro as $o) {
        parent::writeRDFBufferToWriteFile();

        // cast so the id filter compares strings rather than an XML node
        $interpro_id = (string) $o->attributes()->id;
        if ($id_list !== null && !in_array($interpro_id, $id_list, true)) {
            continue;
        }
        echo "Processing {$interpro_id}" . PHP_EOL;

        $name = $o->name;
        $short_name = $o->attributes()->short_name;
        $type = $o->attributes()->type;
        $s = parent::getNamespace() . $interpro_id;
        parent::addRDF(parent::describeIndividual($s, "{$name} ({$short_name}) {$type}", parent::getVoc() . $type));

        // get the pubs: remember, per PubMed-backed publication, the <cite>
        // placeholder and the anchor that replaces it in the abstract
        $cite_tags = array();
        $cite_links = array();
        if (isset($o->pub_list->publication)) {
            foreach ($o->pub_list->publication as $p) {
                $pid = (string) $p->attributes()->id;
                if (!isset($p->db_xref) || $p->db_xref->attributes()->db != "PUBMED") {
                    continue;
                }
                $pmid = (string) $p->db_xref->attributes()->dbkey;
                $cite_tags[] = '<cite idref="' . $pid . '"/>';
                // NOTE(review): the href is the bare PubMed id; it looks like a
                // base URL may be missing - left unchanged, confirm upstream.
                $cite_links[] = '<a href="' . $pmid . '">pubmed:' . $pmid . '</a>';
                parent::addRDF(parent::triplify($s, parent::getVoc() . "x-pubmed", "pubmed:{$pmid}"));
            }
        }

        // abstract (first <p> only), with citation placeholders substituted
        if (isset($o->abstract->p)) {
            $abstract = (string) $o->abstract->p->asXML();
            if ($cite_tags) {
                $abstract = str_replace($cite_tags, $cite_links, $abstract);
            }
            parent::addRDF(parent::triplifyString($s, "dc:description", $abstract));
        }

        // example entries
        if (isset($o->example_list->example)) {
            foreach ($o->example_list->example as $example) {
                $db = (string) $example->db_xref->attributes()->db;
                $id = (string) $example->db_xref->attributes()->dbkey;
                parent::addRDF(parent::triplify($s, parent::getVoc() . "example-entry", "{$db}:{$id}"));
            }
        }

        // relations to other InterPro entries
        foreach ($relations as $relation) {
            $element = $relation[0];
            $predicate = $relation[1];
            if (!isset($o->{$element}->rel_ref)) {
                continue;
            }
            foreach ($o->{$element}->rel_ref as $ref) {
                $id = (string) $ref->attributes()->ipr_ref;
                parent::addRDF(parent::triplify($s, parent::getVoc() . $predicate, "interpro:{$id}"));
            }
        }

        // secondary accessions. The original looped over $o->sec_ac (which is
        // not where the guard looked) and reused $s as the loop variable,
        // which would have clobbered the subject URI.
        if (isset($o->sec_list->sec_ac)) {
            foreach ($o->sec_list->sec_ac as $sec) {
                $id = (string) $sec->attributes()->acc;
                parent::addRDF(parent::triplify($s, parent::getVoc() . "secondary-accession", "interpro:{$id}"));
            }
        }

        // xrefs (the member_list guard used to test the misspelt "dbxref",
        // so member signatures were silently dropped)
        foreach ($xref_sections as $section) {
            if (!isset($o->{$section}->db_xref)) {
                continue;
            }
            foreach ($o->{$section}->db_xref as $dbxref) {
                $db = (string) $dbxref->attributes()->db;
                $id = (string) $dbxref->attributes()->dbkey;
                parent::addRDF(parent::triplify($s, parent::getVoc() . "x-" . strtolower($db), "{$db}:{$id}"));
            }
        }

        // taxon distribution
        if (isset($o->taxonomy_distribution->taxon_data)) {
            foreach ($o->taxonomy_distribution->taxon_data as $t) {
                $organism = (string) $t->attributes()->name;
                $number = (string) $t->attributes()->proteins_count;
                parent::addRDF(parent::triplifyString($s, parent::getVoc() . "taxon-distribution", "{$organism} ({$number})"));
            }
        }
    }
}
/**
 * Convert one decoded OMIM entry into RDF and add it to the RDF buffer.
 *
 * Handles, in order: the entry itself and its titles, text sections (with the
 * six-digit "{nnnnnn}" OMIM references they contain), allelic variants, the
 * clinical synopsis, the gene map, phenotype maps, references and external
 * links.
 *
 * @param array  $obj  Decoded response; the entry is read from
 *                     $obj["omim"]["entryList"][0]["entry"].
 * @param string $type Entry type label. It is used verbatim as the class label
 *                     and, with spaces and "/" replaced by "-" and the first
 *                     letter upper-cased, as the class name in the vocabulary.
 */
function ParseEntry($obj, $type)
{
    $o = $obj["omim"]["entryList"][0]["entry"];
    $omim_id = $o['mimNumber'];
    $omim_uri = parent::getNamespace() . $o['mimNumber'];
    if (isset($o['version'])) {
        parent::setDatasetVersion($o['version']);
    }

    // add the links
    // NOTE(review): the URL argument is just the OMIM number prefixed with an
    // empty string; it looks like a base URL may be missing - left unchanged,
    // confirm upstream.
    parent::addRDF($this->QQuadO_URL($omim_uri, "rdfs:seeAlso", "" . $omim_id));
    parent::addRDF($this->QQuadO_URL($omim_uri, "owl:sameAs", "" . $omim_id));

    // parse titles
    $titles = $o['titles'];
    $type_uri = parent::getVoc() . str_replace(array(" ", "/"), "-", ucfirst($type));
    // read the title defensively: it was previously dereferenced before the isset() check below
    $preferred_title = isset($titles['preferredTitle']) ? $titles['preferredTitle'] : null;
    parent::addRDF(
        parent::describeIndividual($omim_uri, $preferred_title, $type_uri)
        . parent::describeClass($type_uri, $type)
    );
    if (isset($titles['preferredTitle'])) {
        parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "preferred-title", $titles['preferredTitle']));
    }
    if (isset($titles['alternativeTitles'])) {
        foreach (explode(";;", $titles['alternativeTitles']) as $title) {
            parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "alternative-title", trim($title)));
        }
    }

    // parse text sections
    if (isset($o['textSectionList'])) {
        foreach ($o['textSectionList'] as $section) {
            $text_section = $section['textSection'];
            if ($text_section['textSectionTitle'] == "Description") {
                parent::addRDF(parent::triplifyString($omim_uri, "dc:description", $text_section['textSectionContent']));
            } else {
                // any other section becomes a predicate named after its title
                $p = str_replace(" ", "-", strtolower($text_section['textSectionTitle']));
                parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "{$p}", $text_section['textSectionContent']));
            }

            // parse the omim references
            preg_match_all("/\\{([0-9]{6})\\}/", $text_section['textSectionContent'], $m);
            if (isset($m[1][0])) {
                foreach ($m[1] as $oid) {
                    parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "refers-to", "omim:{$oid}"));
                }
            }
        }
    }

    // allelic variants
    if (isset($o['allelicVariantList'])) {
        foreach ($o['allelicVariantList'] as $i => $v) {
            $v = $v['allelicVariant'];
            $uri = parent::getRes() . "{$omim_id}" . "_allele_" . $i;
            $label = str_replace("\n", " ", $v['name']);
            parent::addRDF(
                parent::describeIndividual($uri, $label, parent::getVoc() . "Allelic-Variant")
                . parent::describeClass(parent::getVoc() . "Allelic-Variant", "Allelic Variant")
            );
            if (isset($v['alternativeNames'])) {
                foreach (explode(";;", $v['alternativeNames']) as $name) {
                    $name = str_replace("\n", " ", $name);
                    parent::addRDF(parent::triplifyString($uri, parent::getVoc() . "alternative-names", $name));
                }
            }
            if (isset($v['text'])) {
                parent::addRDF(parent::triplifyString($uri, "dc:description", $v['text']));
            }
            if (isset($v['mutations'])) {
                parent::addRDF(parent::triplifyString($uri, parent::getVoc() . "mutation", $v['mutations']));
            }
            if (isset($v['dbSnps'])) {
                foreach (explode(",", $v['dbSnps']) as $snp) {
                    parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-dbsnp", "dbsnp:" . trim($snp)));
                }
            }
            parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "variant", $uri));
        }
    }

    // clinical synopsis
    if (isset($o['clinicalSynopsis'])) {
        $cs = $o['clinicalSynopsis'];
        $cs_uri = parent::getRes() . "" . $omim_id . "_cs";
        parent::addRDF(
            parent::describeIndividual($cs_uri, "Clinical synopsis for omim {$omim_id}", parent::getVoc() . "Clinical-Synopsis")
            . parent::describeClass(parent::getVoc() . "Clinical-Synopsis", "Clinical Synopsis")
            . parent::triplify($omim_uri, parent::getVoc() . "clinical-synopsis", $cs_uri)
        );

        $provenance_keys = array('contributors', 'creationDate', 'editHistory', 'epochCreated', 'dateCreated', 'epochUpdated', 'dateUpdated');
        foreach ($cs as $k => $v) {
            // ignore the boolean "...Exists" assertions
            if (strstr($k, "Exists")) {
                continue;
            }
            // @todo ignore provenance for now
            if (in_array($k, $provenance_keys)) {
                continue;
            }
            // a plain value is treated as a one-element group keyed by its own name
            if (!is_array($v)) {
                $v = array($k => $v);
            }
            foreach ($v as $k1 => $v1) {
                foreach (explode(";", $v1) as $coded_phenotype) {
                    // parse out the codes
                    $coded_phenotype = trim($coded_phenotype);
                    if (!$coded_phenotype) {
                        continue;
                    }
                    // the label is the text with every "{...}" code block removed
                    $phenotype = preg_replace("/\\{.*\\}/", "", $coded_phenotype);
                    $phenotype_id = parent::getRes() . "" . md5(strtolower($phenotype));
                    $entity_id = parent::getRes() . "" . $k1;
                    parent::addRDF(
                        parent::describeIndividual($phenotype_id, $phenotype, parent::getVoc() . 'Characteristic')
                        . parent::describeClass(parent::getVoc() . 'Characteristic', 'Characteristic')
                        . parent::triplify($cs_uri, parent::getVoc() . "feature", $phenotype_id)
                        . parent::describeIndividual($entity_id, $k1, parent::getVoc() . "Entity")
                        . parent::describeClass(parent::getVoc() . "Entity", "Entity")
                        . parent::triplify($phenotype_id, parent::getVoc() . "characteristic-of", $entity_id)
                    );

                    // parse out the vocab references
                    preg_match_all("/\\{([0-9A-Za-z \\:\\-\\.]+)\\}|;/", $coded_phenotype, $codes);
                    if (!isset($codes[1][0])) {
                        continue;
                    }
                    foreach ($codes[1] as $entry) {
                        foreach (explode(" ", trim($entry)) as $e) {
                            // "HPO" / "EOM" are bare words preceding the actual code
                            if ($e == "HPO" || $e == "EOM") {
                                continue;
                            }
                            // presumably fills $ns and $id by reference - TODO confirm against the registry
                            $this->getRegistry()->parseQName($e, $ns, $id);
                            if (!isset($ns) || $ns == '') {
                                // no prefix: treat as an OMIM number, dropping anything after the first "."
                                $b = explode(".", $id);
                                $ns = "omim";
                                $id = $b[0];
                            } else {
                                $ns = str_replace(array("hpo", "id", "icd10cm", "icd9cm", "snomedct"), array("hp", "eom", "icd10", "icd9", "snomed"), $ns);
                            }
                            parent::addRDF(parent::triplify($phenotype_id, parent::getVoc() . "x-{$ns}", "{$ns}:{$id}"));
                        }
                    }
                }
            }
        }
    }

    // genemap
    if (isset($o['geneMap'])) {
        $map = $o['geneMap'];
        if (isset($map['chromosome'])) {
            parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "chromosome", (string) $map['chromosome']));
        }
        if (isset($map['cytoLocation'])) {
            parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "cytolocation", (string) $map['cytoLocation']));
        }
        if (isset($map['geneSymbols'])) {
            // PREG_SPLIT_NO_EMPTY: a leading/trailing separator must not yield an empty "symbol:" id
            $symbols = preg_split("/[,;\\. ]+/", $map['geneSymbols'], -1, PREG_SPLIT_NO_EMPTY);
            foreach ($symbols as $symbol) {
                parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "gene-symbol", "symbol:" . trim($symbol)));
            }
        }
        if (isset($map['geneName'])) {
            foreach (explode(",", $map['geneName']) as $name) {
                parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "gene-name", trim($name)));
            }
        }
        if (isset($map['mappingMethod'])) {
            foreach (explode(",", $map['mappingMethod']) as $c) {
                $method_uri = $this->get_method_type(trim($c));
                if ($method_uri !== false) {
                    parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "mapping-method", $method_uri));
                }
            }
        }
        if (isset($map['mouseGeneSymbol'])) {
            foreach (explode(",", $map['mouseGeneSymbol']) as $c) {
                parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "mouse-gene-symbol", "symbol:" . strtoupper(trim($c))));
            }
        }
        if (isset($map['mouseMgiID'])) {
            foreach (explode(",", $map['mouseMgiID']) as $c) {
                parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-mgi", trim($c)));
            }
        }
        if (isset($map['geneInheritance']) && $map['geneInheritance'] != '') {
            parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "gene-inheritance", $map['geneInheritance']));
        }
    }

    // phenotype maps
    if (isset($o['phenotypeMapList'])) {
        $mim_keys = array("mimNumber", "phenotypeMimNumber", "phenotypicSeriesMimNumber");
        foreach ($o['phenotypeMapList'] as $i => $phenotypeMap) {
            $phenotypeMap = $phenotypeMap['phenotypeMap'];
            $pm_uri = parent::getRes() . $omim_id . "_pm_" . ($i + 1);
            parent::addRDF(
                parent::describeIndividual($pm_uri, "phenotype mapping for {$omim_id}", parent::getVoc() . "Phenotype-Map")
                . parent::describeClass(parent::getVoc() . "Phenotype-Map", "OMIM Phenotype-Map")
                . parent::triplify($omim_uri, parent::getVoc() . "phenotype-map", $pm_uri)
            );
            foreach (array_keys($phenotypeMap) as $k) {
                if (in_array($k, $mim_keys)) {
                    parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . $k, "omim:" . $phenotypeMap[$k]));
                } elseif ($k == "geneSymbols") {
                    foreach (explode(", ", $phenotypeMap[$k]) as $gene) {
                        parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . "gene-symbol", "hgnc.symbol:" . $gene));
                    }
                } elseif ($k == "phenotypeMappingKey") {
                    $l = $this->get_phenotype_mapping_method_type($phenotypeMap[$k]);
                    parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . "mapping-method", $l));
                } else {
                    parent::addRDF(parent::triplifyString($pm_uri, parent::getVoc() . $k, $phenotypeMap[$k]));
                }
            }
        }
    }

    // references
    if (isset($o['referenceList'])) {
        foreach ($o['referenceList'] as $r) {
            $r = $r['reference'];
            if (!isset($r['pubmedID'])) {
                continue;
            }
            $pubmed_uri = "pubmed:" . $r['pubmedID'];
            parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "article", $pubmed_uri));
            // fall back to a generic label when the reference has no title
            // (the fallback used to be computed but never used)
            $title = isset($r['title']) ? $r['title'] : 'article';
            parent::addRDF(parent::describe($pubmed_uri, addslashes($title)));
            if (isset($r['articleUrl'])) {
                parent::addRDF($this->QQuadO_URL($pubmed_uri, "rdfs:seeAlso", htmlentities($r['articleUrl'])));
            }
        }
    }

    // external ids
    if (isset($o['externalLinks'])) {
        // external link key => namespace of the identifiers it carries
        $link_namespaces = array(
            'approvedGeneSymbols' => 'symbol',
            'geneIDs' => 'ncbigene',
            'ncbiReferenceSequences' => 'gi',
            'genbankNucleotideSequences' => 'gi',
            'proteinSequences' => 'gi',
            'uniGenes' => 'unigene',
            'ensemblIDs' => 'ensembl',
            'swissProtIDs' => 'uniprot',
            'mgiIDs' => 'mgi',
            'flybaseIDs' => 'flybase',
            'zfinIDs' => 'zfin',
            'hprdIDs' => 'hprd',
            'orphanetDiseases' => 'orphanet',
            'refSeqAccessionIDs' => 'refseq',
            'ordrDiseases' => 'ordr',
            'snomedctIDs' => 'snomed',
            'icd10cmIDs' => 'icd10',
            'icd9cmIDs' => 'icd9',
            'umlsIDs' => 'umls',
            'wormbaseIDs' => 'wormbase',
            'diseaseOntologyIDs' => 'do',
        );
        // specifically ignoring
        $ignored_links = array(
            'geneTests',
            'cmgGene',
            'geneticAllianceIDs', // #
            'nextGxDx',
            'nbkIDs', // NBK1207;;Alport Syndrome and Thin Basement Membrane Nephropathy
            'newbornScreeningUrls',
            'decipherUrls',
            'geneReviewShortNames',
            'locusSpecificDBs',
            'geneticsHomeReferenceIDs',
            'omiaIDs',
            'coriellDiseases',
            'clinicalDiseaseIDs',
            'possumSyndromes',
            'keggPathways',
            'gtr',
            'gwasCatalog',
            'mgiHumanDisease',
            'wormbaseDO',
            'dermAtlas', // true/false
        );

        foreach ($o['externalLinks'] as $k => $id) {
            if ($id === false) {
                continue;
            }
            if (!isset($link_namespaces[$k])) {
                if (!in_array($k, $ignored_links, true)) {
                    echo "unhandled external link {$k} {$id}" . PHP_EOL;
                }
                // nothing can be emitted without a namespace
                continue;
            }
            $ns = $link_namespaces[$k];

            if ($k === 'ordrDiseases') {
                // only the part before the first ";;" is kept
                $b = explode(";;", $id);
                $id = $b[0];
            }

            foreach (explode(",", $id) as $link_id) {
                if ($ns === 'mgi') {
                    // drop the prefix of each "prefix:number" id individually, so
                    // comma-separated lists and prefix-less values are handled too
                    $b = explode(":", $link_id);
                    $link_id = isset($b[1]) ? $b[1] : $b[0];
                }
                if (strstr($link_id, ";;") === false) {
                    parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-{$ns}", $ns . ':' . $link_id));
                    continue;
                }
                // multiple ids//names: keep only the parts without lower-case letters
                foreach (explode(";;", $link_id) as $c) {
                    preg_match("/([a-z])/", $c, $m);
                    if (!isset($m[1])) {
                        parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-{$ns}", $ns . ':' . $c));
                    }
                }
            }
        }
    }
}