Beispiel #1
0
 /**
  * Converts a parsed InterPro XML document into RDF statements.
  *
  * For every <dbinfo> under <release> a "contains" statement describing the
  * member database is attached to the dataset URI; the INTERPRO dbinfo also
  * sets the dataset version. Every <interpro> entry is then described with its
  * label, type, PubMed citations, abstract, example entries, relations to
  * other entries, secondary accessions, cross-references and taxon
  * distribution. All statements are handed to parent::addRDF().
  *
  * When the "id_list" parameter is non-empty, only the entries whose id
  * appears in that comma-separated list are processed.
  *
  * @param mixed $xml Parsed document; presumably a SimpleXMLElement, since
  *                   attributes() and asXML() are called on its nodes.
  */
 function Parse($xml)
 {
     // state the dataset info
     foreach ($xml->release->dbinfo as $o) {
         $db = $o->attributes()->dbname . " v" . $o->attributes()->version . " (" . $o->attributes()->entry_count . " entries) [" . $o->attributes()->file_date . "]";
         parent::addRDF(parent::triplifyString(parent::getDatasetURI(), parent::getVoc() . "contains", $db));
         if ((string) $o->attributes()->dbname === "INTERPRO") {
             parent::setDatasetVersion($o->attributes()->version);
         }
     }

     // get a potential id list; surrounding whitespace is tolerated ("a, b")
     if (parent::getParameterValue("id_list") != '') {
         $id_list = array_map('trim', explode(",", parent::getParameterValue("id_list")));
     }

     // element holding <rel_ref> children => predicate suffix (emission order matters)
     // NOTE(review): "child" may need to be "child_list" to mirror "parent_list";
     // confirm against the InterPro DTD before changing.
     $relations = array(
         "parent_list" => "parent",
         "child" => "child",
         "contains" => "contains",
         "found_in" => "found-in",
     );
     // elements holding <db_xref> children that become x-<db> cross-references
     $xref_lists = array("member_list", "external_doc_list", "structure_db_links");

     // now iterate over the entries
     foreach ($xml->interpro as $o) {
         // flush whatever the previous iteration buffered
         parent::writeRDFBufferToWriteFile();

         $interpro_id = (string) $o->attributes()->id;
         if (isset($id_list) && !in_array($interpro_id, $id_list, true)) {
             continue;
         }
         echo "Processing {$interpro_id}" . PHP_EOL;

         $name = $o->name;
         $short_name = $o->attributes()->short_name;
         $type = $o->attributes()->type;
         // subject URI of this entry; must stay intact for every statement below
         $s = parent::getNamespace() . $interpro_id;
         parent::addRDF(parent::describeIndividual($s, "{$name} ({$short_name}) {$type}", parent::getVoc() . $type));

         // get the pubs: remember <cite> placeholder => pubmed link pairs so
         // the abstract can be rewritten below
         $cite_tags = array();
         $pubmed_links = array();
         foreach ($o->pub_list->publication as $p) {
             $pid = (string) $p->attributes()->id;
             if (isset($p->db_xref) && $p->db_xref->attributes()->db == "PUBMED") {
                 $pmid = (string) $p->db_xref->attributes()->dbkey;
                 $cite_tags[] = '<cite idref="' . $pid . '"/>';
                 $pubmed_links[] = '<a href="http://www.ncbi.nlm.nih.gov/pubmed/' . $pmid . '">pubmed:' . $pmid . '</a>';
                 parent::addRDF(parent::triplify($s, parent::getVoc() . "x-pubmed", "pubmed:{$pmid}"));
             }
         }

         $abstract = (string) $o->abstract->p->asXML();
         if ($cite_tags) {
             $abstract = str_replace($cite_tags, $pubmed_links, $abstract);
         }
         parent::addRDF(parent::triplifyString($s, "dc:description", $abstract));

         if (isset($o->example_list)) {
             foreach ($o->example_list->example as $example) {
                 $db = (string) $example->db_xref->attributes()->db;
                 $id = (string) $example->db_xref->attributes()->dbkey;
                 parent::addRDF(parent::triplify($s, parent::getVoc() . "example-entry", "{$db}:{$id}"));
             }
         }

         // relations to other interpro entries
         foreach ($relations as $element => $predicate) {
             if (isset($o->{$element}->rel_ref)) {
                 foreach ($o->{$element}->rel_ref as $rel) {
                     $id = (string) $rel->attributes()->ipr_ref;
                     parent::addRDF(parent::triplify($s, parent::getVoc() . $predicate, "interpro:{$id}"));
                 }
             }
         }

         // secondary accessions: iterate the same path that is tested, and do
         // not reuse $s as the loop variable (it is the subject URI)
         if (isset($o->sec_list->sec_ac)) {
             foreach ($o->sec_list->sec_ac as $sec) {
                 $id = (string) $sec->attributes()->acc;
                 parent::addRDF(parent::triplify($s, parent::getVoc() . "secondary-accession", "interpro:{$id}"));
             }
         }

         // xrefs
         foreach ($xref_lists as $element) {
             if (isset($o->{$element}->db_xref)) {
                 foreach ($o->{$element}->db_xref as $dbxref) {
                     $db = (string) $dbxref->attributes()->db;
                     $id = (string) $dbxref->attributes()->dbkey;
                     parent::addRDF(parent::triplify($s, parent::getVoc() . "x-" . strtolower($db), "{$db}:{$id}"));
                 }
             }
         }

         // taxon distribution
         foreach ($o->taxonomy_distribution->taxon_data as $t) {
             $organism = (string) $t->attributes()->name;
             $number = (string) $t->attributes()->proteins_count;
             parent::addRDF(parent::triplifyString($s, parent::getVoc() . "taxon-distribution", "{$organism} ({$number})"));
         }
     }
 }
Beispiel #2
0
 /**
  * Converts one decoded OMIM entry into RDF statements.
  *
  * Reads the entry at $obj["omim"]["entryList"][0]["entry"] and emits, via
  * parent::addRDF(), statements for: links to omim.org / identifiers.org,
  * titles, text sections (plus the six-digit OMIM numbers referenced in
  * braces inside them), allelic variants, the clinical synopsis with its
  * coded phenotypes, the gene map, phenotype maps, PubMed references and
  * external database links. If the entry carries a 'version', it is set as
  * the dataset version.
  *
  * @param array  $obj  Decoded response containing ["omim"]["entryList"][0]["entry"].
  * @param string $type Entry type label; also used, with spaces and slashes
  *                     replaced by dashes and the first letter upper-cased,
  *                     as the suffix of the class URI.
  */
 function ParseEntry($obj, $type)
 {
     $o = $obj["omim"]["entryList"][0]["entry"];
     $omim_id = $o['mimNumber'];
     $omim_uri = parent::getNamespace() . $o['mimNumber'];
     if (isset($o['version'])) {
         parent::setDatasetVersion($o['version']);
     }

     // add the links
     parent::addRDF($this->QQuadO_URL($omim_uri, "rdfs:seeAlso", "http://omim.org/entry/" . $omim_id));
     parent::addRDF($this->QQuadO_URL($omim_uri, "owl:sameAs", "http://identifiers.org/omim/" . $omim_id));

     // parse titles
     $titles = $o['titles'];
     $type_uri = parent::getVoc() . str_replace(array(" ", "/"), "-", ucfirst($type));
     parent::addRDF(parent::describeIndividual($omim_uri, $titles['preferredTitle'], $type_uri) . parent::describeClass($type_uri, $type));
     if (isset($titles['preferredTitle'])) {
         parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "preferred-title", $titles['preferredTitle']));
     }
     if (isset($titles['alternativeTitles'])) {
         // alternative titles are ";;"-separated
         foreach (explode(";;", $titles['alternativeTitles']) as $title) {
             parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "alternative-title", trim($title)));
         }
     }

     // parse text sections
     if (isset($o['textSectionList'])) {
         foreach ($o['textSectionList'] as $i => $section) {
             $section_title = $section['textSection']['textSectionTitle'];
             $section_content = $section['textSection']['textSectionContent'];
             if ($section_title == "Description") {
                 parent::addRDF(parent::triplifyString($omim_uri, "dc:description", $section_content));
             } else {
                 // any other section becomes a vocabulary predicate named after its title
                 $p = str_replace(" ", "-", strtolower($section_title));
                 parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "{$p}", $section_content));
             }
             // parse the omim references: six-digit numbers wrapped in braces
             preg_match_all("/\\{([0-9]{6})\\}/", $section_content, $m);
             if (isset($m[1][0])) {
                 foreach ($m[1] as $oid) {
                     parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "refers-to", "omim:{$oid}"));
                 }
             }
         }
     }

     // allelic variants
     if (isset($o['allelicVariantList'])) {
         foreach ($o['allelicVariantList'] as $i => $v) {
             $v = $v['allelicVariant'];
             $uri = parent::getRes() . "{$omim_id}" . "_allele_" . $i;
             $label = str_replace("\n", " ", $v['name']);
             parent::addRDF(parent::describeIndividual($uri, $label, parent::getVoc() . "Allelic-Variant") . parent::describeClass(parent::getVoc() . "Allelic-Variant", "Allelic Variant"));
             if (isset($v['alternativeNames'])) {
                 foreach (explode(";;", $v['alternativeNames']) as $name) {
                     $name = str_replace("\n", " ", $name);
                     parent::addRDF(parent::triplifyString($uri, parent::getVoc() . "alternative-names", $name));
                 }
             }
             if (isset($v['text'])) {
                 parent::addRDF(parent::triplifyString($uri, "dc:description", $v['text']));
             }
             if (isset($v['mutations'])) {
                 parent::addRDF(parent::triplifyString($uri, parent::getVoc() . "mutation", $v['mutations']));
             }
             if (isset($v['dbSnps'])) {
                 foreach (explode(",", $v['dbSnps']) as $snp) {
                     parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-dbsnp", "dbsnp:" . $snp));
                 }
             }
             parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "variant", $uri));
         }
     }

     // clinical synopsis
     if (isset($o['clinicalSynopsis'])) {
         $cs = $o['clinicalSynopsis'];
         $cs_uri = parent::getRes() . "" . $omim_id . "_cs";
         parent::addRDF(parent::describeIndividual($cs_uri, "Clinical synopsis for omim {$omim_id}", parent::getVoc() . "Clinical-Synopsis") . parent::describeClass(parent::getVoc() . "Clinical-Synopsis", "Clinical Synopsis") . parent::triplify($omim_uri, parent::getVoc() . "clinical-synopsis", $cs_uri));
         foreach ($cs as $k => $v) {
             // ignore the boolean "...Exists" assertions
             if (strstr($k, "Exists")) {
                 continue;
             }
             // @todo ignore provenance for now
             if (in_array($k, array('contributors', 'creationDate', 'editHistory', 'epochCreated', 'dateCreated', 'epochUpdated', 'dateUpdated'))) {
                 continue;
             }
             // normalise scalar values to a one-element map keyed by the field name
             if (!is_array($v)) {
                 $v = array($k => $v);
             }
             foreach ($v as $k1 => $v1) {
                 $phenotypes = explode(";", $v1);
                 foreach ($phenotypes as $coded_phenotype) {
                     // parse out the codes
                     $coded_phenotype = trim($coded_phenotype);
                     if (!$coded_phenotype) {
                         continue;
                     }
                     // label is the text with the brace-delimited code block removed
                     $phenotype = preg_replace("/\\{.*\\}/", "", $coded_phenotype);
                     $phenotype_id = parent::getRes() . "" . md5(strtolower($phenotype));
                     $entity_id = parent::getRes() . "" . $k1;
                     parent::addRDF(parent::describeIndividual($phenotype_id, $phenotype, parent::getVoc() . 'Characteristic') . parent::describeClass(parent::getVoc() . 'Characteristic', 'Characteristic') . parent::triplify($cs_uri, parent::getVoc() . "feature", $phenotype_id) . parent::describeIndividual($entity_id, $k1, parent::getVoc() . "Entity") . parent::describeClass(parent::getVoc() . "Entity", "Entity") . parent::triplify($phenotype_id, parent::getVoc() . "characteristic-of", $entity_id));

                     // parse out the vocab references
                     preg_match_all("/\\{([0-9A-Za-z \\:\\-\\.]+)\\}|;/", $coded_phenotype, $codes);
                     if (!isset($codes[1][0])) {
                         continue;
                     }
                     foreach ($codes[1] as $entry) {
                         $entries = explode(" ", trim($entry));
                         foreach ($entries as $e) {
                             // bare vocabulary names that precede the actual code
                             if ($e == "HPO" || $e == "EOM") {
                                 continue;
                             }
                             // presumably fills $ns and $id by reference — confirm in the registry
                             $this->getRegistry()->parseQName($e, $ns, $id);
                             if (!isset($ns) || $ns == '') {
                                 // no prefix: treated as an omim number, dropping any ".suffix"
                                 $b = explode(".", $id);
                                 $ns = "omim";
                                 $id = $b[0];
                             } else {
                                 $ns = str_replace(array("hpo", "id", "icd10cm", "icd9cm", "snomedct"), array("hp", "eom", "icd10", "icd9", "snomed"), $ns);
                             }
                             parent::addRDF(parent::triplify($phenotype_id, parent::getVoc() . "x-{$ns}", "{$ns}:{$id}"));
                         }
                     }
                 }
             }
         }
     }

     // genemap
     if (isset($o['geneMap'])) {
         $map = $o['geneMap'];
         if (isset($map['chromosome'])) {
             parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "chromosome", (string) $map['chromosome']));
         }
         if (isset($map['cytoLocation'])) {
             parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "cytolocation", (string) $map['cytoLocation']));
         }
         if (isset($map['geneSymbols'])) {
             foreach (preg_split("/[,;\\. ]+/", $map['geneSymbols']) as $symbol) {
                 parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "gene-symbol", "symbol:" . trim($symbol)));
             }
         }
         if (isset($map['geneName'])) {
             foreach (explode(",", $map['geneName']) as $name) {
                 parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "gene-name", trim($name)));
             }
         }
         if (isset($map['mappingMethod'])) {
             foreach (explode(",", $map['mappingMethod']) as $c) {
                 $mapping_method = trim($c);
                 $method_uri = $this->get_method_type($mapping_method);
                 // false signals a method with no known URI; it is skipped
                 if ($method_uri !== false) {
                     parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "mapping-method", $method_uri));
                 }
             }
         }
         if (isset($map['mouseGeneSymbol'])) {
             foreach (explode(",", $map['mouseGeneSymbol']) as $c) {
                 parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "mouse-gene-symbol", "symbol:" . strtoupper($c)));
             }
         }
         if (isset($map['mouseMgiID'])) {
             foreach (explode(",", $map['mouseMgiID']) as $c) {
                 parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-mgi", $c));
             }
         }
         if (isset($map['geneInheritance']) && $map['geneInheritance'] != '') {
             parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "gene-inheritance", $map['geneInheritance']));
         }
     }

     // phenotype maps
     if (isset($o['phenotypeMapList'])) {
         foreach ($o['phenotypeMapList'] as $i => $phenotypeMap) {
             $phenotypeMap = $phenotypeMap['phenotypeMap'];
             $pm_uri = parent::getRes() . $omim_id . "_pm_" . ($i + 1);
             parent::addRDF(parent::describeIndividual($pm_uri, "phenotype mapping for {$omim_id}", parent::getVoc() . "Phenotype-Map") . parent::describeClass(parent::getVoc() . "Phenotype-Map", "OMIM Phenotype-Map") . parent::triplify($omim_uri, parent::getVoc() . "phenotype-map", $pm_uri));
             foreach (array_keys($phenotypeMap) as $k) {
                 if (in_array($k, array("mimNumber", "phenotypeMimNumber", "phenotypicSeriesMimNumber"))) {
                     // fields holding omim numbers become links to other omim entries
                     parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . $k, "omim:" . $phenotypeMap[$k]));
                 } elseif ($k == "geneSymbols") {
                     foreach (explode(", ", $phenotypeMap[$k]) as $gene) {
                         parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . "gene-symbol", "hgnc.symbol:" . $gene));
                     }
                 } elseif ($k == "phenotypeMappingKey") {
                     $l = $this->get_phenotype_mapping_method_type($phenotypeMap[$k]);
                     parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . "mapping-method", $l));
                 } else {
                     parent::addRDF(parent::triplifyString($pm_uri, parent::getVoc() . $k, $phenotypeMap[$k]));
                 }
             }
         }
     }

     // references
     if (isset($o['referenceList'])) {
         foreach ($o['referenceList'] as $i => $r) {
             $r = $r['reference'];
             if (!isset($r['pubmedID'])) {
                 continue;
             }
             $pubmed_uri = "pubmed:" . $r['pubmedID'];
             parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "article", $pubmed_uri));
             // fall back to a generic label when the reference has no title
             $title = 'article';
             if (isset($r['title'])) {
                 $title = $r['title'];
             }
             parent::addRDF(parent::describe($pubmed_uri, addslashes($title)));
             if (isset($r['articleUrl'])) {
                 parent::addRDF($this->QQuadO_URL($pubmed_uri, "rdfs:seeAlso", htmlentities($r['articleUrl'])));
             }
         }
     }

     // external ids
     if (isset($o['externalLinks'])) {
         // external link field => namespace prefix of the emitted cross-reference
         $link_namespaces = array(
             'approvedGeneSymbols' => 'symbol',
             'geneIDs' => 'ncbigene',
             'ncbiReferenceSequences' => 'gi',
             'genbankNucleotideSequences' => 'gi',
             'proteinSequences' => 'gi',
             'uniGenes' => 'unigene',
             'ensemblIDs' => 'ensembl',
             'swissProtIDs' => 'uniprot',
             'mgiIDs' => 'mgi',
             'flybaseIDs' => 'flybase',
             'zfinIDs' => 'zfin',
             'hprdIDs' => 'hprd',
             'orphanetDiseases' => 'orphanet',
             'refSeqAccessionIDs' => 'refseq',
             'ordrDiseases' => 'ordr',
             'snomedctIDs' => 'snomed',
             'icd10cmIDs' => 'icd10',
             'icd9cmIDs' => 'icd9',
             'umlsIDs' => 'umls',
             'wormbaseIDs' => 'wormbase',
             'diseaseOntologyIDs' => 'do',
         );
         // fields that are specifically ignored (urls, names, true/false flags)
         $ignored_links = array(
             'geneTests',
             'cmgGene',
             'geneticAllianceIDs',
             'nextGxDx',
             'nbkIDs', // e.g. NBK1207;;Alport Syndrome and Thin Basement Membrane Nephropathy
             'newbornScreeningUrls',
             'decipherUrls',
             'geneReviewShortNames',
             'locusSpecificDBs',
             'geneticsHomeReferenceIDs',
             'omiaIDs',
             'coriellDiseases',
             'clinicalDiseaseIDs',
             'possumSyndromes',
             'keggPathways',
             'gtr',
             'gwasCatalog',
             'mgiHumanDisease',
             'wormbaseDO',
             'dermAtlas',
         );

         foreach ($o['externalLinks'] as $k => $id) {
             if ($id === false) {
                 continue;
             }
             if (!isset($link_namespaces[$k])) {
                 if (!in_array($k, $ignored_links, true)) {
                     echo "unhandled external link {$k} {$id}" . PHP_EOL;
                 }
                 // nothing is emitted without a namespace
                 continue;
             }
             $ns = $link_namespaces[$k];

             if ($k === 'mgiIDs') {
                 // drop the "MGI:"-style prefix from every id of the comma-separated
                 // list; values without a prefix pass through unchanged
                 $id = preg_replace('/(^|,)[^:,]*:/', '$1', $id);
             } elseif ($k === 'ordrDiseases') {
                 // keep only the part before the first ";;"
                 $b = explode(";;", $id);
                 $id = $b[0];
             }

             foreach (explode(",", $id) as $single_id) {
                 if (strstr($single_id, ";;") === FALSE) {
                     parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-{$ns}", $ns . ':' . $single_id));
                     continue;
                 }
                 // multiple ids//names: parts containing a lower-case letter are
                 // taken to be names and skipped
                 foreach (explode(";;", $single_id) as $c) {
                     preg_match("/([a-z])/", $c, $m);
                     if (!isset($m[1])) {
                         parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-{$ns}", $ns . ':' . $c));
                     }
                 }
             }
         }
     }
 }