/**
 * Converts an Orphanet disorder-gene association XML file into RDF.
 *
 * The file is read through CXML one "DisorderList" chunk at a time. For every
 * <Disorder> in a chunk and every <DisorderGeneAssociation> below it, this
 * method emits:
 *   - a description of the gene (label, type, symbol, synonyms, cross-references),
 *   - a description of the association itself, typed by its association type
 *     and linked to its status, the disorder and the gene.
 * The RDF buffer is flushed to the write file after each disorder.
 *
 * @param string $file location of the XML file, passed unchanged to CXML
 *                     (presumably a filesystem path; confirm against CXML)
 */
function genes($file)
{
    $xml = new CXML($file);
    while ($xml->parse("DisorderList") == true) {
        $x = $xml->GetXMLRoot();
        foreach ($x->Disorder as $d) {
            $orphanet_id = parent::getNamespace() . (string) $d->OrphaNumber;
            $disorder_name = (string) $d->Name;

            foreach ($d->DisorderGeneAssociationList->DisorderGeneAssociation as $dga) {
                // gene
                $gene = $dga->Gene;
                $gene_id = parent::getNamespace() . (string) $gene->OrphaNumber;
                $gene_label = (string) $gene->Name;
                $gene_symbol = (string) $gene->Symbol;
                parent::addRDF(
                    parent::describeIndividual($gene_id, $gene_label, parent::getVoc() . "Gene") .
                    parent::describeClass(parent::getVoc() . "Gene", "orphanet gene") .
                    parent::triplifyString($gene_id, parent::getVoc() . "symbol", $gene_symbol)
                );

                // Iterate the <Synonym> children of each list: reading
                // $list->Synonym directly only ever yields the first synonym.
                // The outer loop keeps a missing <SynonymList> a silent no-op.
                foreach ($gene->SynonymList as $synonym_list) {
                    foreach ($synonym_list->Synonym as $synonym) {
                        parent::addRDF(
                            parent::triplifyString($gene_id, parent::getVoc() . "synonym", (string) $synonym)
                        );
                    }
                }

                // Same reasoning as for synonyms: emit one xref per
                // <ExternalReference>, not just the first one of the list.
                foreach ($gene->ExternalReferenceList as $reference_list) {
                    foreach ($reference_list->ExternalReference as $er) {
                        $db = parent::getRegistry()->getPreferredPrefix((string) $er->Source);
                        $id = (string) $er->Reference;
                        $xref = "{$db}:{$id}";
                        parent::addRDF(parent::triplify($gene_id, parent::getVoc() . "x-{$db}", $xref));
                    }
                }

                // The association has no identifier of its own in the data that is
                // used here, so its id is derived from the disorder number plus a
                // hash of the association's serialized XML.
                $dga_id = parent::getRes() . (string) $d->OrphaNumber . "_" . md5($dga->asXML());

                $ga = $dga->DisorderGeneAssociationType;
                $ga_id = parent::getNamespace() . (string) $ga->attributes()->id;
                $ga_label = (string) $ga->Name;

                $status = $dga->DisorderGeneAssociationStatus;
                $s_id = parent::getNamespace() . (string) $status->attributes()->id;
                $s_label = (string) $status->Name;

                parent::addRDF(
                    parent::describeIndividual($dga_id, "{$ga_label} {$gene_label} in {$disorder_name} ({$s_label})", $ga_id) .
                    parent::describeClass($ga_id, $ga_label, parent::getVoc() . "Disorder-Gene-Association") .
                    parent::triplify($dga_id, parent::getVoc() . "status", $s_id) .
                    parent::describeClass($s_id, $s_label, parent::getVoc() . "Disorder-Gene-Association-Status") .
                    parent::triplify($dga_id, parent::getVoc() . "disorder", $orphanet_id) .
                    parent::describeIndividual($orphanet_id, $disorder_name, parent::getVoc() . "Disorder") .
                    parent::triplify($dga_id, parent::getVoc() . "gene", $gene_id)
                );
            }
            // Flush once per disorder.
            parent::writeRDFBufferToWriteFile();
        }
    }
    unset($xml);
}
/**
 * Walks through the "drug" entries of a DrugBank XML file and passes the
 * reader to parseDrugEntry() for each one.
 *
 * Processing stops early as soon as $this->id_list is set and empty
 * (presumably the list of ids still to be processed; verify against the
 * code that maintains it).
 *
 * @param string $ldir   prefix of the file location; concatenated with $infile as-is
 * @param string $infile file name appended to $ldir
 */
function parse_drugbank($ldir, $infile)
{
    $location = $ldir . $infile;
    $reader = new CXML($location);

    while ($reader->parse("drug") == true) {
        $nothing_left = isset($this->id_list) && count($this->id_list) == 0;
        if ($nothing_left) {
            break;
        }
        $this->parseDrugEntry($reader);
    }

    unset($reader);
}
/**
 * Converts a dbSNP XML document into RDF.
 *
 * Every child element of the document root is treated as one SNP record.
 * For each record the method emits: its type (from snpClass), molecule type,
 * taxon, genotype flag, validation flags, HGVS names and, when the first
 * <Assembly> is marked reference="true", the build / contig / chromosome
 * information plus one "maps-to" node for each child of the first <MapLoc>.
 *
 * Frequency and create/update (build) information is currently not emitted.
 *
 * NOTE(review): only the first <Assembly>, <Component> and <MapLoc> of a
 * record are inspected; confirm that this is intended for records that
 * carry several of them.
 *
 * @param string $file location of the XML file, passed unchanged to CXML
 * @return false|null false when the document has no usable root element;
 *                    no value otherwise
 */
function parse($file)
{
    $xml = new CXML($file);
    $xml->parse();
    $entry = $xml->getXMLRoot();
    if (!isset($entry) or !$entry) {
        return false;
    }

    // predicate suffix => attribute of <Validation>
    $validation_properties = array(
        "validation-by-cluster"     => "byCluster",
        "validation-by-frequency"   => "byFrequency",
        "validation-by-2hit2allele" => "by2Hit2Allele",
        "validation-by-1000G"       => "by1000G",
    );

    // attribute of a function-set element => array(predicate suffix, object prefix).
    // A non-null prefix means the value is emitted as a resource (triplify),
    // null means it is emitted as a string literal (triplifyString).
    $fxnset_properties = array(
        "geneId"       => array("gene", "ncbigene:"),
        "symbol"       => array("gene-symbol", null),
        "mrnaAcc"      => array("mrna", "refseq:"),
        "protAcc"      => array("protein", "refseq:"),
        "fxnClass"     => array("fxn-class", null),
        "allele"       => array("allele", null),
        "residue"      => array("residue", null),
        "readingFrame" => array("reading-frame", null),
        "aaPosition"   => array("position", null),
    );

    foreach ($entry->children() as $o) {
        $attrs = $o->attributes();
        $snp_class = (string) $attrs->snpClass;
        $mol_type = (string) $attrs->molType;

        $rsid = "rs" . (string) $attrs->rsId;
        $id = parent::getNamespace() . $rsid;
        $type = parent::getVoc() . ucfirst(str_replace(" ", "-", $snp_class));
        $moltype = parent::getVoc() . $mol_type;

        // attributes
        parent::addRDF(
            parent::describeIndividual($id, $rsid, $type) .
            parent::describeClass($type, ucfirst($snp_class)) .
            parent::triplify($id, parent::getVoc() . "mol-type", $moltype) .
            parent::describeClass($moltype, $mol_type, parent::getVoc() . "Moltype") .
            parent::describeClass(parent::getVoc() . "Moltype", "Moltype") .
            parent::triplify($id, parent::getVoc() . "taxid", "taxonomy:" . (string) $attrs->taxId)
        );

        // The genotype flag is a boolean literal: emit the raw attribute value
        // (not a vocabulary-prefixed string) typed with the XML Schema boolean
        // datatype. Compare against "" so that the lexical form "0" is kept.
        $genotype = (string) $attrs->genoType;
        if ($genotype !== "") {
            parent::addRDF(
                parent::triplifyString($id, parent::getVoc() . "genotype", $genotype, "xsd:boolean")
            );
        }

        // validation: only emit the flags that are actually present, so that a
        // missing attribute (or a missing <Validation>) yields no empty literal
        $validation = $o->Validation;
        foreach ($validation_properties as $predicate => $attribute) {
            if (isset($validation[$attribute])) {
                parent::addRDF(
                    parent::triplifyString($id, parent::getVoc() . $predicate, (string) $validation[$attribute])
                );
            }
        }

        // hgvs names
        foreach ($o->hgvs as $name) {
            parent::addRDF(parent::triplifyString($id, parent::getVoc() . "hgvs-name", (string) $name));
        }

        // assembly: everything below applies to a reference assembly only
        $assembly = $o->Assembly;
        if (!$assembly || (string) $assembly->attributes()->reference !== "true") {
            continue;
        }
        parent::addRDF(
            parent::triplifyString($id, parent::getVoc() . "dbsnp-build", (string) $assembly->attributes()->dbSnpBuild) .
            parent::triplifyString($id, parent::getVoc() . "genome-build", (string) $assembly->attributes()->genomeBuild)
        );

        $component = $assembly->Component;
        if (!$component) {
            continue;
        }
        parent::addRDF(
            parent::triplify($id, parent::getVoc() . "contig-accession", "genbank:" . (string) $component->attributes()->accession) .
            parent::triplify($id, parent::getVoc() . "contig-gi", "gi:" . (string) $component->attributes()->gi) .
            parent::triplifyString($id, parent::getVoc() . "chromosome", (string) $component->attributes()->chromosome)
        );

        $maploc = $component->MapLoc;
        if (!$maploc) {
            continue;
        }
        foreach ($maploc->children() as $fxnset) {
            // The function set has no identifier that is used here, so its
            // resource id is a hash of its serialized XML.
            $fxnset_id = parent::getRes() . md5($fxnset->asXML());
            parent::addRDF(
                parent::triplify($id, parent::getVoc() . "maps-to", $fxnset_id) .
                parent::triplify($fxnset_id, "rdf:type", parent::getVoc() . "Fxnset") .
                parent::describeClass(parent::getVoc() . "Fxnset", "Fxnset")
            );

            foreach ($fxnset_properties as $attribute => $mapping) {
                if (!isset($fxnset[$attribute])) {
                    continue;
                }
                list($predicate, $prefix) = $mapping;
                $value = (string) $fxnset[$attribute];
                if ($prefix === null) {
                    parent::addRDF(parent::triplifyString($fxnset_id, parent::getVoc() . $predicate, $value));
                } else {
                    parent::addRDF(parent::triplify($fxnset_id, parent::getVoc() . $predicate, $prefix . $value));
                }
            }
        }
    }
    unset($xml);
}