/**
 * Convert the pathway relationship file to RDF (unfinished stub).
 *
 * The method currently returns immediately; everything after the early
 * return is a draft that is kept so it can be completed later. Each
 * tab-separated row is expected to carry the columns
 *   From, To, Reaction Type, Controller, Control Type, Cell Type,
 *   PubMed Id, Genes, Drugs, Diseases
 * and the draft resolves the "From" and "To" names against $this->drugs
 * and $this->genes.
 *
 * @return void
 */
function pathways()
{
    // needs to be finished
    return;

    while ($l = $this->getReadFile()->read(50000)) {
        $a = explode("\t", trim($l));
        // From To Reaction Type Controller Control Type Cell Type PubMed Id Genes Drugs Diseases
        // hmg coa reductase inhibitors Active & Inactive metabolites Biochemical Reaction CYP2C19,CYP2C8,CYP2C9,CYP2D6,CYP3A4,CYP3A5,UGT1A1,UGT1A3,UGT2B7 Catalysis hepatocyte CYP3A4,CYP3A5,UGT1A3,CYP2C19,CYP2C9,CYP2C8,CYP2D6,UGT1A1,UGT2B7 hmg coa reductase inhibitors

        // a row needs at least the "From" and "To" columns
        if (count($a) < 2) {
            continue;
        }

        $c1 = array_search($a[0], $this->drugs);
        if ($c1 === FALSE) {
            $c1 = array_search($a[0], $this->genes);
            if ($c1 === FALSE) {
                // not found: mint a resource from the name itself
                // (the lookup result is FALSE here and must not be encoded)
                $c1 = parent::getRes() . urlencode($a[0]);
            } else {
                // actual id
                $c1 = parent::getNamespace() . $c1;
            }
        }
        // NOTE(review): a hit in $this->drugs leaves the raw key without a
        // namespace prefix, unlike a hit in $this->genes - confirm intent.

        $c2 = array_search($a[1], $this->drugs);
        if ($c2 === FALSE) {
            $c2 = array_search($a[1], $this->genes);
            if ($c2 === FALSE) {
                // not found
                $c2 = parent::getRes() . urlencode($a[1]);
            } else {
                // actual id
                $c2 = parent::getNamespace() . $c2;
            }
        }

        // identifier of the relationship itself; no triples are emitted yet
        $id = md5($l);
        $uri = parent::getRes() . $id;

        parent::writeRDFBufferToWriteFile();
    }
}
/**
 * Convert every MedlineCitation record of the current input file to RDF.
 *
 * The input file name is taken from getReadFile(); a ".gz" file is read
 * through the compress.zlib:// stream wrapper, anything else is read as
 * plain XML. When $this->id_list is set, only the PMIDs it contains are
 * processed. The RDF buffer is flushed to the output once per record.
 *
 * @return void
 */
function pubmed()
{
    $filename = $this->getReadFile()->getFileName();
    $ext = substr(strrchr($filename, '.'), 1);

    // compare (not assign) the extension so that the plain-XML branch is reachable
    if ($ext == "gz") {
        $citations = new SimpleXMLElement("compress.zlib://" . $filename, 0, TRUE);
    } else {
        $citations = new SimpleXMLElement($filename, 0, TRUE);
    }

    foreach ($citations->MedlineCitation as $citation) {
        $this->setCheckPoint('record');

        $pmid = "" . $citation->PMID;
        if (isset($this->id_list)) {
            if (!isset($this->id_list[$pmid])) {
                continue;
            }
            echo "processing {$pmid}" . PHP_EOL;
        }

        $pmid_uri = parent::getNamespace() . $pmid;
        $article = $citation->Article;

        parent::addRDF(
            parent::describeIndividual($pmid_uri, $this->getString($article->ArticleTitle), parent::getVoc() . "PubMedRecord")
            . parent::describeClass(parent::getVoc() . "PubMedRecord", "PubMedRecord")
            . parent::triplify($pmid_uri, "rdfs:seeAlso", "http://www.ncbi.nlm.nih.gov/pubmed/{$pmid}")
        );

        // metadata about the record
        $owner = parent::getRes() . md5($citation['Owner']);
        parent::addRDF(
            parent::describeIndividual($owner, $citation['Owner'], "foaf:Agent")
            . parent::triplify($pmid_uri, parent::getVoc() . "owner", $owner)
        );

        $status = parent::getRes() . md5($citation['Status']);
        parent::addRDF(
            parent::describeIndividual($status, $citation['Status'], parent::getVoc() . "Status")
            . parent::describeClass(parent::getVoc() . "Status", "Status")
            . parent::triplify($pmid_uri, parent::getVoc() . "status", $status)
            . parent::triplifyString($pmid_uri, parent::getVoc() . "version", $citation['VersionID'])
        );

        $this->addDate($pmid_uri, "version-date", $citation['VersionDate']);
        $this->addDate($pmid_uri, "date-created", $citation->DateCreated);
        $this->addDate($pmid_uri, "date-revised", $citation->DateRevised);
        $this->addDate($pmid_uri, "date-completed", $citation->DateCompleted);

        // MeSH headings with their descriptor and qualifiers
        if (!empty($citation->MeshHeadingList)) {
            $i = 0;
            foreach ($citation->MeshHeadingList->MeshHeading as $mh) {
                $id = parent::getRes() . $pmid . "_mh_" . ++$i;
                $did = parent::getRes() . md5($mh->DescriptorName);
                parent::addRDF(
                    parent::describeIndividual($id, $mh->DescriptorName, parent::getVoc() . "MeshHeading")
                    . parent::describeClass(parent::getVoc() . "MeshHeading", "MeSH Heading")
                    . parent::triplify($pmid_uri, parent::getVoc() . "mesh-heading", $id)
                    . parent::triplifyString($id, parent::getVoc() . "descriptor-major-topic", "" . $mh->DescriptorName['MajorTopicYN'])
                    . parent::describeIndividual($did, "" . $mh->DescriptorName, parent::getVoc() . "Mesh-Descriptor")
                    . parent::triplify($id, parent::getVoc() . "mesh-descriptor", $did)
                );

                if (!empty($mh->QualifierName)) {
                    foreach ($mh->QualifierName as $qualifier_name) {
                        $qid = parent::getRes() . md5($qualifier_name);
                        parent::addRDF(
                            parent::describeIndividual($qid, $qualifier_name, parent::getVoc() . "Mesh-Qualifier")
                            . parent::triplify($id, parent::getVoc() . "mesh-qualifier", $qid)
                        );
                    }
                }
            }
        }

        // chemicals, cross-referenced to EC or CAS when a registry number is given
        if (!empty($citation->ChemicalList)) {
            $i = 0;
            foreach ($citation->ChemicalList->Chemical as $chemical) {
                $id = parent::getRes() . $pmid . "_ch_" . ++$i;
                parent::addRDF(
                    parent::describeIndividual($id, $chemical->NameOfSubstance, parent::getVoc() . "Chemical")
                    . parent::describeClass(parent::getVoc() . "Chemical", "Chemical")
                    . parent::triplify($pmid_uri, parent::getVoc() . "chemical", $id)
                );

                $registryNumber = (string) $chemical->RegistryNumber;
                // "0" means no registry number; an absent value is skipped too
                if ($registryNumber !== "" && $registryNumber != "0") {
                    if (substr($registryNumber, 0, 2) == "EC") {
                        // "EC 1.2.3.4" -> "1.2.3.4"
                        $ec = substr($registryNumber, 3);
                        parent::addRDF(parent::triplify($id, parent::getVoc() . "x-ec", "ec:" . $ec));
                    } else {
                        parent::addRDF(parent::triplify($id, parent::getVoc() . "x-cas", "cas:" . $registryNumber));
                    }
                }
            }
        }

        if (!empty($citation->GeneSymbolList)) {
            foreach ($citation->GeneSymbolList->GeneSymbol as $geneSymbol) {
                parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "gene-symbol", $geneSymbol));
            }
        }

        if (!empty($citation->SupplMeshList)) {
            foreach ($citation->SupplMeshList->SupplMeshName as $supplMeshName) {
                $id = parent::getRes() . md5($supplMeshName);
                parent::addRDF(
                    parent::describeIndividual($id, $supplMeshName, parent::getVoc() . "MeshHeading")
                    . parent::triplify($pmid_uri, parent::getVoc() . "supplemental-mesh-heading", $id)
                );
            }
        }

        foreach ($article->PublicationTypeList->PublicationType as $publicationType) {
            $id = parent::getRes() . md5($publicationType);
            parent::addRDF(
                parent::triplify($pmid_uri, parent::getVoc() . "publication-type", $id)
                . parent::describeClass($id, $publicationType)
            );
        }

        // main abstract: one node for the abstract, one per labelled section
        if (!empty($article->Abstract)) {
            $id = parent::getRes() . $pmid . "_ABSTRACT";
            $label = "Abstract for PMID:{$pmid}";
            $abstract = $article->Abstract;
            parent::addRDF(
                parent::describeIndividual($id, $label, parent::getVoc() . "Article-Abstract")
                . parent::describeClass(parent::getVoc() . "Article-Abstract", "Article Abstract")
                . parent::triplify($pmid_uri, "dc:abstract", $id)
                . parent::triplifyString($id, parent::getVoc() . "copyright", $abstract->CopyrightInformation)
            );

            $section = 0;
            $abstractText = "";
            foreach ($abstract->AbstractText as $text) {
                $abstractText .= " " . $text;
                // cast the attribute: an object is never identical to a string
                $sectionLabel = (string) $text['Label'];
                if ($sectionLabel !== "" && $sectionLabel !== "UNLABELLED") {
                    $section_id = parent::getRes() . $pmid . "_ABSTRACT_SECTION_" . ++$section;
                    parent::addRDF(
                        parent::triplify($id, parent::getVoc() . "section", $section_id)
                        . parent::triplifyString($section_id, parent::getVoc() . "order", $section)
                        . parent::triplifyString($section_id, parent::getVoc() . "nlm-section-type", $text['NlmCategory'])
                        . parent::triplifyString($section_id, parent::getVoc() . "label", $text['Label'])
                        . parent::triplifyString($section_id, parent::getVoc() . "text", $text)
                    );
                }
            }
            parent::addRDF(parent::triplifyString($id, parent::getVoc() . "abstract-text", $abstractText));
        }

        if (!empty($citation->OtherAbstract)) {
            $i = 0;
            foreach ($citation->OtherAbstract as $ab) {
                $id = parent::getRes() . $pmid . "_oa_" . ++$i;
                parent::addRDF(
                    parent::describeIndividual($id, "", parent::getVoc() . "Article-Abstract")
                    . parent::describeClass(parent::getVoc() . "Article-Abstract", "Article Abstract")
                    . parent::triplify($pmid_uri, "dc:abstract", $id)
                );

                $abstractText = "";
                foreach ($ab->AbstractText as $text) {
                    $abstractText .= " " . $text;
                    $sectionLabel = (string) $text['Label'];
                    if ($sectionLabel !== "" && $sectionLabel !== "UNLABELLED") {
                        // the section category lives in the NlmCategory attribute
                        $category = strtolower((string) $text['NlmCategory']);
                        parent::addRDF(parent::triplifyString($id, parent::getVoc() . "abstract_" . $category, $text));
                    }
                }
                parent::addRDF(parent::triplifyString($id, parent::getVoc() . "abstract-text", $abstractText));
            }
        }

        // NOTE(review): only lists found under <Article> are read here; in the
        // NLM DTD InvestigatorList and PersonalNameSubjectList appear to sit
        // under <MedlineCitation> - confirm before relying on those two types.
        $author_types = array("Investigator", "Author", "PersonalNameSubject");
        foreach ($author_types as $author_type) {
            $listname = $author_type . "List";
            if (!empty($article->{$listname}->{$author_type})) {
                $i = 0;
                foreach ($article->{$listname}->{$author_type} as $author) {
                    $id = parent::getRes() . $pmid . "_AUTHOR_" . ++$i;
                    $author_label = $author->LastName . ($author->Initials ? ", " . $author->Initials : "");
                    parent::addRDF(
                        parent::describeIndividual($id, $author_label, parent::getVoc() . $author_type)
                        . parent::describeClass(parent::getVoc() . $author_type, $author_type)
                        . parent::triplifyString($id, parent::getVoc() . "list-position", $i)
                        . parent::triplify($pmid_uri, parent::getVoc() . strtolower($author_type), $id)
                        . parent::triplifyString($id, parent::getVoc() . "last-name", $author->LastName)
                        . parent::triplifyString($id, parent::getVoc() . "fore-name", $author->ForeName)
                        . parent::triplifyString($id, parent::getVoc() . "initials", $author->Initials)
                        . parent::triplifyString($id, parent::getVoc() . "collective-name", $author->CollectiveName)
                        . parent::triplifyString($id, parent::getVoc() . "suffix", $author->Suffix)
                    );

                    $affiliationText = (string) $author->Affiliation;
                    if ($affiliationText !== "") {
                        $affiliation = parent::getRes() . md5($affiliationText);
                        // NOTE(review): the organization URI is attached as a
                        // string literal; triplify() may have been intended.
                        parent::addRDF(
                            parent::describeIndividual($affiliation, $affiliationText, parent::getVoc() . "Organization")
                            . parent::describeClass(parent::getVoc() . "Organization", "Organization")
                            . parent::triplifyString($id, parent::getVoc() . "affiliation", $affiliation)
                        );
                    }

                    foreach ($author->NameID as $authorNameId) {
                        if (!empty($authorNameId)) {
                            parent::addRDF(parent::triplifyString($id, parent::getVoc() . "name-id", $authorNameId));
                        }
                    }
                }
            }
        }

        if (!empty($article->ArticleDate)) {
            $this->addDate($pmid_uri, "article-date", $article->ArticleDate);
        }

        foreach ($article->Language as $language) {
            parent::addRDF(parent::triplifyString($pmid_uri, "dc:language", $language));
        }

        if (!empty($citation->KeywordList)) {
            foreach ($citation->KeywordList->Keyword as $keyword) {
                parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "keyword", $keyword));
            }
        }

        // other identifiers (element names are case sensitive: OtherID)
        foreach ($citation->OtherID as $otherID) {
            $otherIdValue = (string) $otherID;
            if ($otherIdValue === "") {
                continue;
            }
            parent::addRDF(
                parent::triplifyString($pmid_uri, parent::getVoc() . "other-id", $otherIdValue)
                . parent::triplifyString($pmid_uri, parent::getVoc() . "other-id-source", $otherID['Source'])
            );
            if (strstr($otherIdValue, "PMC")) {
                parent::addRDF(parent::triplify($pmid_uri, parent::getVoc() . "x-pmc", "pmc:" . $otherIdValue));
            }
        }

        if (!empty($article->DataBankList)) {
            foreach ($article->DataBankList->DataBank as $dataBank) {
                parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "databank", $dataBank->DataBankName));
                if (!empty($dataBank->AccessionNumberList)) {
                    foreach ($dataBank->AccessionNumberList->AccessionNumber as $acc) {
                        parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "x-" . strtolower($dataBank->DataBankName), $acc));
                    }
                }
            }
        }

        if (!empty($article->GrantList)) {
            $i = 0;
            foreach ($article->GrantList->Grant as $grant) {
                $id = parent::getRes() . $pmid . "_GRANT_" . ++$i;
                $grant_label = "Grant " . $grant->GrantID . " for " . parent::getNamespace() . $pmid;
                parent::addRDF(
                    parent::describeIndividual($id, $grant_label, parent::getVoc() . "Grant")
                    . parent::describeClass(parent::getVoc() . "Grant", "Grant")
                    . parent::triplify($pmid_uri, parent::getVoc() . "grant", $id)
                    . parent::triplifyString($id, parent::getVoc() . "grant-identifier", $grant->GrantID)
                    . parent::triplifyString($id, parent::getVoc() . "grant-acronym", $grant->Acronym)
                    . parent::triplifyString($id, parent::getVoc() . "grant-agency", $grant->Agency)
                    . parent::triplifyString($id, parent::getVoc() . "grant-country", $grant->Country)
                );
            }
        }

        if (!empty($citation->NumberOfReferences)) {
            parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "number-of-references", $citation->NumberOfReferences));
        }

        if (!empty($article->VernacularTitle)) {
            parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "vernacular-title", $article->VernacularTitle));
        }

        foreach ($citation->CitationSubset as $citationSubset) {
            if (!empty($citationSubset)) {
                parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "citation-subset", $citationSubset));
            }
        }

        // comments and corrections that refer to this record
        if (!empty($citation->CommentsCorrectionsList)) {
            $i = 0;
            foreach ($citation->CommentsCorrectionsList->CommentsCorrections as $commentCorrection) {
                $id = parent::getRes() . $pmid . "_COMMENT_CORRECTION_" . ++$i;
                $ccRefType = (string) $commentCorrection['RefType'];
                $ccNote = $commentCorrection->Note; // optional
                $cc_label = "Comment or correction " . $i . " for " . parent::getNamespace() . $pmid;
                parent::addRDF(
                    parent::describeIndividual($id, $cc_label, parent::getVoc() . "CommentCorrection")
                    . parent::describeClass(parent::getVoc() . "CommentCorrection", "CommentCorrection")
                    . parent::triplify($pmid_uri, parent::getVoc() . "comment-correction", $id)
                    . parent::triplify($id, "rdf:type", parent::getVoc() . $ccRefType)
                    . parent::triplifyString($id, parent::getVoc() . "ref-source", $commentCorrection->RefSource)
                    . parent::triplifyString($id, parent::getVoc() . "note", $ccNote)
                );
            }
        }

        foreach ($citation->GeneralNote as $generalNote) {
            if (!empty($generalNote)) {
                parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "general-note", $generalNote));
            }
        }

        foreach ($citation->SpaceFlightMission as $spaceFlightMission) {
            if (!empty($spaceFlightMission)) {
                parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "space-flight-mission", $spaceFlightMission));
            }
        }

        // journal and issue information
        $journal = $article->Journal;
        $journalId = parent::getRes() . $pmid . "_JOURNAL";
        $journal_label = "Journal for " . parent::getNamespace() . $pmid;
        parent::addRDF(
            parent::describeIndividual($journalId, $journal_label, parent::getVoc() . "Journal")
            . parent::describeClass(parent::getVoc() . "Journal", "Journal")
            . parent::triplify($pmid_uri, parent::getVoc() . "journal", $journalId)
            . parent::triplify($journalId, parent::getVoc() . "x-issn", "issn:" . $journal->ISSN)
            . parent::triplifyString($journalId, parent::getVoc() . "journal-nlm-identifier", $citation->MedlineJournalInfo->NlmUniqueID)
            . parent::triplifyString($journalId, parent::getVoc() . "journal-title", $journal->Title)
            . parent::triplifyString($journalId, parent::getVoc() . "journal-abbreviation", $journal->ISOAbbreviation)
            . parent::triplifyString($journalId, parent::getVoc() . "volume", $journal->JournalIssue->Volume)
            . parent::triplifyString($journalId, parent::getVoc() . "issue", $journal->JournalIssue->Issue)
            . parent::triplifyString($journalId, parent::getVoc() . "pages", "" . $article->Pagination->MedlinePgn)
        );

        $journalPubDate = $journal->JournalIssue->PubDate;
        if (!empty($journalPubDate)) {
            $journalYear = $journalPubDate->Year;

            $journalMonth = trim((string) $journalPubDate->Month); // optional
            if ($journalMonth !== "") {
                if (is_numeric($journalMonth[0])) {
                    $journalMonth = str_pad($journalMonth, 2, "0", STR_PAD_LEFT);
                } else {
                    $mo = array("jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec");
                    // match on the first three letters; an unknown name yields
                    // no month instead of silently becoming January
                    $monthIndex = array_search(strtolower(substr($journalMonth, 0, 3)), $mo, true);
                    $journalMonth = ($monthIndex === false)
                        ? ""
                        : str_pad((string) ($monthIndex + 1), 2, "0", STR_PAD_LEFT);
                }
            }

            $journalDay = trim((string) $journalPubDate->Day); // optional
            if ($journalDay) {
                $journalDay = str_pad($journalDay, 2, "0", STR_PAD_LEFT);
            }

            parent::addRDF(
                parent::triplifyString($journalId, parent::getVoc() . "publication-year", $journalYear)
                . parent::triplifyString($journalId, parent::getVoc() . "publication-month", $journalMonth)
                . parent::triplifyString($journalId, parent::getVoc() . "publication-day", $journalDay)
                . parent::triplifyString($journalId, parent::getVoc() . "publication-season", $journalPubDate->Season)
                . parent::triplifyString($journalId, parent::getVoc() . "publication-date", $journalPubDate->MedlineDate)
            );

            // a typed date is only emitted when all three parts are known
            if (!empty($journalYear) and !empty($journalMonth) and !empty($journalDay)) {
                parent::addRDF(parent::triplifyString($journalId, parent::getVoc() . "publication-date", "{$journalYear}-{$journalMonth}-{$journalDay}", "xsd:date"));
            }
        }

        foreach ($article->ELocationID as $eLocation) {
            if (!empty($eLocation)) {
                parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "elocation", $eLocation));
            }
        }

        $this->writeRDFBufferToWriteFile();
    }
}
/**
 * Convert an Affymetrix probeset annotation CSV file to RDF.
 *
 * Lines are pulled from the reader returned by getReadFile():
 *  - the very first line is skipped;
 *  - lines starting with "#" are "key=value" dataset attributes and are
 *    attached to the dataset URI;
 *  - the first remaining line is the header (41 columns expected);
 *  - every other line is one probeset with 41 quoted, comma-separated
 *    values. Columns known to Map() become x-<prefix> cross references,
 *    all other columns are handled by column name.
 *
 * @param mixed $file not used by this method; kept for interface compatibility
 * @return void
 */
function Parse($file)
{
    $expected_columns = 41;

    // source tag used in "Transcript Assignments" => prefix to emit
    $transcript_prefixes = array(
        'gb'             => 'genbank',
        'gb_htc'         => 'genbank',
        'ncbibacterial'  => 'gi',
        'ncbi_bacterial' => 'gi',
        'ens'            => 'ensembl',
        'ncbi_mito'      => 'refseq',
        'ncbi_organelle' => 'refseq',
        'organelle'      => 'refseq',
        'affx'           => 'affymetrix',
        'unknown'        => 'affymetrix',
        'prop'           => 'affymetrix',
        'tigr_2004_08'   => 'tigr',
        'tigr-plantta'   => 'genbank',
        'newrs.gi'       => 'gi',
        'newRS.gi'       => 'gi',
        'primate_viral'  => 'genbank',
        'jgi-bacterial'  => 'ncbigene',
        'tb'             => 'tuberculist',
        'pa'             => 'pseudomonas',
    );

    // Gene Ontology column name => relation name
    $go_relations = array(
        'Gene Ontology Biological Process' => 'go-process',
        'Gene Ontology Cellular Component' => 'go-location',
        'Gene Ontology Molecular Function' => 'go-function',
    );

    parent::getReadFile()->read(); // skip the first comment line
    $line = 1;
    $first = true;
    $header = array();

    while ($l = parent::getReadFile()->read(500000)) {
        $line++; // keep the number reported in error messages accurate

        if ($l[0] == "#") {
            // dataset attributes
            $a = explode('=', trim($l));
            $r = $this->getVoc() . substr($a[0], 2);
            if (isset($a[1])) {
                $v = $a[1];
                if ($r == "affymetrix_vocabulary:genome-version-create_date") {
                    // a day of "00" is not a valid date; use the first of the month
                    $x = explode("-", $a[1]);
                    if (isset($x[2]) && $x[2] == "00") {
                        $x[2] = "01";
                    }
                    $v = implode("-", $x);
                }
                parent::addRDF(
                    parent::triplifyString(parent::getDatasetURI(), $r, $v)
                    . parent::describe($r, "{$r}")
                );
            }
            continue;
        }

        if ($first == true) {
            $first = false;
            // header
            $header = explode(",", str_replace('"', '', trim($l)));
            $n = count($header);
            if ($n != $expected_columns) {
                trigger_error("Expecting 41 columns, found {$n} in header on line {$line}!", E_USER_ERROR);
                exit;
            }
            continue;
        }

        // drop the line ending first so that CRLF input and a final line
        // without a newline are split the same way, then strip the outer quotes
        $a = explode('","', substr(trim($l), 1, -1));
        $n = count($a);
        if ($n != $expected_columns) {
            trigger_error("Expecting 41 columns, found {$n} on line {$line}!", E_USER_ERROR);
            exit;
        }

        parent::writeRDFBufferToWriteFile();

        $id = $a[0];
        $qname = "affymetrix:{$id}";
        $label = "probeset {$a['0']} on GeneChip {$a['1']} ({$a['2']})";
        parent::addRDF(
            parent::describeIndividual($qname, $label, $this->getVoc() . "Probeset")
            . parent::describeClass($this->getVoc() . "Probeset", "Affymetrix probeset")
        );
        trigger_error($id, E_USER_NOTICE);

        // now process the entries
        foreach ($a as $k => $v) {
            if (trim($v) == '---') {
                continue;
            }

            // multi-valued entries are separated by " /// "
            $b = explode(" /// ", $v);
            $r = $this->Map($k);

            if (isset($r)) {
                foreach ($b as $c) {
                    $d = explode(" // ", $c);
                    if ($r == 'symbol') {
                        $d[0] = str_replace(" ", "-", $d[0]);
                    }
                    $s = $this->getRegistry()->getPreferredPrefix($r);
                    if ($s == "ec") {
                        // keep only the number after the "EC:" style prefix, if any
                        $e = explode(":", $d[0]);
                        if (isset($e[1])) {
                            $d[0] = $e[1];
                        }
                    }
                    $this->addRDF(
                        parent::triplify($qname, $this->getVoc() . "x-{$s}", "{$s}:" . $d[0])
                        . parent::describeProperty($this->getVoc() . "x-{$s}", "a relation to {$s}")
                    );
                }
                continue;
            }

            // we handle manually
            unset($rel);
            $column = $header[$k];
            switch ($column) {
                case 'GeneChip Array':
                    $array_id = parent::getRes() . str_replace(" ", "-", $v);
                    parent::addRDF(
                        parent::triplify($qname, $this->getVoc() . "genechip-array", $array_id)
                        . parent::describeIndividual($array_id, "Affymetrix {$v} GeneChip array", $this->getVoc() . "Genechip-Array")
                        . parent::describeClass($this->getVoc() . "Genechip-Array", "Affymetrix GeneChip array")
                    );
                    break;

                case 'Gene Ontology Biological Process':
                case 'Gene Ontology Cellular Component':
                case 'Gene Ontology Molecular Function':
                    $rel = $go_relations[$column];
                    foreach ($b as $c) {
                        $d = explode(" // ", $c);
                        parent::addRDF(
                            $this->triplify($qname, $this->getVoc() . $rel, "go:" . $d[0])
                            . $this->describeProperty($this->getVoc() . $rel, "{$rel}")
                        );
                    }
                    break;

                case 'Transcript Assignments':
                    foreach ($b as $c) {
                        $d = explode(" // ", $c);
                        // need at least "accession // description // source"
                        if (!isset($d[2])) {
                            continue;
                        }
                        $accession = $d[0];
                        $source = $d[2];
                        if ($source == '---' || $accession == '---') {
                            continue;
                        }

                        if (isset($transcript_prefixes[$source])) {
                            $source = $transcript_prefixes[$source];
                        } elseif ($source == 'gi|53267') {
                            $source = 'gi';
                            $accession = '53267';
                        } elseif ($source == 'broad-tcup') {
                            // keep only the part of the accession before the first dash
                            $e = explode("-", $accession);
                            $accession = $e[0];
                        }

                        parent::addRDF(
                            parent::triplify($qname, $this->getVoc() . "transcript-assignment", "{$source}:{$accession}")
                            . parent::describeProperty($this->getVoc() . "transcript-assignment", "transcript assignment")
                        );
                    }
                    break;

                case 'Annotation Transcript Cluster':
                    // not converted
                    break;

                case 'Annotation Date':
                    // e.g. "Jun 9, 2011"
                    $rel = "annotation-date";
                    preg_match("/^([A-Za-z]+) ([0-9]+), ([0-9]{4})\$/", $v, $m);
                    if (count($m) == 4) {
                        $month = $this->getMonth($m[1]);
                        $day = $m[2];
                        $year = $m[3];
                        if (!$day || $day == "0") {
                            $day = "01";
                        }
                        $date = $year . "-" . $month . "-" . str_pad($day, 2, "0", STR_PAD_LEFT) . "T00:00:00Z";
                        parent::addRDF(
                            parent::triplifyString($qname, $this->getVoc() . $rel, $date, "xsd:dateTime")
                            . parent::describeProperty($this->getVoc() . $rel, "{$rel}")
                        );
                    } else {
                        trigger_error("could not match date from {$v}", E_USER_ERROR);
                    }
                    break;

                case 'Species Scientific Name':
                    break;

                case 'Transcript ID(Array Design)':
                    $rel = 'transcript';
                    // intentional fall-through: emitted like any other literal column
                case 'Sequence type':
                default:
                    if (!isset($rel)) {
                        // derive the relation name from the column name
                        $rel = str_replace(" ", "-", strtolower($column));
                    }
                    foreach ($b as $c) {
                        parent::addRDF(
                            parent::triplifyString($qname, $this->getVoc() . $rel, stripslashes($c))
                            . parent::describeProperty($this->getVoc() . $rel, "{$rel}")
                        );
                    }
                    break;
            } // switch
        }

        $this->WriteRDFBufferToWriteFile();
    }
}
/**
 * Convert the SNP entries of one dbSNP XML file to RDF.
 *
 * Each child of the XML root is described as an individual named
 * "rs<rsId>", typed by its snpClass, and annotated with molecule type,
 * taxon, genotype flag, validation flags, HGVS names and - for the
 * reference assembly only - build numbers, contig cross references and
 * the functional sets (Fxnset) it maps to.
 *
 * @param mixed $file value handed to the CXML constructor
 * @return false|null false when the document has no usable root element
 */
function parse($file)
{
    $xml = new CXML($file);
    $xml->parse();
    $entry = $xml->getXMLRoot();
    if (!isset($entry) or !$entry) {
        return false;
    }

    // Fxnset attribute => array(relation, prefix); a null prefix means the
    // value is emitted as a string literal instead of a cross reference
    $fxnset_fields = array(
        'geneId'       => array('gene', 'ncbigene'),
        'symbol'       => array('gene-symbol', null),
        'mrnaAcc'      => array('mrna', 'refseq'),
        'protAcc'      => array('protein', 'refseq'),
        'fxnClass'     => array('fxn-class', null),
        'allele'       => array('allele', null),
        'residue'      => array('residue', null),
        'readingFrame' => array('reading-frame', null),
        'aaPosition'   => array('position', null),
    );

    foreach ($entry->children() as $o) {
        $attrs = $o->attributes();
        $snp_class = (string) $attrs->snpClass;
        $mol_type = (string) $attrs->molType;

        $rsid = "rs" . $attrs->rsId;
        $id = parent::getNamespace() . $rsid;
        $type = parent::getVoc() . ucfirst(str_replace(" ", "-", $snp_class));
        $moltype = parent::getVoc() . $mol_type;

        // attributes
        parent::addRDF(
            parent::describeIndividual($id, $rsid, $type)
            . parent::describeClass($type, ucfirst($snp_class))
            . parent::triplify($id, parent::getVoc() . "mol-type", $moltype)
            . parent::describeClass($moltype, $mol_type, parent::getVoc() . "Moltype")
            . parent::describeClass(parent::getVoc() . "Moltype", "Moltype")
            . parent::triplify($id, parent::getVoc() . "taxid", "taxonomy:" . (string) $attrs->taxId)
        );

        $genotype = (string) $attrs->genoType;
        if ($genotype) {
            // emit the flag itself (not a vocabulary-prefixed string) using
            // the XML Schema datatype name, xsd:boolean
            parent::addRDF(parent::triplifyString($id, parent::getVoc() . "genotype", $genotype, "xsd:boolean"));
        }

        // validation
        $a = $o->Validation;
        parent::addRDF(
            parent::triplifyString($id, parent::getVoc() . "validation-by-cluster", (string) $a->attributes()->byCluster)
            . parent::triplifyString($id, parent::getVoc() . "validation-by-frequency", (string) $a->attributes()->byFrequency)
            . parent::triplifyString($id, parent::getVoc() . "validation-by-2hit2allele", (string) $a->attributes()->by2Hit2Allele)
            . parent::triplifyString($id, parent::getVoc() . "validation-by-1000G", (string) $a->attributes()->by1000G)
        );

        // hgvs names
        foreach ($o->hgvs as $name) {
            parent::addRDF(parent::triplifyString($id, parent::getVoc() . "hgvs-name", (string) $name));
        }

        // assembly: only the one flagged reference="true" is converted
        $assembly = $o->Assembly;
        if (!$assembly or $assembly->attributes()->reference != "true") {
            continue;
        }
        parent::addRDF(
            parent::triplifyString($id, parent::getVoc() . "dbsnp-build", (string) $assembly->attributes()->dbSnpBuild)
            . parent::triplifyString($id, parent::getVoc() . "genome-build", (string) $assembly->attributes()->genomeBuild)
        );

        $component = $assembly->Component;
        if (!$component) {
            continue;
        }
        parent::addRDF(
            parent::triplify($id, parent::getVoc() . "contig-accession", "genbank:" . (string) $component->attributes()->accession)
            . parent::triplify($id, parent::getVoc() . "contig-gi", "gi:" . (string) $component->attributes()->gi)
            . parent::triplifyString($id, parent::getVoc() . "chromosome", (string) $component->attributes()->chromosome)
        );

        $maploc = $component->MapLoc;
        if (!$maploc) {
            continue;
        }
        foreach ($maploc->children() as $fxnset) {
            // the functional set has no identifier of its own: hash its XML
            $fxnset_id = parent::getRes() . md5($fxnset->asXML());
            parent::addRDF(
                parent::triplify($id, parent::getVoc() . "maps-to", $fxnset_id)
                . parent::triplify($fxnset_id, "rdf:type", parent::getVoc() . "Fxnset")
                . parent::describeClass(parent::getVoc() . "Fxnset", "Fxnset")
            );

            $fxn_attrs = $fxnset->attributes();
            foreach ($fxnset_fields as $attribute => $field) {
                if (!isset($fxn_attrs->{$attribute})) {
                    continue;
                }
                $value = (string) $fxn_attrs->{$attribute};
                if ($field[1] === null) {
                    parent::addRDF(parent::triplifyString($fxnset_id, parent::getVoc() . $field[0], $value));
                } else {
                    parent::addRDF(parent::triplify($fxnset_id, parent::getVoc() . $field[0], $field[1] . ":" . $value));
                }
            }
        }
    }
    unset($xml);
}
/**
 * Add an RDF representation of one MeSH supplementary record to the model.
 *
 * The record is described as an individual named by its first "UI" value
 * and labelled with its first "NM" value; nothing is emitted when either
 * is missing. Every key of the record must be present in the map returned
 * by getSupplementaryConceptRecords(), which supplies the predicate name
 * for that key; an unknown key raises an E_USER_ERROR. The RDF buffer is
 * flushed after every key and once more at the end.
 *
 * @param array $sup_record_arr assoc array: field code => list of values
 * @return void
 */
private function makeSupplementaryRecord($sup_record_arr)
{
    // get the UI and the name of the supplementary record
    if (!isset($sup_record_arr['UI'][0]) or !isset($sup_record_arr['NM'][0])) {
        return;
    }
    $sr_ui = $sup_record_arr["UI"][0];
    $sr_res = $this->getNamespace() . $sr_ui;
    $sr_label = $sup_record_arr['NM'][0];
    parent::addRDF(
        parent::describeIndividual($sr_res, $sr_label, $this->getVoc() . "Supplementary-Descriptor", $sr_label)
        . parent::describeClass($this->getVoc() . "Supplementary-Descriptor", "MeSH Supplementary Descriptor")
    );

    // field code => predicate name
    $sde = $this->getSupplementaryConceptRecords();

    $description = "Relationship between a supplementary record and its ";

    // fields whose values are dates
    $date_fields = array(
        "DA" => "date of entry",
        "MR" => "major revision date",
    );

    // fields whose values are emitted as escaped string literals
    $text_fields = array(
        "FR"      => "frequency",
        "HM"      => "heading mapping",
        "II"      => "indexing information",
        "N1"      => "cas 1 name",
        "NM"      => "name of substance",
        "NM_TH"   => "term thesaurus id",
        "NO"      => "note",
        "PI"      => "previous indexing",
        "RECTYPE" => "record type",
        "RN"      => "cas registry number or ec number",
        "RR"      => "related cas registry number",
        "SO"      => "source",
        "ST"      => "semantic type",
        "SY"      => "synonym",
        "TH"      => "thesaurus id",
    );

    // iterate over the properties
    foreach ($sup_record_arr as $k => $v) {
        if (!array_key_exists($k, $sde)) {
            trigger_error("Please add key to descriptor record map: " . $k . PHP_EOL, E_USER_ERROR);
        } elseif (isset($date_fields[$k])) {
            $predicate = $this->getVoc() . $sde[$k];
            foreach ($v as $vv) {
                parent::AddRDF(
                    parent::triplifyString($sr_res, $predicate, $this->formatDate($vv), "xsd:date")
                    . parent::describeProperty($predicate, $description . $date_fields[$k])
                );
            }
        } elseif ($k == "PA") {
            // a pharmacological action is a resource of its own, keyed by
            // the hash of its raw text
            $predicate = $this->getVoc() . $sde[$k];
            foreach ($v as $vv) {
                $vlabel = utf8_encode(htmlspecialchars($vv));
                $vid = parent::getRes() . md5($vv);
                parent::AddRDF(
                    parent::describeIndividual($vid, $vlabel, parent::getVoc() . "Pharmacological-Action", $vlabel)
                    . parent::triplify($sr_res, $predicate, $vid)
                    . parent::describeProperty($predicate, $description . "pharmacological action")
                );
            }
        } elseif (isset($text_fields[$k])) {
            $predicate = $this->getVoc() . $sde[$k];
            foreach ($v as $vv) {
                parent::AddRDF(
                    parent::triplifyString($sr_res, $predicate, utf8_encode(htmlspecialchars($vv)))
                    . parent::describeProperty($predicate, $description . $text_fields[$k])
                );
            }
        }
        // keys known to the map but listed in none of the tables above
        // produce no triples

        $this->WriteRDFBufferToWriteFile();
    }

    $this->WriteRDFBufferToWriteFile();
}
/**
 * Reads a GenBank-style flat file line by line and emits RDF for each record.
 *
 * Lines are buffered until a record terminator line ("//") is read. The
 * buffered text is then split into sections (LOCUS, DEFINITION, ACCESSION,
 * VERSION, KEYWORDS, REFERENCE, SOURCE, FEATURES) by the class's parse*
 * helpers, converted to triples, and flushed to the write file.
 *
 * Reference resources are named from the record URI plus the position of the
 * reference inside the record, so repeated runs over the same input yield the
 * same URIs (the previous rand()-based names were neither reproducible nor
 * collision-free).
 */
function process() {
    $refseq_record_str = "";
    while ($aLine = $this->getReadFile()->Read(40960)) {
        // a line made only of "//" closes the current record
        preg_match("/^\\/\\/\$/", $aLine, $matches);
        if (!count($matches)) {
            // still inside a record: buffer every line that is not blank
            preg_match("/^\n\$/", $aLine, $matches);
            if (count($matches) == 0) {
                $refseq_record_str .= $aLine . PHP_EOL;
            }
            continue;
        }

        // now remove the header if it is there
        $refseq_record_str = $this->removeHeader($refseq_record_str);
        $sectionsRaw = $this->parseGenbankRaw($refseq_record_str);

        /**
         * SECTIONS being parsed:
         * locus, definition, accession, version, keywords, source
         * features
         **/
        // get the locus section
        $locus = $this->retrieveSections("LOCUS", $sectionsRaw);
        $parsed_locus_arr = $this->parseLocus($locus);
        // get the definition
        $definition = $this->retrieveSections("DEFINITION", $sectionsRaw);
        $parsed_definition_arr = $this->parseDefinition($definition);
        // get the accession
        $accessions = $this->retrieveSections("ACCESSION", $sectionsRaw);
        $parsed_accession_arr = $this->parseAccession($accessions);
        // get the version
        $versions = $this->retrieveSections("VERSION", $sectionsRaw);
        $parsed_version_arr = $this->parseVersion($versions);
        // get the keywords
        $keywords = $this->retrieveSections("KEYWORDS", $sectionsRaw);
        $parsed_keyword_arr = $this->parseKeywords($keywords);
        // get the reference section
        $references = $this->retrieveSections("REFERENCE", $sectionsRaw);
        $parsed_refs_arr = $this->parseReferences($references);
        // get the source section
        $source = $this->retrieveSections("SOURCE", $sectionsRaw);
        $parsed_source_arr = $this->parseSource($source);
        // get the features
        $features = $this->retrieveSections("FEATURES", $sectionsRaw);
        $parsed_features_arr = $this->parseFeatures($features);

        // lets make some rdf
        $refseq_res = $this->getNamespace() . $parsed_version_arr['versioned_accession'];
        $refseq_label = utf8_encode(htmlspecialchars($parsed_definition_arr[0]));
        $fasta_url = 'https://www.ncbi.nlm.nih.gov/sviewer/viewer.cgi?sendto=on&db=nucest&dopt=fasta&val=' . $parsed_version_arr['gi'];
        parent::AddRDF(
            parent::describeIndividual($refseq_res, $refseq_label, $this->getVoc() . 'refseq-record') .
            parent::triplifyString($refseq_res, $this->getVoc() . 'sequence-length', $parsed_locus_arr[0]['sequence_length']) .
            parent::triplifyString($refseq_res, $this->getVoc() . 'chromosome-shape', $parsed_locus_arr[0]['chromosome_shape']) .
            parent::triplifyString($refseq_res, $this->getVoc() . 'date-of-entry', $parsed_locus_arr[0]['date']) .
            parent::triplifyString($refseq_res, $this->getVoc() . 'source', utf8_encode($parsed_source_arr[0])) .
            parent::triplify($refseq_res, $this->getVoc() . 'fasta-seq', $fasta_url) .
            parent::triplify($fasta_url, "rdf:type", $this->getVoc() . 'fasta-sequence')
        );

        // add the features to the rdf
        foreach ($parsed_features_arr as $aFeature) {
            $type = $aFeature['type'];
            $feat_desc = $this->getFeatures($type);
            $label = $type;
            $def = '';
            if (isset($feat_desc['definition'])) {
                $def = preg_replace('/\\s\\s*/', ' ', $feat_desc['definition']);
            }
            // imperfect solution: the "/" in this database name would otherwise
            // be treated as a qualifier separator by the explode() below
            $value = str_replace("UniProtKB/Swiss-Prot", "UniProt", $aFeature['value']);
            $value_arr = explode("/", $value);
            // the first chunk is the feature location
            $location = preg_replace('/\\n/', '', $value_arr[0]);
            $class_id = parent::getVoc() . md5($type);
            $feat_res = parent::getRes() . md5($type . $location . $refseq_res);
            $feat_label = utf8_encode($type . " " . $location . " for " . $refseq_res);
            if (isset($feat_desc['comment'])) {
                $comment = preg_replace('/\\s\\s*/', ' ', $feat_desc['comment']);
                $label .= " " . $comment;
            }
            parent::AddRDF(
                parent::describeClass($class_id, $label, parent::getVoc() . "Feature", $label, $def) .
                parent::describeIndividual($feat_res, $feat_label, $class_id) .
                parent::triplify($refseq_res, $this->getVoc() . "has-feature", $feat_res)
            );
            foreach ($value_arr as $aL) {
                // check if aL has an equals in it (qualifier=value)
                $p = "/(\\S+)\\=(.*)/";
                preg_match($p, $aL, $m);
                if (count($m)) {
                    if ($m[1] == "db_xref") {
                        parent::AddRDF(parent::triplify($feat_res, "rdfs:seeAlso", str_replace("\"", "", $m[2])));
                    } else {
                        parent::AddRDF(parent::triplifyString($feat_res, $this->getVoc() . $m[1], utf8_encode(str_replace("\"", "", $m[2]))));
                    }
                }
            }
        }

        // add the accession
        foreach ($parsed_accession_arr[0] as $acc) {
            parent::AddRDF(parent::triplifyString($refseq_res, $this->getVoc() . "accession", $acc));
        }
        // versioned accession
        if (isset($parsed_version_arr['versioned_accession'])) {
            parent::AddRDF(parent::triplifyString($refseq_res, $this->getVoc() . "versioned-accession", $parsed_version_arr['versioned_accession']));
        }
        // keywords
        foreach ($parsed_keyword_arr as $akw) {
            parent::AddRDF(parent::triplifyString($refseq_res, $this->getVoc() . "keyword", $akw));
        }

        // references
        $ref_index = 0;
        foreach ($parsed_refs_arr as $aRef) {
            $ref_index++;
            // stable, per-record identifier instead of md5(rand())
            $ref_res = $this->getRes() . md5($refseq_res . ".reference." . $ref_index);
            $ref_label = "reference for " . $refseq_res;
            if (isset($aRef['TITLE'])) {
                parent::AddRDF(
                    parent::describeIndividual($ref_res, $ref_label, $this->getVoc() . "reference") .
                    parent::triplifyString($ref_res, $this->getVoc() . "title", $aRef['TITLE'])
                );
            }
            if (isset($aRef['PUBMED'])) {
                parent::AddRDF(parent::triplify($ref_res, $this->getVoc() . "x-pubmed", 'pubmed:' . $aRef['PUBMED']));
            }
            if (isset($aRef['AUTHORS'])) {
                parent::AddRDF(parent::triplifyString($ref_res, $this->getVoc() . "authors", $aRef['AUTHORS']));
            }
            // the record always links to its reference; coordinates and the
            // citation are only emitted when the parser found them
            $ref_rdf = parent::triplify($refseq_res, $this->getVoc() . "reference", $ref_res);
            if (isset($aRef['COORDINATES'])) {
                $ref_rdf .= parent::triplifyString($ref_res, $this->getVoc() . "coordinates", $aRef['COORDINATES']);
            }
            if (isset($aRef['JOURNAL'])) {
                $ref_rdf .= parent::triplifyString($ref_res, $this->getVoc() . "citation", $aRef['JOURNAL']);
            }
            parent::AddRDF($ref_rdf);
        }

        // start a fresh buffer for the next record
        $refseq_record_str = "";
        $this->WriteRDFBufferToWriteFile();
    } //while
}
/**
 * Converts the tab-separated chemical/enriched-pathway file into RDF.
 *
 * Lines starting with '#' are skipped. The first data line must contain
 * 11 tab-separated fields; otherwise a warning is raised and FALSE is
 * returned. Each remaining line yields one chemical-pathway association
 * built from columns 2 (chemical id), 5 (pathway qname) and 7 (p-value).
 *
 * @return bool TRUE when the whole file was processed, FALSE on a column mismatch
 */
function CTD_chem_pathways_enriched() {
    $columns_verified = false;
    while ($l = $this->GetReadFile()->Read()) {
        // comment line
        if ($l[0] == '#') {
            continue;
        }

        $a = explode("\t", trim($l));

        // check number of columns, once, on the first data line
        if (!$columns_verified) {
            $c = count(explode("\t", $l));
            if ($c != 11) {
                trigger_error("CTD_chem_pathways_enriched function expects 11 fields, found {$c}!" . PHP_EOL, E_USER_WARNING);
                return FALSE;
            }
            $columns_verified = true;
        }

        $chemical_id = $a[1];
        $p_value = $a[6];

        // split the pathway qname into namespace and local id
        $this->getRegistry()->parseQName($a[4], $pathway_ns, $pathway_id);
        if ($pathway_ns == "react") {
            $pathway_ns = "reactome";
        }
        $pathway_qname = $pathway_ns . ":" . $pathway_id;

        $pathway_resource_id = parent::getRes() . md5($chemical_id . $pathway_ns . $pathway_id . $p_value);
        $pathway_resource_label = "Chemical-pathway association between mesh:" . $chemical_id . " and " . $pathway_qname . " with p-value " . $p_value;

        $this->AddRDF(
            parent::describeIndividual($pathway_resource_id, $pathway_resource_label, parent::getVoc() . "Chemical-Pathway-Association") .
            parent::describeClass(parent::getVoc() . "Chemical-Pathway-Association", "Chemical-Pathway Association") .
            parent::triplify($pathway_resource_id, $this->getVoc() . "pathway", $pathway_qname) .
            parent::triplify($pathway_resource_id, parent::getVoc() . "chemical", "mesh:" . $chemical_id) .
            parent::triplifyString($pathway_resource_id, $this->getVoc() . "p-value", $p_value, "xsd:double")
        );
        parent::WriteRDFBufferToWriteFile();
    }
    return TRUE;
}
/**
 * Parses one KEGG flat-file entry and emits RDF describing it.
 *
 * Each line carries an optional key in its first 12 characters and a value in
 * the remainder; a line with an empty key continues the previous key. Parsing
 * stops at the "///" terminator. Keys are dispatched to key-specific handlers
 * in the order written below (the order matters: several handlers fall
 * through to later ones when their pattern does not match), and anything that
 * no handler claims is kept as a literal tagged with " [script:default]".
 * REFERENCE/AUTHORS/TITLE/JOURNAL lines are collected and written after the
 * loop.
 *
 * Changes from the previous version: an unreadable file now raises a warning
 * and returns instead of looping on an invalid handle, and the dead `$o`
 * bookkeeping in the ORTHOLOGY/REACTION handler (which wrote a non-numeric
 * offset into a string) has been removed.
 *
 * @param string $lfile path of the entry file to parse
 */
function parseEntry($lfile) {
    $fp = fopen($lfile, "r");
    if ($fp === FALSE) {
        trigger_error("Unable to open " . $lfile, E_USER_WARNING);
        return;
    }

    while ($l = fgets($fp, 100000)) {
        $k_t = trim(substr($l, 0, 12));
        $v = trim(substr($l, 12));
        if (!$k_t and $v == '') {
            continue;
        }

        // set the key to the current key if not empty, else keep using what was there before
        if (!isset($k)) {
            $k = $k_t;
        } elseif (!empty($k_t)) {
            $k = $k_t;
        }

        if ($k == "///" or $k == "ENTRY1") {
            break;
        }

        if ($k == "ENTRY") {
            $a = explode(" ", $v, 2);
            $e['id'] = str_replace(array("EC ", " "), "", $a[0]);
            if (isset($this->org)) {
                $e['id'] = $this->org . "_" . $e['id'];
            }
            $e['type'] = trim(str_replace(array("Complete ", "Pathway Module"), array("", "Pathway Module"), $a[1]));
            $e['type_label'] = str_replace(" ", "-", $e['type']);
            $uri = parent::getNamespace() . $e['id'];
            continue;
        }

        // key with value
        if (in_array($k, array("NAME", "DESCRIPTION", "DEFINITION", "EQUATION", "COMMENT"))) {
            if ($k == "NAME") {
                parent::addRDF(
                    parent::describeIndividual($uri, $v, parent::getVoc() . $e['type']) .
                    parent::describeClass(parent::getVoc() . $e['type'], $e['type_label']) .
                    parent::triplify($uri, "rdfs:seeAlso", "http://www.kegg.jp/dbget-bin/www_bget?" . $e['id'])
                );
                if ($e['type'] == 'Genome') {
                    $a = explode(",", $v);
                    parent::addRDF(parent::triplify($uri, "owl:sameAs", "kegg:" . $a[0]));
                }
            } elseif ($k == "DESCRIPTION") {
                parent::addRDF(parent::triplifyString($uri, "dc:description", $v));
            } elseif ($k == "DEFINITION" and $e['type'] == "KO") {
                // only the bracketed part of a KO definition is kept
                preg_match("/\\[([^\\]]+)\\]/", $v, $m);
                if (isset($m[1])) {
                    parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-ec", $m[1]));
                }
            } elseif ($k == "COMMENT") {
                // comments are only mined for an ICD-O code
                preg_match("/ICD-O: ([^,]+),/", $v, $m);
                if (isset($m[1])) {
                    parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-icdo", "icdo:" . $m[1]));
                    continue;
                }
            } else {
                parent::addRDF(parent::triplifyString($uri, parent::getVoc() . strtolower($k), $v));
            }
            continue;
        }

        if ($k == "RPAIR" and $e['type'] == "Reaction") {
            $list = explode(" ", $v);
            $id = parent::getRes() . $e['id'] . "." . $list[2] . "." . $list[3];
            $rc = '';
            if (isset($list[4])) {
                // drop the 4-character prefix and the closing character
                $rc = "kegg:" . substr($list[4], 4, -1);
            }
            parent::addRDF(
                parent::describeIndividual($id, $e['id'] . " " . $v, parent::getVoc() . "RPair-Role") .
                parent::describeClass(parent::getVoc() . "RPair-Role", "RPair Role") .
                parent::triplify($id, parent::getVoc() . "rpair", "kegg:" . $list[0]) .
                parent::triplifyString($id, parent::getVoc() . "role", $list[3]) .
                ($rc != '' ? parent::triplify($id, parent::getVoc() . "reaction-center", $rc) : '') .
                parent::triplify($uri, parent::getVoc() . "rpair-role", $id)
            );
            continue;
        }

        // list of entries
        // note: "and" binds tighter than "or", i.e. A or (B and C)
        if (in_array($k, array("ENZYME", "RPAIR", "RELATEDPAIR")) or in_array($e['type'], array("Compound", "RClass", "RPair")) and $k == "REACTION") {
            $list = explode(" ", $v);
            foreach ($list as $id) {
                if (!$id) {
                    continue;
                }
                parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:{$id}"));
            }
            continue;
        }

        // key with semi-colon separated values
        if (in_array($k, array("CLASS", "CATEGORY", "KEYWORDS", "CHROMOSOME", "ANNOTATION", "ACTIVITY", "TYPE"))) {
            $a = explode(";", $v);
            foreach ($a as $c) {
                parent::addRDF(parent::triplifyString($uri, parent::getVoc() . strtolower($k), trim($c)));
            }
            continue;
        }

        // kegg seems to make a prefix mistake with the pathway identifiers...
        if ($k == "PATHWAY") {
            $a = explode(" ", $v, 2);
            preg_match("/[a-z]+([0-9]{5})/", $a[0], $m);
            if (isset($m[1])) {
                parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:map" . $m[1]));
            } else {
                echo "pathway problem: " . $v . PHP_EOL;
            }
            continue;
        }

        // multi-line header with key-value pair
        if (in_array($k, array("PATHWAY_MAP", "STR_MAP", "MODULE", "DISEASE", "KO_PATHWAY", "COMPOUND"))) {
            // PATHWAY_MAP map00010 Glycolysis / Gluconeogenesis
            $a = explode(" ", $v, 2);
            $mid = $a[0];
            if (strpos($a[0], '(') !== FALSE) {
                $mid = substr($a[0], 0, strpos($a[0], '('));
            }
            if (isset($this->org) and $k == "MODULE") {
                $mid = substr($mid, strpos($v, "_") + 1);
            }
            parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:" . $mid));
            continue;
        }

        // REACTION parsing: applies to any key whose value carries an [RN:...] list
        if (preg_match("/\\[RN:([^\\]]+)]/", $v, $m) != FALSE) {
            $list = explode(" ", $m[1]);
            foreach ($list as $item) {
                parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:" . $item));
            }
            continue;
        }

        if ($k == "DRUG") {
            preg_match("/\\[DR:([^\\]]+)]/", $v, $m);
            if (isset($m[1])) {
                $list = explode(" ", $m[1]);
                foreach ($list as $item) {
                    parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:" . $item));
                }
                continue;
            }
        }

        if ($k == "TAXONOMY") {
            parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:" . str_replace("TAX", "taxonomy", $v)));
            continue;
        }

        // a list of objects to parse out that are defined within square brackets
        if (in_array($k, array("SOURCE", "COMPONENT"))) {
            preg_match_all("/\\[([^\\]]+)\\]/", $v, $m);
            if (isset($m[1])) {
                foreach ($m[1] as $id) {
                    $myid = str_replace(array("TAX", "CPD", "DR"), array("taxonomy", "kegg", "kegg"), $id);
                    parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), $myid));
                }
                continue;
            }
        }

        // multi-line header with multi-key single value pair
        if (in_array($k, array("ORTHOLOGY", "REACTION"))) {
            // K00844,K12407,K00845 hexokinase/glucokinase [EC:2.7.1.1 2.7.1.2] [RN:R01786]
            // R01786,R02189,R09085 C00267 -> C00668
            $a = explode(" ", $v, 2);
            $ids = explode(",", $a[0]);
            if ($k == "REACTION" and $ids[0][0] != "R") {
                echo "unable to parse {$k}" . PHP_EOL;
                continue;
            }
            if (!isset($a[1])) {
                if ($e['type'] == "Reaction") {
                    parent::addRDF(parent::triplify($uri, parent::getVoc() . "orthology", "kegg:" . trim($a[0])));
                    continue;
                }
                echo "parse error: " . $k . " " . $v . PHP_EOL;
                continue;
            }
            foreach ($ids as $id) {
                parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:{$id}"));
            }
            continue;
        }

        if ($k == "DBLINKS") {
            // DBLINKS GO: 0006096 0006094
            $a = explode(": ", $v, 2);
            // map the database label used in the file to the prefix used in the output
            $ns = str_replace(
                array("ncbi-geneid", "ncbi-gi", "rn", "pubchem", "pdb-ccd", "icd-10", "um-bbd", "iubmb enzyme nomenclature", "explorenz - the enzyme database", "expasy - enzyme nomenclature database", "umbbd (biocatalysis/biodegradation database)", "brenda, the enzyme database"),
                array("ncbigene", "gi", "kegg", "pubchem.compound", "ccd", "icd10", "umbbd", "ec", "ec", "ec", "ec", "ec"),
                strtolower($a[0])
            );
            $ids = explode(" ", $a[1]);
            foreach ($ids as $id) {
                if (!$id) {
                    continue;
                }
                parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-{$ns}", "{$ns}:{$id}"));
            }
            continue;
        }

        // REMARK lines that match none of the patterns fall through to the handlers below
        if ($k == "REMARK") {
            preg_match("/Same as: ([A-Z0-9]+)/", $v, $m);
            if (isset($m[1])) {
                parent::addRDF(parent::triplify($uri, parent::getVoc() . "same-as", "kegg:" . $m[1]));
                continue;
            }
            preg_match("/ATC code: (.*)/", $v, $m);
            if (isset($m[1])) {
                $list = explode(" ", $m[1]);
                foreach ($list as $item) {
                    parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-atc", "atc:" . $item));
                }
                continue;
            }
            preg_match("/Therapeutic category: (.*)/", $v, $m);
            if (isset($m[1])) {
                $list = explode(" ", $m[1]);
                foreach ($list as $item) {
                    parent::addRDF(parent::triplifyString($uri, parent::getVoc() . "therapeutic-category", $item));
                }
                continue;
            }
            preg_match("/Drug group: (.*)/", $v, $m);
            if (isset($m[1])) {
                $list = explode(" ", $m[1]);
                foreach ($list as $item) {
                    parent::addRDF(parent::triplify($uri, parent::getVoc() . "drug-group", "kegg:" . $item));
                }
                continue;
            }
        }

        if ($k == "PRODUCT" or $k == "SUBSTRATE") {
            // a UUID-shaped token is treated as a dailymed identifier
            preg_match("/([a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})/", $v, $m);
            if (isset($m[1])) {
                parent::addRDF(
                    parent::triplify($uri, parent::getVoc() . "x-dailymed", "dailymed:" . $m[1]) .
                    parent::triplifyString("dailymed:" . $m[1], "rdfs:label", $v)
                );
                continue;
            }
            preg_match("/\\[CPD:([^\\]]+)\\]/", $v, $m);
            if (isset($m[1])) {
                parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:" . $m[1]));
                continue;
            }
        }

        if ($k == "STATISTICS") {
            $a = explode(": ", $v);
            parent::addRDF(parent::triplifyString($uri, parent::getVoc() . str_replace(" ", "-", strtolower($a[0])), $a[1]));
            continue;
        }

        if ($k == "ORGANISM") {
            $a = explode(" ", $v);
            parent::addRDF(parent::triplify($uri, parent::getVoc() . "organism", "kegg:" . $a[0]));
            continue;
        }

        if ($k == "REFERENCE") {
            if (!isset($ref)) {
                $ref = 1;
            } else {
                // this is a bug where the reference declaration is split onto two lines
                if (!isset($e['reference'][$ref]['title'])) {
                    continue;
                }
                $ref++;
            }
            if (strstr($v, "PMID")) {
                // PMID:11529849 (marker)
                preg_match("/(PMID:[0-9]+) /", $v, $m);
                if (isset($m[1])) {
                    $e['reference'][$ref]['pubmed'] = $m[1];
                }
            }
            continue;
        }
        if ($k == "AUTHORS") {
            $e['reference'][$ref]['authors'] = $v;
            continue;
        }
        if ($k == "TITLE") {
            $e['reference'][$ref]['title'] = $v;
            continue;
        }
        if ($k == "JOURNAL") {
            $e['reference'][$ref]['journal'] = $v;
            continue;
        }

        if ($e['type'] == "Disease" and ($k == "GENE" or $k == "MARKER")) {
            // BCR-ABL (translocation) [HSA:613 25] [KO:K08878 K06619]
            preg_match_all("/ \\[([^\\]]+)\\]/", $v, $m);
            if (isset($m[1])) {
                foreach ($m[1] as $idlist) {
                    $a = explode(":", $idlist);
                    $ns = $a[0];
                    $b = explode(" ", $a[1]);
                    foreach ($b as $id) {
                        if ($ns == "KO") {
                            $rel = "ko-" . strtolower($k);
                            $gene = $id;
                        } else {
                            $rel = strtolower($k);
                            $gene = $ns . "_" . $id;
                        }
                        parent::addRDF(parent::triplify($uri, parent::getVoc() . $rel, "kegg:{$gene}"));
                    }
                }
            } else {
                echo $v;
            }
            continue;
        }

        if ($k == "GENES") {
            // ATH: AT1G32780 AT1G64710 AT1G77120(ADH1) AT5G24760
            $a = explode(": ", $v);
            $org = $a[0];
            $b = explode(" ", $a[1]);
            foreach ($b as $id) {
                // keep only the part before an optional "(symbol)"
                $c = explode("(", $id);
                $gene = parent::getNamespace() . $org . "_" . $c[0];
                parent::addRDF(parent::triplify($uri, parent::getVoc() . "gene", $gene));
            }
            continue;
        }

        if ($k == "DRUG_TARGET") {
            // Afatinib: D09724 D09733
            $s = substr($v, strpos($v, ":") + 2);
            $list = explode(" ", $s);
            foreach ($list as $item) {
                parent::addRDF(parent::triplify($uri, parent::getVoc() . "drug-target", "kegg:{$item}"));
            }
            continue;
        }

        if ($k == "STRUCTURE") {
            $list = explode(" ", $v);
            foreach ($list as $item) {
                if (trim($item) == '') {
                    continue;
                }
                parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-pdb", "pdb:{$item}"));
            }
            continue;
        }

        if ($k == "MOTIF") {
            $list = explode(" ", $v);
            foreach ($list as $item) {
                parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-pfam", "pfam:{$item}"));
            }
            continue;
        }

        if (in_array($k, array("INTERACTION", "METABOLISM", "TARGET"))) {
            // dopamine D2-receptor antagonist [HSA:1813] [KO:K04145]
            $id = parent::getRes() . md5($uri . $v);
            $type = ucfirst(strtolower($k));
            if (in_array($k, array("INTERACTION", "METABOLISM"))) {
                $a = explode(":", $v, 2);
                $modifier = $a[0];
            } else {
                $modifier = '';
                // dopamine D2-receptor antagonist [
                $s = substr($v, 0, strpos($v, "[") + 1);
                preg_match("/ ([a-z]+) \\[/", $s, $m);
                if (isset($m[1])) {
                    $modifier = $m[1];
                }
            }
            parent::addRDF(
                parent::describeIndividual($id, $v, parent::getVoc() . $type) .
                parent::describeClass(parent::getVoc() . $type, $type) .
                parent::triplifyString($id, parent::getVoc() . "modifier", $modifier) .
                parent::triplify($uri, parent::getVoc() . strtolower($k), $id)
            );
            preg_match_all("/ \\[([^\\]]+)\\]/", $v, $m);
            if (isset($m[1])) {
                foreach ($m[1] as $item) {
                    if (!strstr($item, "KO")) {
                        $item = "kegg:" . str_replace(":", "_", $item);
                    } else {
                        $item = str_replace("KO:", "kegg:", $item);
                    }
                    parent::addRDF(parent::triplify($id, parent::getVoc() . "link", $item));
                }
            }
            continue;
        }

        // skip these
        if (in_array($k, array("ATOM", "BOND", "BRITE", "AASEQ", "NTSEQ", "SEQUENCE"))) {
            continue;
        }

        // simple strings to keep as is
        if (in_array($k, array("EXACT_MASS", "FORMULA", "MOL_WEIGHT", "LINEAGE", "LENGTH", "MASS", "COMPOSITION", "NODE", "EDGE", "POSITION"))) {
            parent::addRDF(parent::triplifyString($uri, parent::getVoc() . strtolower($k), $v));
            continue;
        }

        // default catchall
        parent::addRDF(parent::triplifyString($uri, parent::getVoc() . strtolower($k), $v . " [script:default]"));
    }

    // write out the references collected while reading the entry
    if (isset($e['reference'])) {
        foreach ($e['reference'] as $i => $r) {
            $ref = parent::getRes() . $e['id'] . ".ref.{$i}";
            parent::addRDF(
                parent::describeIndividual($ref, $r['title'], parent::getVoc() . "Reference") .
                parent::describeClass(parent::getVoc() . "Reference", "Reference") .
                parent::triplifyString($ref, parent::getVoc() . "authors", $r['authors']) .
                parent::triplifyString($ref, parent::getVoc() . "journal", $r['journal']) .
                parent::triplify($uri, parent::getVoc() . "reference", $ref)
            );
            if (isset($r['pubmed'])) {
                parent::addRDF(parent::triplify($ref, parent::getVoc() . "x-pubmed", $r['pubmed']));
            }
        }
    }
    fclose($fp);
}
/**
 * Converts the tab-separated genotype / "not disease" file into RDF.
 *
 * Every line must have 8 tab-separated columns; other lines raise a warning
 * and are skipped. For each (allele, disease) pair found in columns 3
 * ('|'-separated alleles, lower-cased) and 8 (','-separated disease ids) one
 * negated allele-disease association is emitted, together with the genotype
 * string from column 1 and the PubMed ids from column 6.
 *
 * The line terminator is removed before splitting so that the last column
 * does not carry a trailing newline into the disease identifier. Only "\r"
 * and "\n" are stripped: a full trim() would also remove trailing tabs and
 * change the column count. (Other readers in this file trim the value
 * returned by read(), which suggests the terminator is kept - TODO confirm.)
 */
function MGI_Geno_NotDisease() {
    while ($l = $this->getReadFile()->read(248000)) {
        $a = explode("\t", rtrim($l, "\r\n"));
        if (count($a) != 8) {
            trigger_error("Incorrect number of columns", E_USER_WARNING);
            continue;
        }

        $genotype = $a[0];
        $alleles = explode("|", strtolower($a[2]));
        $diseases = explode(",", $a[7]);

        foreach ($diseases as $d) {
            $d = trim($d);
            if ($d == '') {
                // no disease id in this slot: nothing to assert
                continue;
            }
            $disease = "omim:{$d}";

            foreach ($alleles as $allele) {
                $id = parent::getRes() . md5($allele . $disease);
                $label = "{$allele} {$disease} absent association";
                parent::addRDF(
                    parent::describeIndividual($id, $label, $this->getVoc() . "Allele-Disease-Non-Association") .
                    parent::describeClass($this->getVoc() . "Allele-Disease-Non-Association", "MGI Allele-Disease Non-Association") .
                    parent::triplify($id, $this->getVoc() . "allele", $allele) .
                    parent::triplifyString($id, $this->getVoc() . "genotype-string", $genotype) .
                    parent::triplify($id, $this->getVoc() . "disease", $disease) .
                    parent::triplifyString($id, $this->getVoc() . "is-negated", "true")
                );

                if ($a[5]) {
                    $pmids = explode(",", $a[5]);
                    foreach ($pmids as $pmid) {
                        $pmid = trim($pmid);
                        if ($pmid == '') {
                            continue;
                        }
                        parent::addRDF(parent::triplify($id, $this->getVoc() . "x-pubmed", "pubmed:" . $pmid));
                    }
                }
            }
        }
        $this->writeRDFBufferToWriteFile();
    }
}
/**
 * Converts one decoded OMIM API entry into RDF.
 *
 * Reads the first entry of $obj["omim"]["entryList"] and emits, in order:
 * links to omim.org / identifiers.org, titles, text sections (plus the OMIM
 * numbers they cite in {NNNNNN} form), allelic variants, the clinical
 * synopsis with its coded phenotypes, the gene map, phenotype maps,
 * literature references and external database links.
 *
 * Changes from the previous version: a reference without a title is now
 * described with the fallback label 'article' (the fallback was computed but
 * the missing $r['title'] was used anyway), and external links that have no
 * namespace mapping are skipped before their value is split.
 *
 * @param array  $obj  decoded OMIM API response
 * @param string $type entry type label; also used to build the class URI
 */
function ParseEntry($obj, $type) {
    $o = $obj["omim"]["entryList"][0]["entry"];
    $omim_id = $o['mimNumber'];
    $omim_uri = parent::getNamespace() . $o['mimNumber'];
    if (isset($o['version'])) {
        parent::setDatasetVersion($o['version']);
    }

    // add the links
    parent::addRDF($this->QQuadO_URL($omim_uri, "rdfs:seeAlso", "http://omim.org/entry/" . $omim_id));
    parent::addRDF($this->QQuadO_URL($omim_uri, "owl:sameAs", "http://identifiers.org/omim/" . $omim_id));

    // parse titles
    $titles = $o['titles'];
    $type_id = str_replace(array(" ", "/"), "-", ucfirst($type));
    parent::addRDF(
        parent::describeIndividual($omim_uri, $titles['preferredTitle'], parent::getVoc() . $type_id) .
        parent::describeClass(parent::getVoc() . $type_id, $type)
    );
    if (isset($titles['preferredTitle'])) {
        parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "preferred-title", $titles['preferredTitle']));
    }
    if (isset($titles['alternativeTitles'])) {
        $b = explode(";;", $titles['alternativeTitles']);
        foreach ($b as $title) {
            parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "alternative-title", trim($title)));
        }
    }

    // parse text sections
    if (isset($o['textSectionList'])) {
        foreach ($o['textSectionList'] as $i => $section) {
            $section_title = $section['textSection']['textSectionTitle'];
            $section_content = $section['textSection']['textSectionContent'];
            if ($section_title == "Description") {
                parent::addRDF(parent::triplifyString($omim_uri, "dc:description", $section_content));
            } else {
                // every other section becomes a predicate named after its title
                $p = str_replace(" ", "-", strtolower($section_title));
                parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "{$p}", $section_content));
            }

            // parse the omim references
            preg_match_all("/\\{([0-9]{6})\\}/", $section_content, $m);
            if (isset($m[1][0])) {
                foreach ($m[1] as $oid) {
                    parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "refers-to", "omim:{$oid}"));
                }
            }
        }
    }

    // allelic variants
    if (isset($o['allelicVariantList'])) {
        foreach ($o['allelicVariantList'] as $i => $v) {
            $v = $v['allelicVariant'];
            $uri = parent::getRes() . "{$omim_id}" . "_allele_" . $i;
            $label = str_replace("\n", " ", $v['name']);
            parent::addRDF(
                parent::describeIndividual($uri, $label, parent::getVoc() . "Allelic-Variant") .
                parent::describeClass(parent::getVoc() . "Allelic-Variant", "Allelic Variant")
            );
            if (isset($v['alternativeNames'])) {
                $names = explode(";;", $v['alternativeNames']);
                foreach ($names as $name) {
                    $name = str_replace("\n", " ", $name);
                    parent::addRDF(parent::triplifyString($uri, parent::getVoc() . "alternative-names", $name));
                }
            }
            if (isset($v['text'])) {
                parent::addRDF(parent::triplifyString($uri, "dc:description", $v['text']));
            }
            if (isset($v['mutations'])) {
                parent::addRDF(parent::triplifyString($uri, parent::getVoc() . "mutation", $v['mutations']));
            }
            if (isset($v['dbSnps'])) {
                $snps = explode(",", $v['dbSnps']);
                foreach ($snps as $snp) {
                    parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-dbsnp", "dbsnp:" . $snp));
                }
            }
            parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "variant", $uri));
        }
    }

    // clinical synopsis
    if (isset($o['clinicalSynopsis'])) {
        $cs = $o['clinicalSynopsis'];
        $cs_uri = parent::getRes() . "" . $omim_id . "_cs";
        parent::addRDF(
            parent::describeIndividual($cs_uri, "Clinical synopsis for omim {$omim_id}", parent::getVoc() . "Clinical-Synopsis") .
            parent::describeClass(parent::getVoc() . "Clinical-Synopsis", "Clinical Synopsis") .
            parent::triplify($omim_uri, parent::getVoc() . "clinical-synopsis", $cs_uri)
        );
        foreach ($cs as $k => $v) {
            // ignore the boolean assertion.
            if (strstr($k, "Exists")) {
                continue;
            }
            // @todo ignore provenance for now
            if (in_array($k, array('contributors', 'creationDate', 'editHistory', 'epochCreated', 'dateCreated', 'epochUpdated', 'dateUpdated'))) {
                continue;
            }
            // normalise scalar values so both shapes share the loop below
            if (!is_array($v)) {
                $v = array($k => $v);
            }
            foreach ($v as $k1 => $v1) {
                $phenotypes = explode(";", $v1);
                foreach ($phenotypes as $coded_phenotype) {
                    // parse out the codes
                    $coded_phenotype = trim($coded_phenotype);
                    if (!$coded_phenotype) {
                        continue;
                    }
                    // the label is the phenotype text without its {...} code blocks
                    $phenotype = preg_replace("/\\{.*\\}/", "", $coded_phenotype);
                    $phenotype_id = parent::getRes() . "" . md5(strtolower($phenotype));
                    $entity_id = parent::getRes() . "" . $k1;
                    parent::addRDF(
                        parent::describeIndividual($phenotype_id, $phenotype, parent::getVoc() . 'Characteristic') .
                        parent::describeClass(parent::getVoc() . 'Characteristic', 'Characteristic') .
                        parent::triplify($cs_uri, parent::getVoc() . "feature", $phenotype_id) .
                        parent::describeIndividual($entity_id, $k1, parent::getVoc() . "Entity") .
                        parent::describeClass(parent::getVoc() . "Entity", "Entity") .
                        parent::triplify($phenotype_id, parent::getVoc() . "characteristic-of", $entity_id)
                    );

                    // parse out the vocab references
                    preg_match_all("/\\{([0-9A-Za-z \\:\\-\\.]+)\\}|;/", $coded_phenotype, $codes);
                    if (isset($codes[1][0])) {
                        foreach ($codes[1] as $entry) {
                            $entries = explode(" ", trim($entry));
                            foreach ($entries as $e) {
                                // bare vocabulary names carry no identifier
                                if ($e == "HPO" || $e == "EOM") {
                                    continue;
                                }
                                $this->getRegistry()->parseQName($e, $ns, $id);
                                if (!isset($ns) || $ns == '') {
                                    // no prefix: treated as an OMIM number, keeping the part before the first "."
                                    $b = explode(".", $id);
                                    $ns = "omim";
                                    $id = $b[0];
                                } else {
                                    $ns = str_replace(array("hpo", "id", "icd10cm", "icd9cm", "snomedct"), array("hp", "eom", "icd10", "icd9", "snomed"), $ns);
                                }
                                parent::addRDF(parent::triplify($phenotype_id, parent::getVoc() . "x-{$ns}", "{$ns}:{$id}"));
                            }
                        }
                    }
                }
            }
        }
    }

    // genemap
    if (isset($o['geneMap'])) {
        $map = $o['geneMap'];
        if (isset($map['chromosome'])) {
            parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "chromosome", (string) $map['chromosome']));
        }
        if (isset($map['cytoLocation'])) {
            parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "cytolocation", (string) $map['cytoLocation']));
        }
        if (isset($map['geneSymbols'])) {
            $b = preg_split("/[,;\\. ]+/", $map['geneSymbols']);
            foreach ($b as $symbol) {
                parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "gene-symbol", "symbol:" . trim($symbol)));
            }
        }
        if (isset($map['geneName'])) {
            $b = explode(",", $map['geneName']);
            foreach ($b as $name) {
                parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "gene-name", trim($name)));
            }
        }
        if (isset($map['mappingMethod'])) {
            $b = explode(",", $map['mappingMethod']);
            foreach ($b as $c) {
                $mapping_method = trim($c);
                $method_uri = $this->get_method_type($mapping_method);
                if ($method_uri !== false) {
                    parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "mapping-method", $method_uri));
                }
            }
        }
        if (isset($map['mouseGeneSymbol'])) {
            $b = explode(",", $map['mouseGeneSymbol']);
            foreach ($b as $c) {
                parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "mouse-gene-symbol", "symbol:" . strtoupper($c)));
            }
        }
        if (isset($map['mouseMgiID'])) {
            $b = explode(",", $map['mouseMgiID']);
            foreach ($b as $c) {
                parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-mgi", $c));
            }
        }
        if (isset($map['geneInheritance']) && $map['geneInheritance'] != '') {
            parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "gene-inheritance", $map['geneInheritance']));
        }
    }

    // phenotype maps
    if (isset($o['phenotypeMapList'])) {
        foreach ($o['phenotypeMapList'] as $i => $phenotypeMap) {
            $phenotypeMap = $phenotypeMap['phenotypeMap'];
            $pm_uri = parent::getRes() . $omim_id . "_pm_" . ($i + 1);
            parent::addRDF(
                parent::describeIndividual($pm_uri, "phenotype mapping for {$omim_id}", parent::getVoc() . "Phenotype-Map") .
                parent::describeClass(parent::getVoc() . "Phenotype-Map", "OMIM Phenotype-Map") .
                parent::triplify($omim_uri, parent::getVoc() . "phenotype-map", $pm_uri)
            );
            foreach (array_keys($phenotypeMap) as $k) {
                if (in_array($k, array("mimNumber", "phenotypeMimNumber", "phenotypicSeriesMimNumber"))) {
                    parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . $k, "omim:" . $phenotypeMap[$k]));
                } elseif ($k == "geneSymbols") {
                    $l = explode(", ", $phenotypeMap[$k]);
                    foreach ($l as $gene) {
                        parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . "gene-symbol", "hgnc.symbol:" . $gene));
                    }
                } elseif ($k == "phenotypeMappingKey") {
                    $l = $this->get_phenotype_mapping_method_type($phenotypeMap[$k]);
                    parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . "mapping-method", $l));
                } else {
                    parent::addRDF(parent::triplifyString($pm_uri, parent::getVoc() . $k, $phenotypeMap[$k]));
                }
            }
        }
    }

    // references
    if (isset($o['referenceList'])) {
        foreach ($o['referenceList'] as $i => $r) {
            $r = $r['reference'];
            if (!isset($r['pubmedID'])) {
                continue;
            }
            $pubmed_uri = "pubmed:" . $r['pubmedID'];
            parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "article", $pubmed_uri));

            // fall back to a generic label when the reference has no title
            $title = 'article';
            if (isset($r['title'])) {
                $title = $r['title'];
            }
            parent::addRDF(parent::describe($pubmed_uri, addslashes($title)));

            if (isset($r['articleUrl'])) {
                parent::addRDF($this->QQuadO_URL($pubmed_uri, "rdfs:seeAlso", htmlentities($r['articleUrl'])));
            }
        }
    }

    // external ids
    if (isset($o['externalLinks'])) {
        foreach ($o['externalLinks'] as $k => $id) {
            if ($id === false) {
                continue;
            }
            $ns = '';
            switch ($k) {
                case 'approvedGeneSymbols':
                    $ns = 'symbol';
                    break;
                case 'geneIDs':
                    $ns = 'ncbigene';
                    break;
                case 'ncbiReferenceSequences':
                    $ns = 'gi';
                    break;
                case 'genbankNucleotideSequences':
                    $ns = 'gi';
                    break;
                case 'proteinSequences':
                    $ns = 'gi';
                    break;
                case 'uniGenes':
                    $ns = 'unigene';
                    break;
                case 'ensemblIDs':
                    $ns = 'ensembl';
                    break;
                case 'swissProtIDs':
                    $ns = 'uniprot';
                    break;
                case 'mgiIDs':
                    $ns = 'mgi';
                    // keep only the part after the first ":"
                    $b = explode(":", $id);
                    $id = $b[1];
                    break;
                case 'flybaseIDs':
                    $ns = 'flybase';
                    break;
                case 'zfinIDs':
                    $ns = 'zfin';
                    break;
                case 'hprdIDs':
                    $ns = 'hprd';
                    break;
                case 'orphanetDiseases':
                    $ns = 'orphanet';
                    break;
                case 'refSeqAccessionIDs':
                    $ns = 'refseq';
                    break;
                case 'ordrDiseases':
                    $ns = 'ordr';
                    // keep only the part before the first ";;"
                    $b = explode(";;", $id);
                    $id = $b[0];
                    break;
                case 'snomedctIDs':
                    $ns = 'snomed';
                    break;
                case 'icd10cmIDs':
                    $ns = 'icd10';
                    break;
                case 'icd9cmIDs':
                    $ns = 'icd9';
                    break;
                case 'umlsIDs':
                    $ns = 'umls';
                    break;
                case 'wormbaseIDs':
                    $ns = 'wormbase';
                    break;
                case 'diseaseOntologyIDs':
                    $ns = 'do';
                    break;
                // specifically ignoring
                case 'geneTests':
                case 'cmgGene':
                case 'geneticAllianceIDs': // #
                case 'nextGxDx':
                case 'nbkIDs': // NBK1207;;Alport Syndrome and Thin Basement Membrane Nephropathy
                case 'newbornScreeningUrls':
                case 'decipherUrls':
                case 'geneReviewShortNames':
                case 'locusSpecificDBs':
                case 'geneticsHomeReferenceIDs':
                case 'omiaIDs':
                case 'coriellDiseases':
                case 'clinicalDiseaseIDs':
                case 'possumSyndromes':
                case 'keggPathways':
                case 'gtr':
                case 'gwasCatalog':
                case 'mgiHumanDisease':
                case 'wormbaseDO':
                case 'dermAtlas': // true/false
                    break;
                default:
                    echo "unhandled external link {$k} {$id}" . PHP_EOL;
            }

            // nothing is emitted for links without a namespace mapping
            if (!$ns) {
                continue;
            }

            $ids = explode(",", $id);
            foreach ($ids as $one_id) {
                if (strstr($one_id, ";;") === FALSE) {
                    parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-{$ns}", $ns . ':' . $one_id));
                } else {
                    // multiple ids//names: keep the parts without lower-case letters
                    $b = explode(";;", $one_id);
                    foreach ($b as $c) {
                        preg_match("/([a-z])/", $c, $m);
                        if (!isset($m[1])) {
                            parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-{$ns}", $ns . ':' . $c));
                        }
                    }
                }
            }
        }
    } //external links
}
/**
 * Converts an iRefIndex tab-delimited dump into RDF, one interaction per line.
 *
 * The first line is read as the header (its first character is dropped before
 * splitting on tabs) and must contain exactly 54 columns. Every following line
 * is split on tabs and turned into triples about the interaction, its two
 * interactors, their redundancy groups, publications, source, confidence
 * values and canonical groups. The RDF buffer is written out after each line.
 *
 * @return bool|null FALSE when the header does not have 54 columns; nothing otherwise
 */
function Parse()
{
    $l = parent::getReadFile()->read(100000);
    $header = explode("\t", trim(substr($l, 1)));
    if (($c = count($header)) != 54) {
        trigger_error("Expecting 54 columns, found {$c}!");
        return FALSE;
    }

    // MI interaction types whose rdfs:label has already been emitted
    $defined = array();

    while ($l = parent::getReadFile()->read(500000)) {
        $a = explode("\t", trim($l));

        // irefindex identifiers
        $rigid = "irefindex." . $a[34]; # checksum for interaction
        $rogida = "irefindex." . $a[32]; # checksum for A
        $rogidb = "irefindex." . $a[33]; # checksum for B
        $irigid = "irefindex.irigid:" . $a[44]; # integer id for interaction
        $irogida = "irefindex.irogid:" . $a[42]; # integer id for A
        $irogidb = "irefindex.irogid:" . $a[43]; # integer id for B
        $crigid = "irefindex.crigid:" . $a[47]; # checksum for canonical interaction
        $icrigid = "irefindex.icrigid:" . $a[50]; # integer id for canonical interaction
        $crogida = "irefindex.crogid:" . $a[45]; # checksum for A's canonical group
        $crogidb = "irefindex.crogid:" . $a[46]; # checksum for B's canonical group
        $icrogida = "irefindex.icrogid:" . $a[48]; # integer for A's canonical group
        $icrogidb = "irefindex.icrogid:" . $a[49]; # integer for B's canonical group

        // 13 contains the original identifier, the rigid, and the edgetype
        $ids = explode("|", $a[13]);
        if (count($ids) != 3) {
            trigger_error("Expecting 3 entries in column 14");
            print_r($ids);
            exit;
        }
        parent::getRegistry()->parseQName($ids[0], $ns, $id);
        if ($id == '-') {
            // this happens with hprd
            $iid = "hprd:" . substr($ids[1], 6);
        } else {
            $iid = $ns . ":" . $id;
        }

        // get the type; an unrecognised edge type is skipped so that no
        // label/type left over from a previous line is attached to this record
        $edge_type = $a[52];
        if ($edge_type == "X") {
            $label = "{$a[0]} - {$a[1]} Interaction";
            $type = "Pairwise-Interaction";
        } elseif ($edge_type == "C") {
            $label = $a[53] . " component complex"; #num of participants
            $type = "Multimeric-Complex";
        } elseif ($edge_type == "Y") {
            $label = "{$a[0]} homomeric complex";
            $type = "Homopolymeric-Complex";
        } else {
            trigger_error("Unknown edge type '{$edge_type}' for {$iid}, skipping record", E_USER_WARNING);
            continue;
        }
        parent::addRDF(
            parent::describeIndividual($iid, $label, parent::getVoc() . $type) .
            parent::describeClass(parent::getVoc() . $type, str_replace("-", " ", $type))
        );

        // interaction type[52] by method[6]
        if ($a[6] != '-') {
            $data = $this->ParseStringArray($a[6]);
            $qname = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(
                parent::triplify($iid, parent::getVoc() . "method", $qname) .
                parent::describeClass($qname, $data['label'])
            );
        }
        parent::addRDF(parent::triplify($iid, "rdfs:seeAlso", "http://wodaklab.org/iRefWeb/interaction/show/" . $a[50]));

        // set the interactors (columns 0 and 1)
        foreach (array('a', 'b') as $i => $p) {
            $data = $this->ParseStringArray($a[$i]);
            $interactor = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(parent::triplify($iid, parent::getVoc() . "interactor_{$p}", $interactor));

            // biological role (columns 16/17) then experimental role (columns 18/19)
            foreach (array(16 => "_biological_role", 18 => "_experimental_role") as $offset => $suffix) {
                $role = $a[$offset + $i];
                if ($role == '-') {
                    continue;
                }
                $data = $this->ParseStringArray($role);
                $qname = trim($data["ns"]) . ":" . trim($data["id"]);
                if ($qname != "mi:0000") {
                    parent::addRDF(
                        parent::triplify($iid, parent::getVoc() . "interactor_{$p}" . $suffix, $qname) .
                        parent::describeClass($qname, $data['label'])
                    );
                }
            }

            // interactor type
            $type = $a[20 + $i];
            if ($type != '-') {
                $data = $this->ParseStringArray($type);
                $qname = trim($data["ns"]) . ":" . trim($data["id"]);
                parent::addRDF(
                    parent::triplify($interactor, "rdf:type", $qname) .
                    parent::describeClass($qname, $data['label'])
                );
            }
        }

        // add the alternatives through the taxon + seq redundant group
        for ($k = 0; $k <= 1; $k++) {
            $taxid = '';
            $rogid = "irefindex." . $a[32 + $k];
            parent::addRDF(
                parent::describeIndividual($rogid, "", parent::getVoc() . "Taxon-Sequence-Identical-Group") .
                parent::describeClass(parent::getVoc() . "Taxon-Sequence-Identical-Group", "Taxon + Sequence Identical Group")
            );
            $tax = $a[9 + $k];
            if ($tax && $tax != '-' && $tax != '-1') {
                $data = $this->ParseStringArray($tax);
                $taxid = trim($data["ns"]) . ":" . trim($data["id"]);
                parent::addRDF(parent::triplify($rogid, parent::getVoc() . "x-taxonomy", $taxid));
            }
            $list = explode("|", $a[3 + $k]);
            foreach ($list as $item) {
                $data = $this->ParseStringArray($item);
                $ns = trim($data["ns"]);
                $id = trim($data["id"]);
                $qname = $ns . ":" . $id;
                if ($ns && $ns != 'rogid' && $ns != 'irogid' && $id != '-') {
                    parent::addRDF(parent::triplify($rogid, parent::getVoc() . "has-member", $qname));
                    if ($taxid && $taxid != '-' && $taxid != '-1') {
                        parent::addRDF(parent::triplify($qname, parent::getVoc() . "x-taxonomy", $taxid));
                    }
                }
            }
        }

        // publications; '-' and the 'pubmed:0' placeholder carry no reference
        $list = explode("|", $a[8]);
        foreach ($list as $item) {
            if ($item == '-' || $item == 'pubmed:0') {
                continue;
            }
            $data = $this->ParseStringArray($item);
            $qname = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(parent::triplify($iid, parent::getVoc() . "article", $qname));
        }

        // MI interaction type
        if ($a[11] != '-' && $a[11] != 'NA') {
            $data = $this->ParseStringArray($a[11]);
            $qname = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(parent::triplify($iid, "rdf:type", $qname));
            if (!isset($defined[$qname])) {
                $defined[$qname] = '';
                parent::addRDF(parent::triplifyString($qname, "rdfs:label", $data['label']));
            }
        }

        // source
        if ($a[12] != '-') {
            $data = $this->ParseStringArray($a[12]);
            $qname = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(parent::triplify($iid, parent::getVoc() . "source", $qname));
        }

        // confidence
        $list = explode("|", $a[14]);
        foreach ($list as $item) {
            $data = $this->ParseStringArray($item);
            $ns = trim($data["ns"]);
            $id = trim($data["id"]);
            if ($ns == 'lpr') {
                // lowest number of distinct interactions that any one article reported
                parent::addRDF(parent::triplifyString($iid, parent::getVoc() . "minimum-number-interactions-reported", $id));
            } elseif ($ns == "hpr") {
                // higher number of distinct interactions that any one article reports
                parent::addRDF(parent::triplifyString($iid, parent::getVoc() . "maximum-number-interactions-reported", $id));
            } elseif ($ns == 'hp') {
                // total number of unique PMIDs used to support the interaction
                // (a comparison, so other keys are not reported under this predicate)
                parent::addRDF(parent::triplifyString($iid, parent::getVoc() . "number-supporting-articles", $id));
            }
        }

        // expansion method
        if ($a[15]) {
            $id = parent::getRes() . md5($a[15]);
            parent::addRDF(
                parent::describeIndividual($id, $a[15], parent::getVoc() . "Expansion-Method") .
                parent::describeClass(parent::getVoc() . "Expansion-Method", "Expansion Method") .
                parent::triplify($iid, parent::getVoc() . "expansion-method", $id)
            );
        }

        // host organism
        if ($a[28] != '-') {
            $data = $this->ParseStringArray($a[28]);
            $qname = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(parent::triplify($iid, parent::getVoc() . "host-organism", $qname));
        }

        // @todo add to record
        // created 2010/05/18
        $date = str_replace("/", "-", $a[30]) . "T00:00:00Z";
        parent::addRDF(parent::triplifyString($iid, "dc:created", $date, "xsd:dateTime"));

        // taxon-sequence identical interaction group and its canonical counterparts
        parent::addRDF(
            parent::triplify($iid, parent::getVoc() . "taxon-sequence-identical-interaction", $rigid) .
            parent::triplify($rigid, "rdf:type", parent::getVoc() . "Taxon-Sequence-Identical-Interaction") .
            parent::describeClass(parent::getVoc() . "Taxon-Sequence-Identical-Interaction", "Taxon + Sequence Identical Interaction") .
            parent::triplify($rigid, parent::getVoc() . "irigid", $irigid) .
            parent::triplify($rigid, parent::getVoc() . "interactor-a", $rogida) .
            parent::triplify($rogida, parent::getVoc() . "irogid", $irogida) .
            parent::triplify($rigid, parent::getVoc() . "interactor-b", $rogidb) .
            parent::triplify($rogidb, parent::getVoc() . "irogid", $irogidb) .
            parent::triplify($rogida, parent::getVoc() . "canonical-group", $crogida) .
            parent::triplify($rogidb, parent::getVoc() . "canonical-group", $crogidb) .
            parent::triplify($rigid, parent::getVoc() . "taxon-sequence-similar-interaction", $crigid) .
            parent::triplify($crigid, "rdf:type", parent::getVoc() . "Taxon-Sequence-Canonical-Interaction") .
            parent::describeClass(parent::getVoc() . "Taxon-Sequence-Canonical-Interaction", "Taxon + Sequence Canonical Interaction") .
            parent::triplify($crigid, parent::getVoc() . "icrigid", $icrigid) .
            parent::triplify($crigid, parent::getVoc() . "interactor-a-canonical-group", $crogida) .
            parent::triplify($crogida, "rdf:type", parent::getVoc() . "Taxon-Sequence-Similar-Group") .
            parent::triplify($crogida, parent::getVoc() . "icrogid", $icrogida) .
            parent::triplify($crigid, parent::getVoc() . "interactor-b-canonical-group", $crogidb) .
            parent::triplify($crogidb, "rdf:type", parent::getVoc() . "Taxon-Sequence-Similar-Group") .
            parent::triplify($crogidb, parent::getVoc() . "icrogid", $icrogidb) .
            parent::describeClass(parent::getVoc() . "Taxon-Sequence-Similar-Group", "Taxon + Sequence Similar Group")
        );
        parent::writeRDFBufferToWriteFile();
    }
}
/**
 * Converts a "\t|\t"-delimited citations file into RDF.
 *
 * Each line yields a citation resource keyed by its first field. Lines that
 * lack a second or third field are skipped. Field 5 (if present) becomes an
 * rdfs:seeAlso link, field 3 a pubmed cross-reference unless it is "0",
 * field 6 a text literal, and field 7 a space-separated list of taxonomy ids
 * that are linked to the citation. The buffer is flushed after every line.
 */
private function citations()
{
    while ($l = $this->getReadFile()->read(2000000)) {
        $a = explode("\t|\t", rtrim($l, "\t|\n"));
        if (!isset($a[1]) or !isset($a[2])) {
            continue;
        }
        $c = parent::getRes() . "citation-id-" . $a[0];

        // Keep the raw link and the generated RDF in separate variables so that
        // a falsy but non-empty value (e.g. "0") is never appended to the buffer.
        $seealso_rdf = "";
        $seealso = isset($a[4]) ? trim($a[4]) : "";
        if ($seealso) {
            // normalise DOI shorthands and a known "http;//" typo in the data
            $seealso = str_replace(array("lx: DOI ", "http;//"), array("http://dx.doi.org/", "http://"), $seealso);
            if (strlen($seealso) > 2 and !strstr($seealso, "http")) {
                $seealso = "http://" . $seealso;
            }
            $seealso_rdf = parent::triplify($c, "rdfs:seeAlso", $seealso);
        }

        parent::addRDF(
            parent::describeIndividual($c, $a[1], $this->getVoc() . "Citation") .
            parent::describeClass($this->getVoc() . "Citation", "Citation") .
            parent::triplifyString($c, parent::getVoc() . "citation-key", $a[1]) .
            ($a[2] == "0" ? "" : parent::triplify($c, parent::getVoc() . "x-pubmed", "pubmed:" . $a[2])) .
            $seealso_rdf .
            ((isset($a[5]) and $a[5]) ? parent::triplifyString($c, parent::getVoc() . "text", str_replace("\"", "", $a[5])) : "")
        );

        if (isset($a[6])) {
            foreach (explode(" ", trim($a[6])) as $taxid) {
                // explode() returns [""] for an empty field and empty tokens for
                // repeated spaces; those would produce a bare "taxonomy:" URI
                if ($taxid === "") {
                    continue;
                }
                parent::addRDF(parent::triplify("taxonomy:{$taxid}", $this->getVoc() . "citation", $c));
            }
        }
        $this->writeRDFBufferToWriteFile();
    }
}
/**
 * Emits one text resource per value found under $x->{$list_name}->{$item_name}.
 *
 * For every entry of $x->{$list_name} that has a non-empty $item_name child,
 * the values are either the $list_item_name children of that child (when such
 * children exist) or the child itself. Each value becomes an individual typed
 * with the vocabulary class ucfirst($item_name), carrying the value as
 * rdf:value, and is linked from $id through $predicate.
 *
 * @param object      $x              node holding the list (accessed via dynamic properties)
 * @param string      $id             subject the generated resources are attached to
 * @param string      $list_name      property name of the list on $x
 * @param string      $item_name      property name of the item inside each list entry
 * @param string      $predicate      predicate linking $id to each generated resource
 * @param string|null $list_item_name optional property name of nested values inside the item
 */
function AddText(&$x, $id, $list_name, $item_name, $predicate, $list_item_name = null)
{
    if (!isset($x->{$list_name})) {
        return;
    }

    foreach ($x->{$list_name} as $entry) {
        if (!isset($entry->{$item_name}) || $entry->{$item_name} == '') {
            continue;
        }
        $node = $entry->{$item_name};

        // gather the values first: nested children when present, else the node itself
        $values = array();
        if (isset($node->{$list_item_name})) {
            foreach ($node->{$list_item_name} as $child) {
                $values[] = $child;
            }
        } else {
            $values[] = $node;
        }

        foreach ($values as $value) {
            $kid = parent::getRes() . md5($value);
            $this->addRDF(
                $this->describeIndividual($kid, "{$item_name} for {$id}", parent::getVoc() . ucfirst($item_name)) .
                $this->describeClass(parent::getVoc() . ucfirst($item_name), $item_name) .
                $this->triplifyString($kid, "rdf:value", $value) .
                $this->triplify($id, $predicate, $kid)
            );
        }
    }
}
/**
 * Converts the disorder/gene association XML in $file into RDF.
 *
 * For every Disorder in each parsed DisorderList, each
 * DisorderGeneAssociation yields: the gene (label, symbol, all synonyms and
 * all external references) and an association resource typed by the
 * association type and linked to its status, the disorder and the gene.
 * The buffer is flushed once per disorder.
 *
 * @param string $file path handed to the CXML reader
 */
function genes($file)
{
    $xml = new CXML($file);
    while ($xml->parse("DisorderList") == TRUE) {
        $x = $xml->GetXMLRoot();
        foreach ($x->Disorder as $d) {
            $orphanet_id = parent::getNamespace() . (string) $d->OrphaNumber;
            $disorder_name = (string) $d->Name;

            foreach ($d->DisorderGeneAssociationList->DisorderGeneAssociation as $dga) {
                // gene
                $gene = $dga->Gene;
                $gene_id = parent::getNamespace() . (string) $gene->OrphaNumber;
                $gene_label = (string) $gene->Name;
                $gene_symbol = (string) $gene->Symbol;
                parent::addRDF(
                    parent::describeIndividual($gene_id, $gene_label, parent::getVoc() . "Gene") .
                    parent::describeClass(parent::getVoc() . "Gene", "orphanet gene") .
                    parent::triplifyString($gene_id, parent::getVoc() . "symbol", $gene_symbol)
                );

                // Iterate the <Synonym> children of each list. Looping over the
                // list element alone visits the container once and would only
                // ever read its first child (or an empty string for an empty list).
                foreach ($gene->SynonymList as $synonym_list) {
                    foreach ($synonym_list->Synonym as $s) {
                        $synonym = (string) $s;
                        parent::addRDF(parent::triplifyString($gene_id, parent::getVoc() . "synonym", $synonym));
                    }
                }

                // Same for external references: emit every <ExternalReference>.
                foreach ($gene->ExternalReferenceList as $reference_list) {
                    foreach ($reference_list->ExternalReference as $er) {
                        $db = (string) $er->Source;
                        $db = parent::getRegistry()->getPreferredPrefix($db);
                        $id = (string) $er->Reference;
                        $xref = "{$db}:{$id}";
                        parent::addRDF(parent::triplify($gene_id, parent::getVoc() . "x-{$db}", $xref));
                    }
                }

                // association resource: disorder number plus a hash of the association XML
                $dga_id = parent::getRes() . (string) $d->OrphaNumber . "_" . md5($dga->asXML());
                $ga = $dga->DisorderGeneAssociationType;
                $ga_id = parent::getNamespace() . (string) $ga->attributes()->id;
                $ga_label = (string) $ga->Name;
                $s = $dga->DisorderGeneAssociationStatus;
                $s_id = parent::getNamespace() . (string) $s->attributes()->id;
                $s_label = (string) $s->Name;
                parent::addRDF(
                    parent::describeIndividual($dga_id, "{$ga_label} {$gene_label} in {$disorder_name} ({$s_label})", $ga_id) .
                    parent::describeClass($ga_id, $ga_label, parent::getVoc() . "Disorder-Gene-Association") .
                    parent::triplify($dga_id, parent::getVoc() . "status", $s_id) .
                    parent::describeClass($s_id, $s_label, parent::getVoc() . "Disorder-Gene-Association-Status") .
                    parent::triplify($dga_id, parent::getVoc() . "disorder", $orphanet_id) .
                    parent::describeIndividual($orphanet_id, $disorder_name, parent::getVoc() . "Disorder") .
                    parent::triplify($dga_id, parent::getVoc() . "gene", $gene_id)
                );
            }
            parent::writeRDFBufferToWriteFile();
        }
    }
    unset($xml);
}
/**
 * Converts the tab-delimited product table read from $fpin into RDF.
 *
 * The first line is discarded as the header. Each remaining line must split
 * into 18 columns; other lines are reported and skipped. A product resource
 * is described with its type, names, dosage forms, routes, marketing dates,
 * marketing category, application number, labeller, active ingredients with
 * strength/unit, and pharmacological classes. The buffer is flushed per line.
 *
 * @param resource $fpin open file handle positioned at the start of the table
 */
function product($fpin)
{
    fgets($fpin); // header
    while ($l = fgets($fpin, 100000)) {
        $a = explode("\t", $l);
        if (count($a) != 18) {
            trigger_error("Expected 18 columns, instead found " . count($a));
            continue;
        }

        $product_id = parent::getNamespace() . $a[0];
        $product_label = $a[3];
        $product_type_label = ucfirst(strtolower($a[2]));
        // the type URI is derived from the type label (column 3), not from the
        // proprietary name, so that all products of one type share one resource
        $product_type = parent::getVoc() . str_replace(" ", "-", $product_type_label);

        parent::addRDF(
            parent::describeIndividual($product_id, $product_label, parent::getVoc() . "Product") .
            parent::describeClass(parent::getVoc() . "Product", "NDC Product") .
            parent::triplify($product_id, parent::getVoc() . "product-type", $product_type) .
            parent::describeIndividual($product_type, $product_type_label, parent::getVoc() . "Product-Type") .
            parent::describeClass(parent::getVoc() . "Product-Type", "Product Type") .
            parent::triplifyString($product_id, parent::getVoc() . "product-id", $a[1]) .
            parent::triplifyString($product_id, parent::getVoc() . "proprietary-name", $a[3]) .
            parent::triplifyString($product_id, parent::getVoc() . "trade-name-suffix", $a[4])
        );

        // non-proprietary names, ';' separated
        if ($a[5]) {
            $b = explode(";", $a[5]);
            foreach ($b as $c) {
                parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "non-proprietary-name", trim($c)));
            }
        }

        // dosage forms, ',' separated
        if ($a[6]) {
            $b = explode(",", $a[6]);
            foreach ($b as $c) {
                $dosageform = strtolower($c);
                $dosageform_id = parent::getVoc() . str_replace(" ", "-", ucfirst(strtolower($c)));
                parent::addRDF(
                    parent::describeIndividual($dosageform_id, $dosageform, parent::getVoc() . "Dosage-Form") .
                    parent::describeClass(parent::getVoc() . "Dosage-Form", "NDC Dosage Form") .
                    parent::triplify($product_id, parent::getVoc() . "dosage-form", $dosageform_id)
                );
            }
        }

        // routes, '; ' separated
        if ($a[7]) {
            // MV
            $b = explode("; ", $a[7]);
            foreach ($b as $c) {
                $route = strtolower(trim($c));
                $route_id = parent::getVoc() . str_replace(" ", "-", ucfirst(strtolower($c)));
                parent::addRDF(
                    parent::describeIndividual($route_id, $route, parent::getVoc() . "Route") .
                    parent::describeClass(parent::getVoc() . "Route", "NDC Drug Route") .
                    parent::triplify($product_id, parent::getVoc() . "route", $route_id)
                );
            }
        }

        // marketing dates: rewrite the leading 8 characters as 4-2-2 groups.
        // substr() takes the string first: substr($string, $offset, $length).
        foreach (array(8 => "start-marketing-date", 9 => "end-marketing-date") as $column => $relation) {
            if ($a[$column]) {
                $date = substr($a[$column], 0, 4) . "-" . substr($a[$column], 4, 2) . "-" . substr($a[$column], 6, 2);
                parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . $relation, $date));
            }
        }

        if ($a[10]) {
            parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "marketing-category", $a[10]));
        }
        if ($a[11]) {
            parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "application-number", $a[11]));
        }

        // create a labeller node
        if ($a[12]) {
            $labeller_id = parent::getRes() . md5($a[12]);
            $label = addslashes($a[12]);
            parent::addRDF(
                parent::describeIndividual($labeller_id, $label, parent::getVoc() . "Labeller") .
                parent::describeClass(parent::getVoc() . "Labeller", "NDC Labeller") .
                parent::triplify($product_id, parent::getVoc() . "labeller", $labeller_id)
            );
        }

        // the next three are together: substances, strengths and units are
        // parallel ';' separated lists
        if ($a[13]) {
            // MV
            $substances = explode(";", $a[13]);
            $strengths = explode(";", $a[14]);
            $units = explode(";", $a[15]);
            foreach ($substances as $i => $substance) {
                // list the active ingredient
                $ingredient_label = strtolower($substance);
                // the parallel lists may be shorter than the substance list
                $strength = isset($strengths[$i]) ? $strengths[$i] : '';
                $unit = isset($units[$i]) ? $units[$i] : '';

                $ingredient_id = parent::getRes() . md5($ingredient_label);
                parent::addRDF(
                    parent::describeIndividual($ingredient_id, $ingredient_label, parent::getVoc() . "Ingredient") .
                    parent::describeClass(parent::getVoc() . "Ingredient", "NDC Ingredient") .
                    parent::triplify($product_id, parent::getVoc() . "ingredient", $ingredient_id)
                );

                // describe the substance composition
                $substance_label = "{$strength} {$unit} {$ingredient_label}";
                $substance_id = parent::getRes() . md5($substance_label);
                parent::addRDF(
                    parent::describeIndividual($substance_id, $substance_label, parent::getVoc() . "Substance") .
                    parent::triplifyString($substance_id, parent::getVoc() . "amount", $strength) .
                    parent::describeClass(parent::getVoc() . "Substance", "NDC Substance")
                );

                $unit_id = parent::getVoc() . md5($unit);
                parent::addRDF(
                    parent::describeIndividual($unit_id, $unit, parent::getVoc() . "Unit") .
                    parent::describeClass(parent::getVoc() . "Unit", "NDC Unit") .
                    parent::triplify($substance_id, parent::getVoc() . "amount_unit", $unit_id) .
                    parent::triplify($product_id, parent::getVoc() . "has-part", $substance_id)
                );
            }
        }

        // pharmacological classes, ',' separated
        if ($a[16]) {
            // MV
            $b = explode(",", $a[16]);
            foreach ($b as $c) {
                $cat_id = parent::getVoc() . md5($c);
                parent::addRDF(
                    parent::describeIndividual($cat_id, $c, parent::getVoc() . "Pharmacological-Class") .
                    parent::describeClass(parent::getVoc() . "Pharmacological-Class", "NDC Pharmacological Class") .
                    parent::triplify($product_id, parent::getVoc() . "pharmacological-class", $cat_id)
                );
            }
        }
        parent::WriteRDFBufferToWriteFile();
    }
}
/**
 * Reads the input line by line, accumulating text until a line consisting of
 * "//" is seen, then converts the accumulated record into RDF.
 *
 * For each record the LOCUS, DEFINITION, ACCESSION, VERSION, KEYWORDS,
 * SEGMENT, FEATURES, SOURCE, CONTIG and REFERENCE sections are retrieved and
 * parsed by the class's helper methods; the record resource is "gi:" followed
 * by the parsed gi number. Lines that are a lone newline are not added to the
 * record text. The buffer is flushed after every record.
 */
function process()
{
    $gb_record_str = "";
    while ($aLine = $this->getReadFile()->Read(4096)) {
        preg_match("/^\\/\\/\$/", $aLine, $matches);
        if (count($matches)) {
            // SEGMENT and CONTIG are optional: reset their parse results so a
            // record without them does not reuse the previous record's values
            $parsed_segments_arr = null;
            $parsed_contig_arr = null;

            //now remove the header if it is there
            $gb_record_str = $this->removeHeader($gb_record_str);
            $sectionsRaw = $this->parseGenbankRaw($gb_record_str);
            /**
             * SECTIONS being parsed:
             * locus, definition, accession, version, keywords, segment, source, reference, features
             */
            //get locus section(s)
            $locus = $this->retrieveSections("LOCUS", $sectionsRaw);
            $parsed_locus_arr = $this->parseLocus($locus);
            //get the definition section
            $definition = $this->retrieveSections("DEFINITION", $sectionsRaw);
            $parsed_definition_arr = $this->parseDefinition($definition);
            //get the accession
            $accessions = $this->retrieveSections("ACCESSION", $sectionsRaw);
            $parsed_accession_arr = $this->parseAccession($accessions);
            //get the version
            $versions = $this->retrieveSections("VERSION", $sectionsRaw);
            $parsed_version_arr = $this->parseVersion($versions);
            //get the keywords
            $keywords = $this->retrieveSections("KEYWORDS", $sectionsRaw);
            $parsed_keyword_arr = $this->parseKeywords($keywords);
            //may not be any segment section
            $segments = $this->retrieveSections("SEGMENT", $sectionsRaw);
            if (!empty($segments)) {
                $parsed_segments_arr = $this->parseSegment($segments);
            }
            $features = $this->retrieveSections("FEATURES", $sectionsRaw);
            $parsed_features_arr = $this->parseFeatures($features);
            //get the source section
            $source = $this->retrieveSections("SOURCE", $sectionsRaw);
            $parsed_source_arr = $this->parseSource($source);
            //may not be any contig section
            $contig = $this->retrieveSections("CONTIG", $sectionsRaw);
            if (!empty($contig)) {
                $parsed_contig_arr = $this->parseContig($contig);
            }
            //get the reference section
            $references = $this->retrieveSections("REFERENCE", $sectionsRaw);
            $parsed_refs_arr = $this->parseReferences($references);

            // the record itself
            $gb_res = "gi:" . $parsed_version_arr['gi'];
            $gb_label = utf8_encode(htmlspecialchars($parsed_definition_arr[0]));
            parent::AddRDF(
                parent::describeIndividual($gb_res, $gb_label, $this->getVoc() . "genbank-record") .
                parent::triplifyString($gb_res, $this->getVoc() . 'sequence-length', $parsed_locus_arr[0]['sequence_length']) .
                parent::triplifyString($gb_res, $this->getVoc() . 'strandedness', $parsed_locus_arr[0]['strandedness']) .
                parent::triplify($gb_res, "rdf:type", $this->getRes() . $parsed_locus_arr[0]['mol_type']) .
                parent::triplifyString($gb_res, $this->getVoc() . 'chromosome-shape', $parsed_locus_arr[0]['chromosome_shape']) .
                parent::triplifyString($gb_res, $this->getVoc() . 'division-name', $parsed_locus_arr[0]['division_name']) .
                parent::triplifyString($gb_res, $this->getVoc() . 'date-of-entry', $parsed_locus_arr[0]['date']) .
                parent::triplifyString($gb_res, $this->getVoc() . 'source', utf8_encode($parsed_source_arr[0])) .
                parent::QQuadO_URL($gb_res, $this->getVoc() . 'fasta-seq', 'https://www.ncbi.nlm.nih.gov/sviewer/viewer.cgi?sendto=on&db=nucest&dopt=fasta&val=' . $parsed_version_arr['gi'])
            );

            // features: one class per feature type, one individual per type+location
            foreach ($parsed_features_arr as $aFeature) {
                //getFeatures
                $type = $aFeature['type'];
                $feat_desc = $this->getFeatures($type);
                $label = preg_replace('/\\s\\s*/', ' ', $feat_desc['definition']);
                $comment = null;
                $value = $aFeature['value'];
                // the first '/'-separated chunk is the location, the rest are qualifiers
                $value_arr = explode("/", $value);
                $location = preg_replace('/\\n/', '', $value_arr[0]);
                $class_id = parent::getVoc() . md5($type);
                $feat_res = parent::getRes() . md5($type . $location . $gb_res);
                $feat_label = utf8_encode($type . " " . $location . " for " . $gb_res);
                if (isset($feat_desc['comment'])) {
                    $comment = $feat_desc['comment'];
                    $comment = preg_replace('/\\s\\s*/', ' ', $comment);
                    $label .= " " . $comment;
                }
                parent::AddRDF(
                    parent::describeClass($class_id, $label, parent::getVoc() . "Feature") .
                    parent::describeIndividual($feat_res, $feat_label, $class_id) .
                    parent::triplify($gb_res, $this->getVoc() . "has-feature", $feat_res)
                );
                foreach ($value_arr as $aL) {
                    //check if aL has an equals in it
                    $p = "/(\\S+)\\=(.*)/";
                    preg_match($p, $aL, $m);
                    if (count($m)) {
                        if ($m[1] == "db_xref") {
                            parent::AddRDF(parent::triplify($feat_res, "rdfs:seeAlso", str_replace("\"", "", $m[2])));
                        } else {
                            parent::AddRDF(parent::triplifyString($feat_res, $this->getVoc() . $m[1], utf8_encode(str_replace("\"", "", $m[2]))));
                        }
                    }
                }
            }

            foreach ($parsed_accession_arr[0] as $acc) {
                parent::AddRDF(parent::triplifyString($gb_res, $this->getVoc() . "accession", $acc));
            }
            if (isset($parsed_version_arr['versioned_accession'])) {
                parent::AddRDF(parent::triplifyString($gb_res, $this->getVoc() . "versioned-accession", $parsed_version_arr['versioned_accession']));
            }
            if (isset($parsed_contig_arr)) {
                foreach ($parsed_contig_arr as $aContig) {
                    parent::AddRDF(parent::triplifyString($gb_res, $this->getVoc() . "contig", parent::safeLiteral($aContig)));
                }
            }
            foreach ($parsed_keyword_arr as $akw) {
                parent::AddRDF(parent::triplifyString($gb_res, $this->getVoc() . "keyword", $akw));
            }
            if (isset($parsed_segments_arr)) {
                foreach ($parsed_segments_arr as $aSeg) {
                    parent::AddRDF(
                        parent::triplifyString($gb_res, $this->getVoc() . "segment-number", $aSeg['segment_number']) .
                        parent::triplifyString($gb_res, $this->getVoc() . "total-segments", $aSeg['total_segments'])
                    );
                }
            }

            foreach ($parsed_refs_arr as $aRef) {
                // NOTE(review): the reference resource is named from rand(), so
                // its URI differs between runs — confirm whether that is intended
                $r = rand();
                $ref_res = $this->getRes() . md5($r);
                $ref_label = "reference for " . $gb_res;
                if (isset($aRef['TITLE'])) {
                    parent::AddRDF(
                        parent::describeIndividual($ref_res, $ref_label, $this->getVoc() . "reference") .
                        parent::triplifyString($ref_res, $this->getVoc() . "title", $aRef['TITLE'])
                    );
                }
                if (isset($aRef['PUBMED'])) {
                    parent::AddRDF(parent::triplify($ref_res, $this->getVoc() . "x-pubmed", 'pubmed:' . $aRef['PUBMED']));
                }
                if (isset($aRef['AUTHORS'])) {
                    parent::AddRDF(parent::triplifyString($ref_res, $this->getVoc() . "authors", $aRef['AUTHORS']));
                }
                parent::AddRDF(
                    parent::triplify($gb_res, $this->getVoc() . "reference", $ref_res) .
                    parent::triplifyString($ref_res, $this->getVoc() . "coordinates", $aRef['COORDINATES']) .
                    parent::triplifyString($ref_res, $this->getVoc() . "citation", $aRef['JOURNAL'])
                );
            }

            // start accumulating the next record
            $gb_record_str = "";
            $this->WriteRDFBufferToWriteFile();
            continue;
        }

        // keep every line except those that are only a newline
        preg_match("/^\n\$/", $aLine, $matches);
        if (count($matches) == 0) {
            $gb_record_str .= $aLine;
        }
    } //while
}
/**
 * Converts the 16-column, tab-delimited gene2accession table into RDF.
 *
 * The first line read is discarded and lines starting with '#' are ignored.
 * Rows whose taxonomy id is not in $this->taxids (when that filter is set)
 * are skipped. When both genomic positions are present a faldo:Region with
 * begin/end positions is described; every other non-'-' column is attached
 * to the gene either as a resource (when the column has a namespace) or as
 * a literal. The buffer is flushed after every row.
 */
private function gene2accession()
{
    // discard the first line
    $this->getReadFile()->read(200000);

    // column index => predicate suffix ('rel') and, for identifier columns, target namespace ('ns')
    $columns = array(
        0 => array('rel' => "x-taxonomy", 'ns' => "taxonomy"),
        1 => array('rel' => "ncbigene", 'ns' => "ncbigene"),
        2 => array('rel' => "status"),
        3 => array('rel' => "rna-nucleotide-accession.version", 'ns' => "genbank"),
        4 => array('rel' => "rna-nucleotide-gi", 'ns' => "gi"),
        5 => array('rel' => "protein-accession.version", 'ns' => "genbank"),
        6 => array('rel' => "protein-gi", 'ns' => "gi"),
        7 => array('rel' => "genomic-nucleotide-accession.version", 'ns' => "genbank"),
        8 => array('rel' => "genomic-nucleotide-gi", 'ns' => "gi"),
        9 => array('rel' => "genomic-start-position"),
        10 => array('rel' => "genomic-end-position"),
        11 => array('rel' => "orientation"),
        12 => array('rel' => "assembly"),
        13 => array('rel' => "mature-peptide-accession.version", 'ns' => "genbank"),
        14 => array('rel' => "mature-peptide-gi", 'ns' => "gi"),
        15 => array('rel' => "symbol"),
    );

    // strand symbol => faldo position class; anything else is a plain stranded position
    $strand_classes = array(
        "+" => "faldo:ForwardStrandPosition",
        "-" => "faldo:ReverseStrandPosition",
    );

    // columns handled elsewhere: the gene id itself and the position/orientation triple
    $skipped_columns = array(1, 9, 10, 11);

    //(tab is used as a separator, pound sign - start of a comment)
    $row_counter = 1;
    while ($line = $this->getReadFile()->read(200000)) {
        if ($line[0] == "#") {
            continue;
        }
        // progress report and clear() every 10000 data rows
        if ($row_counter++ % 10000 == 0) {
            echo $row_counter . PHP_EOL;
            parent::clear();
        }

        $fields = explode("\t", rtrim($line));
        if (count($fields) != 16) {
            trigger_error("gene2accession: expecting 16 columns, found " . count($fields) . " instead", E_USER_ERROR);
        }

        $taxid = $fields[0];
        if (isset($this->taxids) and !isset($this->taxids[$taxid])) {
            continue;
        }

        $gene_uri = parent::getNamespace() . $fields[1];
        // a status other than '-' switches genbank accessions to the refseq namespace
        $is_refseq = ($fields[2] != '-');

        if ($fields[9] != '-' and $fields[10] != '-') {
            $gene_number = $fields[1];
            $accession = $fields[7];
            $begin = $fields[9];
            $end = $fields[10];

            $region = parent::getRes() . $accession . "/" . $begin . "-" . $end;
            $start_pos = parent::getRes() . $accession . "/" . $begin;
            $stop_pos = parent::getRes() . $accession . "/" . $end;
            $orientation = isset($strand_classes[$fields[11]])
                ? $strand_classes[$fields[11]]
                : "faldo:StrandedPosition";

            parent::addRDF(
                parent::describeIndividual($region, "location of ncbigene:{$gene_number} on {$accession}", "faldo:Region") .
                parent::describeIndividual($start_pos, "start of ncbigene:{$gene_number} on {$accession}", "faldo:ExactPosition") .
                parent::describeIndividual($stop_pos, "stop position of ncbigene:{$gene_number} on {$accession}", "faldo:ExactPosition") .
                parent::triplify($gene_uri, "faldo:location", $region) .
                parent::triplify($region, "faldo:begin", $start_pos) .
                parent::triplify($start_pos, "rdf:type", $orientation) .
                parent::triplifyString($start_pos, "faldo:position", $begin, "xsd:integer") .
                parent::triplify($start_pos, "faldo:reference", "refseq:" . $accession) .
                parent::triplify($region, "faldo:end", $stop_pos) .
                parent::triplify($stop_pos, "rdf:type", $orientation) .
                parent::triplifyString($stop_pos, "faldo:position", $end, "xsd:integer") .
                parent::triplify($stop_pos, "faldo:reference", "refseq:" . $accession)
            );
        }

        foreach ($columns as $index => $column) {
            if ($fields[$index] == "-") {
                continue;
            }
            if (in_array($index, $skipped_columns)) {
                continue;
            }
            if (!isset($column['ns'])) {
                parent::addRDF(parent::triplifyString($gene_uri, parent::getVoc() . $column['rel'], $fields[$index]));
                continue;
            }
            $prefix = $column['ns'];
            if ($prefix == 'genbank' and $is_refseq) {
                $prefix = 'refseq';
            }
            parent::addRDF(parent::triplify($gene_uri, parent::getVoc() . $column['rel'], "{$prefix}:" . $fields[$index]));
        }
        parent::writeRDFBufferToWriteFile();
    } //while
}
/**
 * Converts the comma-separated gene expression table into RDF.
 *
 * The first line is split on commas and must give 8 columns. Every further
 * line is parsed as CSV into: gene symbol, description, gene id, number of
 * datasets, number over-expressed, number under-expressed, p-value and
 * expression direction. Each row yields an expression resource linked to the
 * ncbigene entry and to an evidence resource carrying the counts and p-value.
 *
 * @return bool|null false when the header does not have 8 columns; nothing otherwise
 */
function gene_expression()
{
    $header = explode(",", parent::getReadFile()->read());
    $expected_columns = 8;
    $n = count($header);
    if ($n != $expected_columns) {
        trigger_error("Found {$n} columns in gene file - expecting {$expected_columns}!", E_USER_WARNING);
        return false;
    }

    while ($line = parent::getReadFile()->read(200000)) {
        $row = str_getcsv($line);
        $mgi_symbol = $row[0];
        $mgi_description = $row[1];
        $geneid = $row[2];
        $total_datasets = $row[3];
        $total_ovexp = $row[4];
        $total_underexp = $row[5];
        $p_value = $row[6];
        $expression = $row[7];

        // both resource ids hash the same row signature; the evidence id adds a suffix
        $signature = $geneid . $total_datasets . $total_ovexp . $total_underexp . $p_value . $expression;
        $id = parent::getRes() . md5($signature);
        $evidence_id = parent::getRes() . md5($signature . "_evidence");
        $gene_uri = "ncbigene:" . $geneid;

        $label = "Dietary restriction induced {$expression}-expression of {$mgi_symbol}"
            . " based on microarray results from {$total_datasets} datasets, with p-value {$p_value}";
        $type_label = "Gene " . ucfirst($expression) . " Expression";
        $type = parent::getVoc() . str_replace(" ", "-", $type_label);

        parent::addRDF(
            parent::describeIndividual($id, $label, $type) .
            parent::describeClass($type, $type_label) .
            parent::triplify($id, parent::getVoc() . "gene", $gene_uri) .
            parent::triplifyString($gene_uri, parent::getVoc() . "mgi-gene-symbol", $mgi_symbol) .
            parent::triplifyString($gene_uri, parent::getVoc() . "mgi-gene-description", $mgi_description) .
            parent::triplify($id, parent::getVoc() . "evidence", $evidence_id) .
            parent::triplifyString($id, parent::getVoc() . "perturbation-context", "dietary restriction") .
            parent::triplifyString($evidence_id, parent::getVoc() . "total-number-datasets", $total_datasets) .
            parent::triplifyString($evidence_id, parent::getVoc() . "total-number-datasets-overexpressed", $total_ovexp) .
            parent::triplifyString($evidence_id, parent::getVoc() . "total-number-datasets-underexpressed", $total_underexp) .
            parent::triplifyString($evidence_id, parent::getVoc() . "p-value", $p_value)
        );
        parent::writeRDFBufferToWriteFile();
    } //while
}
/**
 * Convert the tab-separated gene interaction file into RDF.
 *
 * Lines starting with '#' are skipped, as are lines that do not have exactly
 * 11 columns. Column 0 is the interaction identifier, column 1 the interaction
 * type, column 2 additional information about the interaction, and columns 5
 * and 8 the two genes involved.
 *
 * Three kinds of record are emitted:
 *  - "No_interaction": a non-interaction individual plus an
 *    owl:NegativeObjectPropertyAssertion between the two genes;
 *  - "N/A" or "Genetic_interaction": a plain interaction of the given type;
 *  - anything else: an interaction whose class is a subtype (named after the
 *    additional information) of the plain interaction class.
 *
 * The RDF buffer is written out after each processed line.
 */
function gene_interactions() {
    // Predicate (within the vocabulary namespace) asserted between the two
    // genes for each supported interaction type.
    $predicates = array(
        "Genetic" => "genetically-interacts-with",
        "Physical" => "physically-interacts-with",
        "Predicted" => "predicted-to-interact-with",
        "Regulatory" => "regulates",
    );

    while ($l = parent::getReadFile()->read()) {
        if ($l[0] == '#') {
            continue;
        }

        $data = explode("\t", $l);
        if (count($data) != 11) {
            trigger_error("Found " . count($data) . " columns, expecting 11");
            continue;
        }

        $interaction = $data[0];
        $interaction_type = str_replace("_", "-", $data[1]);
        $interaction_type_label = str_replace("_", " ", $data[1]);
        $int_additional_info = $data[2];
        $gene1 = $data[5];
        $gene2 = $data[8];

        // Without this guard an unsupported type would silently reuse the
        // predicate left over from the previous line (or an undefined
        // variable on the first line) and assert a wrong relation.
        if (!isset($predicates[$interaction_type])) {
            trigger_error("Unknown interaction type '{$data[1]}' for interaction {$interaction} - skipping", E_USER_WARNING);
            continue;
        }
        $int_pred = parent::getVoc() . $predicates[$interaction_type];

        $interaction_id = parent::getNamespace() . $interaction;
        $gene1_uri = parent::getNamespace() . $gene1;
        $gene2_uri = parent::getNamespace() . $gene2;
        $interaction_class = parent::getVoc() . $interaction_type . "-Interaction";

        if ($int_additional_info == "No_interaction") {
            $non_interaction_class = parent::getVoc() . $interaction_type . "-Non-Interaction";
            $interaction_label = "No " . strtolower($interaction_type) . " interaction between " . $gene1 . " and " . $gene2;
            parent::addRDF(
                parent::describeIndividual($interaction_id, $interaction_label, $non_interaction_class)
                . parent::describeClass($non_interaction_class, $interaction_type_label . " non-interaction")
                . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene1_uri)
                . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene2_uri)
            );

            // State explicitly that the predicate does NOT hold between the genes.
            $npa_id = parent::getRes() . md5($interaction_id . "negative property assertion");
            $npa_label = "Negative property assertion stating that " . $gene1 . " and " . $gene2
                . " do not have a " . $interaction_type_label . " interaction";
            parent::addRDF(
                parent::describeIndividual($npa_id, $npa_label, "owl:NegativeObjectPropertyAssertion")
                . parent::triplify($npa_id, "owl:sourceIndividual", $gene1_uri)
                . parent::triplify($npa_id, "owl:targetIndividual", $gene2_uri)
                . parent::triplify($npa_id, "owl:assertionProperty", $int_pred)
            );
        } elseif ($int_additional_info == "N/A" || $int_additional_info == "Genetic_interaction") {
            $interaction_label = $interaction_type . " interaction between " . $gene1 . " and " . $gene2;
            parent::addRDF(
                parent::describeIndividual($interaction_id, $interaction_label, $interaction_class)
                . parent::describeClass($interaction_class, $interaction_type_label . " Interaction")
                . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene1_uri)
                . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene2_uri)
                . parent::triplify($gene1_uri, $int_pred, $gene2_uri)
            );
        } else {
            // The additional information, when present, names a more specific
            // subtype of the plain interaction class.
            $has_info = $int_additional_info != "";
            $interaction_label = ($has_info ? $int_additional_info . " " : "")
                . strtolower($interaction_type) . " interaction between " . $gene1 . " and " . $gene2;
            $type = parent::getVoc() . ($has_info ? $int_additional_info . "-" : "") . $interaction_type . "-Interaction";
            $type_label = ($has_info ? $int_additional_info . " " : "") . $interaction_type_label . " Interaction";
            parent::addRDF(
                parent::describeIndividual($interaction_id, $interaction_label, $type)
                . parent::describeClass($type, $type_label, $interaction_class)
                // Same label as the plain-interaction branch uses for this class
                // (previously misspelled "Interation").
                . parent::describeClass($interaction_class, $interaction_type_label . " Interaction")
                . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene1_uri)
                . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene2_uri)
                . parent::triplify($gene1_uri, $int_pred, $gene2_uri)
            );
        }

        parent::writeRDFBufferToWriteFile();
    }
}
/**
 * Convert a 17-column, tab-separated GO annotation file into RDF.
 *
 * Lines starting with "!" are skipped. For every remaining line the annotated
 * entity is described (label, symbol, taxonomy, synonyms), linked to the GO
 * term through a relation derived from the aspect (negated when the qualifier
 * contains NOT), and linked to an annotation node that records the GO term,
 * evidence code, category, assigning source, entry date and PubMed references.
 * The RDF buffer is written out after each annotation.
 *
 * @param string $file Used as part of the annotation node identifier
 *                     ("<res><file>_<counter>").
 *
 * @return false|null false when a line does not have 17 columns; otherwise
 *                    nothing is returned.
 */
function process($file) {
    // Relation used instead of the plain aspect when the annotation is negated.
    $negated_relations = array(
        'process' => 'not-in-process',
        'function' => 'not-has-function',
        'component' => 'not-in-component',
    );

    $z = 1;
    while ($l = parent::getReadFile()->read(100000)) {
        if ($z % 100000 == 0) {
            parent::clear();
        }
        if ($l[0] == "!") {
            continue;
        }

        $fields = explode("\t", $l);
        if (count($fields) != 17) {
            trigger_error("Expected 17 columns, but found " . count($fields), E_USER_ERROR);
            return false;
        }

        $db = $fields[0];
        $id = $fields[1];
        $symbol = $fields[2];
        $qualifier = $fields[3];
        // Drop the leading "GO:" so the identifier can be re-prefixed with "go:".
        $goid = substr($fields[4], 3);
        $refs = $this->getDbReferences($fields[5]);
        $eco = $this->getEvidenceCodeLabelArr($fields[6]);
        $aspect = $this->getAspect($fields[8]);
        $label = $fields[9];
        $synonyms = explode("|", $fields[10]);
        $taxid = $fields[12];
        $date = $this->parseDate($fields[13]);
        $assignedBy = $fields[14];

        // entity id
        $eid = $this->getdbURI($db, $id);
        if (!$eid) {
            print_r($fields);
            continue;
        }

        parent::addRDF(
            parent::describeIndividual($eid, $label, parent::getVoc() . "GO-Annotation")
            . parent::describeClass(parent::getVoc() . "GO-Annotation", "GO Annotation")
            . parent::triplifyString($eid, parent::getVoc() . "symbol", $symbol)
        );
        parent::addRDF(parent::triplify($eid, parent::getVoc() . "x-taxonomy", $taxid));

        foreach ($synonyms as $s) {
            if (!empty($s)) {
                parent::addRDF(parent::triplifyString($eid, parent::getVoc() . "synonym", $s));
            }
        }

        // The qualifier column may hold several pipe-separated values
        // (e.g. "NOT|contributes_to"), so look for NOT among them rather than
        // requiring the whole column to equal "NOT".
        $rel = $aspect;
        if (in_array('NOT', explode('|', $qualifier), true) && isset($negated_relations[$aspect])) {
            $rel = $negated_relations[$aspect];
        }
        parent::addRDF(
            parent::describeObjectProperty(parent::getVoc() . $rel, str_replace("-", " ", $rel))
            . parent::triplify($eid, parent::getVoc() . $rel, "go:" . $goid)
        );

        $type = key($eco);
        // The counter only advances for lines that produce an annotation node.
        $aid = parent::getRes() . $file . "_" . $z++;
        parent::addRDF(
            parent::describeObjectProperty(parent::getVoc() . "go-annotation", "GO annotation")
            . parent::triplify($eid, parent::getVoc() . "go-annotation", $aid)
        );

        $cat = parent::getRes() . md5($aspect);
        parent::addRDF(
            parent::describeIndividual($aid, "{$id}-go:{$goid} association", parent::getVoc() . "GO-Annotation")
            . parent::triplify($aid, parent::getVoc() . "target", $eid)
            . parent::triplify($aid, parent::getVoc() . "go-term", "go:" . $goid)
            . parent::triplify($aid, parent::getVoc() . "evidence", "eco:" . $eco[$type][1])
            . parent::triplify($aid, parent::getVoc() . "go-category", $cat)
            . parent::describeClass($cat, $aspect)
            . parent::triplifyString($aid, parent::getVoc() . "assigned-by", $assignedBy)
        );

        if ($date != '') {
            parent::addRDF(parent::triplifyString($aid, parent::getVoc() . "entry-date", $date . "T00:00:00Z", "xsd:dateTime"));
        }

        // Only PubMed references are linked; other reference databases are ignored.
        foreach ($refs as $ref) {
            $b = explode(":", $ref);
            if ($b[0] == 'PMID') {
                parent::addRDF(parent::triplify($aid, parent::getVoc() . "article", "pubmed:" . $b[1]));
            }
        }

        // write RDF to file
        parent::writeRDFBufferToWriteFile();
    }
}
/**
 * Describe a titled resource as an individual of a class named after $type.
 *
 * The resource URI is the resource namespace followed by the MD5 hash of the
 * title; the class URI is the vocabulary namespace followed by $type with
 * spaces replaced by hyphens. Both descriptions are added to the RDF buffer.
 *
 * @param string $title Label of the resource; a falsy title produces nothing.
 * @param string $type  Human-readable class label.
 *
 * @return string|null URI of the described resource, or null for a falsy title.
 */
public function makeDescription($title, $type) {
    if ($title) {
        $resource = parent::getRes() . md5($title);
        $class = parent::getVoc() . str_replace(" ", "-", $type);

        $rdf = parent::describeIndividual($resource, $title, $class);
        $rdf .= parent::describeClass($class, $type);
        parent::addRDF($rdf);

        return $resource;
    }

    return null;
}
/**
 * Convert one MIRIAM database entry (as a nested array) into RDF.
 *
 * The entry itself is described first, followed by its attributes, comment,
 * definition, synonyms, URIs, resources, tags, documentation links,
 * restrictions and annotation value sets, each only when present in $item.
 * Child elements that may appear either once or several times are normalised
 * to a list before being iterated.
 *
 * @param array $item Entry data; reads the keys '@attributes' (with 'id'),
 *                    'name', 'namespace' and the optional keys 'comment',
 *                    'definition', 'synonyms', 'uris', 'resources', 'tags',
 *                    'documentations', 'restrictions' and 'annotation'.
 */
function parseItem($item) {
    $id = $item['@attributes']['id'];
    $label = $item['name'];

    parent::addRDF(
        parent::describeIndividual($id, $label, parent::getVoc() . "Entry")
        . parent::describeClass(parent::getVoc() . "Entry", "MIRIAM database entry")
        . parent::triplifyString($id, parent::getVoc() . "namespace", $item['namespace'])
    );

    if (isset($item['@attributes'])) {
        foreach ($item['@attributes'] as $k => $v) {
            parent::addRDF(parent::triplifyString($id, parent::getVoc() . $k, $v));
        }
    }

    if (isset($item['comment'])) {
        parent::addRDF(parent::triplifyString($id, parent::getVoc() . "comment", $item['comment']));
    }

    if (isset($item['definition'])) {
        parent::addRDF(parent::triplifyString($id, parent::getVoc() . "definition", $item['definition']));
    }

    if (isset($item['synonyms'])) {
        $synonyms = $item['synonyms']['synonym'];
        if (!is_array($synonyms)) {
            $synonyms = array($synonyms);
        }
        foreach ($synonyms as $synonym) {
            parent::addRDF(parent::triplifyString($id, "skos:altLabel", $synonym));
        }
    }

    if (isset($item['uris'])) {
        // A single <uri> arrives as a scalar; wrap it so it is not dropped by
        // the loop (the other repeated elements are normalised the same way).
        $uris = $item['uris']['uri'];
        if (!is_array($uris)) {
            $uris = array($uris);
        }
        foreach ($uris as $uri) {
            parent::addRDF(parent::triplifyString($id, parent::getVoc() . "uri", $uri));
        }
    }

    if (isset($item['resources'])) {
        // A single resource is recognised by its 'dataEntry' key being
        // directly reachable; otherwise 'resource' already is a list.
        if (isset($item['resources']['resource']['dataEntry'])) {
            $resources = array($item['resources']['resource']);
        } else {
            $resources = $item['resources']['resource'];
        }
        foreach ($resources as $resource) {
            $rid = $resource['@attributes']['id'];
            parent::addRDF(
                parent::describeIndividual($rid, $resource['dataInfo'], parent::getVoc() . "Resource")
                . parent::describeClass(parent::getVoc() . "Resource", "MIRIAM Resource")
                . parent::triplify($rid, parent::getVoc() . "url", $resource['dataResource'])
                . parent::triplifyString($rid, parent::getVoc() . "urlTemplate", $resource['dataEntry'])
                // Array values for these fields are replaced by an empty string.
                . parent::triplifyString($rid, parent::getVoc() . "organization", is_array($resource['dataInstitution']) ? "" : $resource['dataInstitution'])
                . parent::triplifyString($rid, parent::getVoc() . "location", is_array($resource['dataLocation']) ? "" : $resource['dataLocation'])
                . parent::triplify($id, parent::getVoc() . "resource", $rid)
            );
        }
    }

    if (isset($item['tags'])) {
        $tags = $item['tags']['tag'];
        if (!is_array($tags)) {
            $tags = array($tags);
        }
        foreach ($tags as $tag) {
            parent::addRDF(parent::triplifyString($id, parent::getVoc() . "tag", $tag));
        }
    }

    if (isset($item['documentations'])) {
        $documentations = $item['documentations']['documentation'];
        if (!is_array($documentations)) {
            $documentations = array($documentations);
        }
        foreach ($documentations as $documentation) {
            if (strstr($documentation, "pubmed")) {
                $uri = "pubmed:" . substr($documentation, strrpos($documentation, ":") + 1);
            } elseif (($doi_pos = strpos($documentation, "doi:")) !== false) {
                // Skip the 4-character "doi:" scheme so the resolver URL is
                // followed directly by the DOI name itself.
                $uri = "http://dx.doi.org/" . substr($documentation, $doi_pos + 4);
            } else {
                $uri = $documentation;
            }
            parent::addRDF(parent::triplify($id, parent::getVoc() . "documentation", $uri));
        }
    }

    if (isset($item['restrictions'])) {
        // A single restriction is recognised by its 'statement' key.
        if (isset($item['restrictions']['restriction']['statement'])) {
            $restrictions = array($item['restrictions']['restriction']);
        } else {
            $restrictions = $item['restrictions']['restriction'];
        }
        foreach ($restrictions as $i => $restriction) {
            $rid = parent::getRes() . str_replace(":", "", $id) . "_" . ($i + 1);
            $a = $restriction['@attributes'];
            $rid_type = parent::getVoc() . 'restriction_type_' . $a['type'];

            $rdf = parent::describeIndividual($rid, $a['desc'], parent::getVoc() . "Restriction")
                . parent::describeClass(parent::getVoc() . "Restriction", "Resource Restriction")
                . parent::triplify($rid, "rdf:type", $rid_type)
                . parent::describeClass($rid_type, $a['desc'], parent::getVoc() . "Restriction")
                . parent::triplifyString($rid, "dct:description", $restriction['statement']);
            // Only link a page when the restriction actually provides one,
            // instead of emitting a triple with an empty object.
            if (isset($restriction['link']) && $restriction['link'] !== "") {
                $rdf .= parent::triplify($rid, "foaf:page", $restriction['link']);
            }
            $rdf .= parent::triplify($id, parent::getVoc() . "restriction", $rid);
            parent::addRDF($rdf);
        }
    }

    /*
     <annotation>
       <format name="SBML">
         <elements>
           <element>reaction</element>
           <element>event</element>
           <element>rule</element>
           <element>species</element>
         </elements>
       </format>
    */
    if (isset($item['annotation'])) {
        // A single format is recognised by its 'elements' key.
        if (isset($item['annotation']['format']['elements'])) {
            $formats = array($item['annotation']['format']);
        } else {
            $formats = $item['annotation']['format'];
        }
        foreach ($formats as $i => $format) {
            $name = $format['@attributes']['name'];
            $myid = str_replace("MIR:", parent::getRes(), $id) . "_annotation_" . ($i + 1) . "_" . urlencode($name);
            parent::addRDF(
                parent::describeIndividual($myid, "{$label} used by {$name}", parent::getVoc() . "ValueSet")
                . parent::describeClass(parent::getVoc() . "ValueSet", "MIRIAM Value Set")
                . parent::triplifyString($myid, parent::getVoc() . "used-in", $name)
                . parent::triplify($myid, parent::getVoc() . "uses", $id)
            );

            $elements = $format['elements']['element'];
            if (!is_array($elements)) {
                $elements = array($elements);
            }
            foreach ($elements as $element) {
                parent::addRDF(parent::triplifyString($myid, parent::getVoc() . "used-for", $element));
            }
        }
    }
}