Beispiel #1
0
 /**
  * Convert the pathway interaction file into RDF (unfinished and disabled).
  *
  * The method returns immediately, so nothing below the guard executes.
  * The draft that follows resolves the "From" and "To" columns of each
  * tab-separated row against $this->drugs and then $this->genes; names
  * found in neither list fall back to a resource URI built from the
  * URL-encoded name.
  *
  * @return void
  */
 function pathways()
 {
     // needs to be finished
     return;

     while ($l = $this->getReadFile()->read(50000)) {
         $a = explode("\t", trim($l));
         // Columns: From, To, Reaction Type, Controller, Control Type, Cell Type, PubMed Id, Genes, Drugs, Diseases
         // e.g.: hmg coa reductase inhibitors | Active & Inactive metabolites | Biochemical Reaction | CYP2C19,CYP2C8,... | Catalysis | hepatocyte | | CYP3A4,CYP3A5,... | hmg coa reductase inhibitors
         if (!isset($a[1])) {
             // a row without both participants cannot describe an interaction
             continue;
         }

         // NOTE(review): a match in $this->drugs keeps the bare array key, while a
         // match in $this->genes is prefixed with the namespace - confirm whether
         // drug hits should be prefixed as well before enabling this code.
         $c1 = array_search($a[0], $this->drugs);
         if ($c1 === FALSE) {
             $c1 = array_search($a[0], $this->genes);
             if ($c1 === FALSE) {
                 // not found: mint a resource from the name itself
                 // (the draft encoded $c1, which is FALSE at this point)
                 $c1 = parent::getRes() . urlencode($a[0]);
             } else {
                 // actual id
                 $c1 = parent::getNamespace() . $c1;
             }
         }

         $c2 = array_search($a[1], $this->drugs);
         if ($c2 === FALSE) {
             $c2 = array_search($a[1], $this->genes);
             if ($c2 === FALSE) {
                 // not found: mint a resource from the name itself
                 $c2 = parent::getRes() . urlencode($a[1]);
             } else {
                 // actual id
                 $c2 = parent::getNamespace() . $c2;
             }
         }

         // TODO: $uri, $c1 and $c2 are computed but no triples are emitted yet.
         $id = md5($l);
         $uri = parent::getRes() . $id;
         parent::writeRDFBufferToWriteFile();
     }
 }
Beispiel #2
0
 /**
  * Convert every MedlineCitation record of the current read file into RDF.
  *
  * The whole file is loaded with SimpleXML. For each citation the method
  * emits the record itself, its owner/status/version metadata, MeSH
  * headings, chemicals, gene symbols, abstracts, authors, languages,
  * keywords, other identifiers, data banks, grants, comments/corrections,
  * general notes, space flight missions, the journal and its publication
  * date, and electronic locations. The RDF buffer is flushed to the write
  * file after each record.
  *
  * When $this->id_list is set, only citations whose PMID is a key of that
  * list are processed.
  *
  * Element and attribute names are case-sensitive in SimpleXML; the names
  * used below follow the MEDLINE/PubMed XML element names.
  *
  * @return void
  */
 function pubmed()
 {
     $fileName = $this->getReadFile()->getFileName();
     $ext = strtolower(substr(strrchr($fileName, '.'), 1));
     if ($ext === "xml") {
         $citations = new SimpleXMLElement($fileName, 0, TRUE);
     } else {
         // "gz" and - as the previous always-true `$ext = "gz"` test effectively
         // did for every file - any other extension goes through the zlib
         // wrapper, which per the PHP manual also reads uncompressed input.
         $citations = new SimpleXMLElement("compress.zlib://" . $fileName, 0, TRUE);
     }

     foreach ($citations->MedlineCitation as $citation) {
         $this->setCheckPoint('record');
         $pmid = (string) $citation->PMID;
         if (isset($this->id_list)) {
             if (!isset($this->id_list[$pmid])) {
                 continue;
             } else {
                 echo "processing {$pmid}" . PHP_EOL;
             }
         }
         $pmid_uri = parent::getNamespace() . $pmid;
         $article = $citation->Article;
         parent::addRDF(
             parent::describeIndividual($pmid_uri, $this->getString($article->ArticleTitle), parent::getVoc() . "PubMedRecord")
             . parent::describeClass(parent::getVoc() . "PubMedRecord", "PubMedRecord")
             . parent::triplify($pmid_uri, "rdfs:seeAlso", "http://www.ncbi.nlm.nih.gov/pubmed/{$pmid}")
         );

         // metadata about the record
         $owner = parent::getRes() . md5($citation['Owner']);
         parent::addRDF(
             parent::describeIndividual($owner, $citation['Owner'], "foaf:Agent")
             . parent::triplify($pmid_uri, parent::getVoc() . "owner", $owner)
         );
         $status = parent::getRes() . md5($citation['Status']);
         parent::addRDF(
             parent::describeIndividual($status, $citation['Status'], parent::getVoc() . "Status")
             . parent::describeClass(parent::getVoc() . "Status", "Status")
             . parent::triplify($pmid_uri, parent::getVoc() . "status", $status)
             . parent::triplifyString($pmid_uri, parent::getVoc() . "version", $citation['VersionID'])
         );
         $this->addDate($pmid_uri, "version-date", $citation['VersionDate']);
         $this->addDate($pmid_uri, "date-created", $citation->DateCreated);
         $this->addDate($pmid_uri, "date-revised", $citation->DateRevised);
         $this->addDate($pmid_uri, "date-completed", $citation->DateCompleted);

         // MeSH headings with their descriptor and optional qualifiers
         if (!empty($citation->MeshHeadingList)) {
             $i = 0;
             foreach ($citation->MeshHeadingList->MeshHeading as $mh) {
                 $id = parent::getRes() . $pmid . "_mh_" . ++$i;
                 $did = parent::getRes() . md5($mh->DescriptorName);
                 parent::addRDF(
                     parent::describeIndividual($id, $mh->DescriptorName, parent::getVoc() . "MeshHeading")
                     . parent::describeClass(parent::getVoc() . "MeshHeading", "MeSH Heading")
                     . parent::triplify($pmid_uri, parent::getVoc() . "mesh-heading", $id)
                     . parent::triplifyString($id, parent::getVoc() . "descriptor-major-topic", "" . $mh->DescriptorName['MajorTopicYN'])
                     . parent::describeIndividual($did, "" . $mh->DescriptorName, parent::getVoc() . "Mesh-Descriptor")
                     . parent::triplify($id, parent::getVoc() . "mesh-descriptor", $did)
                 );
                 if (!empty($mh->QualifierName)) {
                     foreach ($mh->QualifierName as $qualifier_name) {
                         $qid = parent::getRes() . md5($qualifier_name);
                         parent::addRDF(
                             parent::describeIndividual($qid, $qualifier_name, parent::getVoc() . "Mesh-Qualifier")
                             . parent::triplify($id, parent::getVoc() . "mesh-qualifier", $qid)
                         );
                     }
                 }
             }
         }

         // chemicals, cross-referenced to EC or CAS depending on the registry number
         if (!empty($citation->ChemicalList)) {
             $i = 0;
             foreach ($citation->ChemicalList->Chemical as $chemical) {
                 $id = parent::getRes() . $pmid . "_ch_" . ++$i;
                 parent::addRDF(
                     parent::describeIndividual($id, $chemical->NameOfSubstance, parent::getVoc() . "Chemical")
                     . parent::describeClass(parent::getVoc() . "Chemical", "Chemical")
                     . parent::triplify($pmid_uri, parent::getVoc() . "chemical", $id)
                 );
                 if ($chemical->RegistryNumber != "0") {
                     // check if "EC"
                     if (substr($chemical->RegistryNumber, 0, 2) == "EC") {
                         // skip the "EC" marker and the separator character after it
                         $ec = substr($chemical->RegistryNumber, 3);
                         parent::addRDF(parent::triplify($id, parent::getVoc() . "x-ec", "ec:" . $ec));
                     } else {
                         parent::addRDF(parent::triplify($id, parent::getVoc() . "x-cas", "cas:" . $chemical->RegistryNumber));
                     }
                 }
             }
         }

         if (!empty($citation->GeneSymbolList)) {
             foreach ($citation->GeneSymbolList->GeneSymbol as $geneSymbol) {
                 parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "gene-symbol", $geneSymbol));
             }
         }

         if (!empty($citation->SupplMeshList)) {
             foreach ($citation->SupplMeshList->SupplMeshName as $supplMeshName) {
                 $id = parent::getRes() . md5($supplMeshName);
                 parent::addRDF(
                     parent::describeIndividual($id, $supplMeshName, parent::getVoc() . "MeshHeading")
                     . parent::triplify($pmid_uri, parent::getVoc() . "supplemental-mesh-heading", $id)
                 );
             }
         }

         foreach ($article->PublicationTypeList->PublicationType as $publicationType) {
             $id = parent::getRes() . md5($publicationType);
             parent::addRDF(
                 parent::triplify($pmid_uri, parent::getVoc() . "publication-type", $id)
                 . parent::describeClass($id, $publicationType)
             );
         }

         // main abstract: one node for the abstract, one per labelled section
         if (!empty($article->Abstract)) {
             $id = parent::getRes() . $pmid . "_ABSTRACT";
             $label = "Abstract for PMID:{$pmid}";
             $abstract = $article->Abstract;
             parent::addRDF(
                 parent::describeIndividual($id, $label, parent::getVoc() . "Article-Abstract")
                 . parent::describeClass(parent::getVoc() . "Article-Abstract", "Article Abstract")
                 . parent::triplify($pmid_uri, "dc:abstract", $id)
                 . parent::triplifyString($id, parent::getVoc() . "copyright", $abstract->CopyrightInformation)
             );
             $section = 0;
             $abstractText = "";
             foreach ($abstract->AbstractText as $text) {
                 $abstractText .= " " . $text;
                 // the attribute is an object, so it must be cast before a strict
                 // string comparison (the uncast `!==` was always true)
                 if (!empty($text['Label']) && (string) $text['Label'] !== "UNLABELLED") {
                     $section_id = parent::getRes() . $pmid . "_ABSTRACT_SECTION_" . ++$section;
                     parent::addRDF(
                         parent::triplify($id, parent::getVoc() . "section", $section_id)
                         . parent::triplifyString($section_id, parent::getVoc() . "order", $section)
                         . parent::triplifyString($section_id, parent::getVoc() . "nlm-section-type", $text['NlmCategory'])
                         . parent::triplifyString($section_id, parent::getVoc() . "label", $text['Label'])
                         . parent::triplifyString($section_id, parent::getVoc() . "text", $text)
                     );
                 }
             }
             parent::addRDF(parent::triplifyString($id, parent::getVoc() . "abstract-text", $abstractText));
         }

         if (!empty($citation->OtherAbstract)) {
             $i = 0;
             foreach ($citation->OtherAbstract as $ab) {
                 $id = parent::getRes() . $pmid . "_oa_" . ++$i;
                 parent::addRDF(
                     parent::describeIndividual($id, "", parent::getVoc() . "Article-Abstract")
                     . parent::describeClass(parent::getVoc() . "Article-Abstract", "Article Abstract")
                     . parent::triplify($pmid_uri, "dc:abstract", $id)
                 );
                 $abstractText = "";
                 foreach ($ab->AbstractText as $text) {
                     $abstractText .= " " . $text;
                     if (!empty($text['Label']) && (string) $text['Label'] !== "UNLABELLED") {
                         // AbstractText carries "NlmCategory" (as used for the main
                         // abstract above); there is no "Category" attribute
                         parent::addRDF(parent::triplifyString($id, parent::getVoc() . "abstract_" . strtolower((string) $text['NlmCategory']), $text));
                     }
                 }
                 parent::addRDF(parent::triplifyString($id, parent::getVoc() . "abstract-text", $abstractText));
             }
         }

         // NOTE(review): all three lists are looked up under Article; in the
         // MEDLINE schema InvestigatorList and PersonalNameSubjectList appear to
         // be children of MedlineCitation - confirm. Also note that every list
         // numbers its nodes from 1 with the same "_AUTHOR_" suffix.
         $author_types = array("Investigator", "Author", "PersonalNameSubject");
         foreach ($author_types as $author_type) {
             $listname = $author_type . "List";
             if (!empty($article->{$listname}->{$author_type})) {
                 $i = 0;
                 foreach ($article->{$listname}->{$author_type} as $author) {
                     $id = parent::getRes() . $pmid . "_AUTHOR_" . ++$i;
                     $author_label = $author->LastName . ($author->Initials ? ", " . $author->Initials : "");
                     parent::addRDF(
                         parent::describeIndividual($id, $author_label, parent::getVoc() . $author_type)
                         . parent::describeClass(parent::getVoc() . $author_type, $author_type)
                         . parent::triplifyString($id, parent::getVoc() . "list-position", $i)
                         . parent::triplify($pmid_uri, parent::getVoc() . strtolower($author_type), $id)
                         . parent::triplifyString($id, parent::getVoc() . "last-name", $author->LastName)
                         . parent::triplifyString($id, parent::getVoc() . "fore-name", $author->ForeName)
                         . parent::triplifyString($id, parent::getVoc() . "initials", $author->Initials)
                         . parent::triplifyString($id, parent::getVoc() . "collective-name", $author->CollectiveName)
                         . parent::triplifyString($id, parent::getVoc() . "suffix", $author->Suffix)
                     );
                     if ($author->Affiliation) {
                         // was read from the misspelled, never-present "Affilitation"
                         // element, so every organization hashed the empty string
                         $affiliation = parent::getRes() . md5($author->Affiliation);
                         parent::addRDF(
                             parent::describeIndividual($affiliation, $author->Affiliation, parent::getVoc() . "Organization")
                             . parent::describeClass(parent::getVoc() . "Organization", "Organization")
                             // the organization is a resource, so link it like the
                             // other resource-valued relations in this method
                             . parent::triplify($id, parent::getVoc() . "affiliation", $affiliation)
                         );
                     }
                     foreach ($author->NameID as $authorNameId) {
                         if (!empty($authorNameId)) {
                             parent::addRDF(parent::triplifyString($id, parent::getVoc() . "name-id", $authorNameId));
                         }
                     }
                 }
             }
         }

         if (!empty($article->ArticleDate)) {
             $this->addDate($pmid_uri, "article-date", $article->ArticleDate);
         }

         foreach ($article->Language as $language) {
             parent::addRDF(parent::triplifyString($pmid_uri, "dc:language", $language));
         }

         if (!empty($citation->KeywordList)) {
             foreach ($citation->KeywordList->Keyword as $keyword) {
                 parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "keyword", $keyword));
             }
         }

         // other identifiers (untested in the original; element name is "OtherID")
         if (!empty($citation->OtherID)) {
             foreach ($citation->OtherID as $otherID) {
                 if (!empty($otherID)) {
                     $otherIdValue = (string) $otherID;
                     parent::addRDF(
                         parent::triplifyString($pmid_uri, parent::getVoc() . "other-id", $otherIdValue)
                         . parent::triplifyString($pmid_uri, parent::getVoc() . "other-id-source", $otherID['Source'])
                     );
                     if (strpos($otherIdValue, "PMC") !== false) {
                         parent::addRDF(parent::triplify($pmid_uri, parent::getVoc() . "x-pmc", "pmc:" . $otherIdValue));
                     }
                 }
             }
         }

         if (!empty($article->DataBankList)) {
             foreach ($article->DataBankList->DataBank as $dataBank) {
                 parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "databank", $dataBank->DataBankName));
                 if (!empty($dataBank->AccessionNumberList)) {
                     // predicate is derived from the data bank name ("DataBankName",
                     // not the lower-cased property the original read)
                     $databankRel = parent::getVoc() . "x-" . strtolower((string) $dataBank->DataBankName);
                     foreach ($dataBank->AccessionNumberList->AccessionNumber as $acc) {
                         parent::addRDF(parent::triplifyString($pmid_uri, $databankRel, $acc));
                     }
                 }
             }
         }

         if (!empty($article->GrantList)) {
             $i = 0;
             foreach ($article->GrantList->Grant as $grant) {
                 $id = parent::getRes() . $pmid . "_GRANT_" . ++$i;
                 $grant_label = "Grant " . $grant->GrantID . " for " . parent::getNamespace() . $pmid;
                 parent::addRDF(
                     parent::describeIndividual($id, $grant_label, parent::getVoc() . "Grant")
                     . parent::describeClass(parent::getVoc() . "Grant", "Grant")
                     . parent::triplify($pmid_uri, parent::getVoc() . "grant", $id)
                     . parent::triplifyString($id, parent::getVoc() . "grant-identifier", $grant->GrantID)
                     . parent::triplifyString($id, parent::getVoc() . "grant-acronym", $grant->Acronym)
                     . parent::triplifyString($id, parent::getVoc() . "grant-agency", $grant->Agency)
                     . parent::triplifyString($id, parent::getVoc() . "grant-country", $grant->Country)
                 );
             }
         }

         if (!empty($citation->NumberOfReferences)) {
             parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "number-of-references", $citation->NumberOfReferences));
         }

         if (!empty($article->VernacularTitle)) {
             parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "vernacular-title", $article->VernacularTitle));
         }

         foreach ($citation->CitationSubset as $citationSubset) {
             if (!empty($citationSubset)) {
                 parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "citation-subset", $citationSubset));
             }
         }

         // comments/corrections: previously unreachable (wrong property case) and
         // built from undefined variables
         if (!empty($citation->CommentsCorrectionsList)) {
             $i = 0;
             foreach ($citation->CommentsCorrectionsList->CommentsCorrections as $commentCorrection) {
                 $id = parent::getRes() . $pmid . "_COMMENT_CORRECTION_" . ++$i;
                 $ccRefType = $commentCorrection['RefType'];
                 $ccRefSource = $commentCorrection->RefSource;
                 $ccNote = $commentCorrection->Note; // optional
                 $cc_label = "Comment or correction " . $i . " for " . parent::getNamespace() . $pmid;
                 parent::addRDF(
                     parent::describeIndividual($id, $cc_label, parent::getVoc() . "CommentCorrection")
                     . parent::describeClass(parent::getVoc() . "CommentCorrection", "CommentCorrection")
                     . parent::triplify($pmid_uri, parent::getVoc() . "comment-correction", $id)
                     . parent::triplify($id, "rdf:type", parent::getVoc() . $ccRefType)
                     . parent::triplifyString($id, parent::getVoc() . "ref-source", $ccRefSource)
                     . parent::triplifyString($id, parent::getVoc() . "note", $ccNote)
                 );
             }
         }

         foreach ($citation->GeneralNote as $generalNote) {
             if (!empty($generalNote)) {
                 parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "general-note", $generalNote));
             }
         }

         foreach ($citation->SpaceFlightMission as $spaceFlightMission) {
             if (!empty($spaceFlightMission)) {
                 parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "space-flight-mission", $spaceFlightMission));
             }
         }

         // journal and issue
         $journal = $article->Journal;
         $journalId = parent::getRes() . $pmid . "_JOURNAL";
         $journal_label = "Journal for " . parent::getNamespace() . $pmid;
         parent::addRDF(
             parent::describeIndividual($journalId, $journal_label, parent::getVoc() . "Journal")
             . parent::describeClass(parent::getVoc() . "Journal", "Journal")
             . parent::triplify($pmid_uri, parent::getVoc() . "journal", $journalId)
             . parent::triplify($journalId, parent::getVoc() . "x-issn", "issn:" . $journal->ISSN)
             . parent::triplifyString($journalId, parent::getVoc() . "journal-nlm-identifier", $citation->MedlineJournalInfo->NlmUniqueID)
             . parent::triplifyString($journalId, parent::getVoc() . "journal-title", $journal->Title)
             . parent::triplifyString($journalId, parent::getVoc() . "journal-abbreviation", $journal->ISOAbbreviation)
             . parent::triplifyString($journalId, parent::getVoc() . "volume", $journal->JournalIssue->Volume)
             . parent::triplifyString($journalId, parent::getVoc() . "issue", $journal->JournalIssue->Issue)
             . parent::triplifyString($journalId, parent::getVoc() . "pages", "" . $article->Pagination->MedlinePgn)
         );

         $journalPubDate = $journal->JournalIssue->PubDate;
         if (!empty($journalPubDate)) {
             $journalYear = $journalPubDate->Year;
             $journalMonth = trim((string) $journalPubDate->Month); // optional
             if ($journalMonth and !is_numeric($journalMonth[0])) {
                 // textual month: translate the three-letter abbreviation to "01".."12"
                 // NOTE(review): an unrecognised value makes array_search() return
                 // false, which the "+ 1" silently turns into "01".
                 $mo = array("jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec");
                 $journalMonth = str_pad((string) (array_search(strtolower($journalMonth), $mo) + 1), 2, "0", STR_PAD_LEFT);
             }
             $journalDay = trim((string) $journalPubDate->Day); // optional
             if ($journalDay) {
                 $journalDay = str_pad($journalDay, 2, "0", STR_PAD_LEFT);
             }
             parent::addRDF(
                 parent::triplifyString($journalId, parent::getVoc() . "publication-year", $journalYear)
                 . parent::triplifyString($journalId, parent::getVoc() . "publication-month", $journalMonth)
                 . parent::triplifyString($journalId, parent::getVoc() . "publication-day", $journalDay)
                 . parent::triplifyString($journalId, parent::getVoc() . "publication-season", $journalPubDate->Season)
                 . parent::triplifyString($journalId, parent::getVoc() . "publication-date", $journalPubDate->MedlineDate)
             );
             // a typed date is only emitted when all three components are known
             if (!empty($journalYear) and !empty($journalMonth) and !empty($journalDay)) {
                 parent::addRDF(parent::triplifyString($journalId, parent::getVoc() . "publication-date", "{$journalYear}-{$journalMonth}-{$journalDay}", "xsd:date"));
             }
         }

         // electronic locations (element name is "ELocationID")
         foreach ($article->ELocationID as $eLocation) {
             if (!empty($eLocation)) {
                 parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "elocation", $eLocation));
             }
         }

         $this->writeRDFBufferToWriteFile();
     }
 }
Beispiel #3
0
 /**
  * Convert an Affymetrix probeset annotation CSV into RDF.
  *
  * Input is taken from parent::getReadFile(). The first line is skipped,
  * lines starting with "#" are emitted as dataset attributes, the first
  * remaining line is the quoted header (41 columns expected) and every
  * following line is one probeset record with 41 quoted, comma-separated
  * fields. Columns that $this->Map() resolves are emitted as "x-<prefix>"
  * cross-references; all other columns are handled by header label below.
  * The RDF buffer is flushed once per record.
  *
  * @param mixed $file Not used by this method; kept for interface compatibility.
  * @return void
  */
 function Parse($file)
 {
     // Source tokens used in "Transcript Assignments" mapped to the prefix that
     // is emitted. Tokens not listed here are passed through unchanged.
     $prefixMap = array(
         'gb' => 'genbank',
         'gb_htc' => 'genbank',
         'ncbibacterial' => 'gi',
         'ncbi_bacterial' => 'gi',
         'ens' => 'ensembl',
         'ncbi_mito' => 'refseq',
         'ncbi_organelle' => 'refseq',
         // a later 'organelle' => genbank branch existed in the original chain
         // but could never be reached because this mapping matched first
         'organelle' => 'refseq',
         'affx' => 'affymetrix',
         'unknown' => 'affymetrix',
         'prop' => 'affymetrix',
         'tigr_2004_08' => 'tigr',
         'tigr-plantta' => 'genbank',
         'newrs.gi' => 'gi',
         'newRS.gi' => 'gi',
         'primate_viral' => 'genbank',
         'jgi-bacterial' => 'ncbigene',
         'tb' => 'tuberculist',
         'pa' => 'pseudomonas',
     );

     parent::getReadFile()->read();
     // skip the first comment line
     $line = 1;
     $first = true;
     while ($l = parent::getReadFile()->read(500000)) {
         // keep the counter in step with the input so error messages point at
         // the right line (it used to stay at 1 forever)
         $line++;

         if ($l[0] == "#") {
             // dataset attributes; limit the split so values containing "=" survive
             $a = explode('=', trim($l), 2);
             $r = $this->getVoc() . substr($a[0], 2);
             if (isset($a[1])) {
                 $v = $a[1];
                 if ($r == "affymetrix_vocabulary:genome-version-create_date") {
                     // a day of "00" is replaced by "01"
                     $x = explode("-", $a[1]);
                     if (isset($x[2]) && $x[2] == "00") {
                         $x[2] = "01";
                     }
                     $v = implode("-", $x);
                 }
                 parent::addRDF(parent::triplifyString(parent::getDatasetURI(), $r, $v) . parent::describe($r, "{$r}"));
             }
             continue;
         }

         if ($first == true) {
             $first = false;
             // header
             $header = explode(",", str_replace('"', '', trim($l)));
             $n = count($header);
             if ($n != 41) {
                 trigger_error("Expecting 41 columns, found {$n} in header on line {$line}!", E_USER_ERROR);
                 exit;
             }
             continue;
         }

         // strip the line ending (LF or CRLF, or none on the last line) and the
         // outer quotes, then split on the quoted separator
         $a = explode('","', substr(rtrim($l, "\r\n"), 1, -1));
         $n = count($a);
         if ($n != 41) {
             trigger_error("Expecting 41 columns, found {$n} on line {$line}!", E_USER_ERROR);
             exit;
         }
         parent::writeRDFBufferToWriteFile();

         $id = $a[0];
         $qname = "affymetrix:{$id}";
         $label = "probeset {$a[0]} on GeneChip {$a[1]} ({$a[2]})";
         parent::addRDF(
             parent::describeIndividual($qname, $label, $this->getVoc() . "Probeset")
             . parent::describeClass($this->getVoc() . "Probeset", "Affymetrix probeset")
         );
         trigger_error($id, E_USER_NOTICE);

         // now process the entries
         foreach ($a as $k => $v) {
             if (trim($v) == '---') {
                 continue;
             }

             // multi-valued entries are separated by " /// "
             $b = explode(" /// ", $v);
             $r = $this->Map($k);
             if (isset($r)) {
                 // mapped column: emit one cross-reference per value
                 $s = $this->getRegistry()->getPreferredPrefix($r);
                 foreach ($b as $c) {
                     $d = explode(" // ", $c);
                     if ($r == 'symbol') {
                         $d[0] = str_replace(" ", "-", $d[0]);
                     }
                     if ($s == "ec") {
                         // keep only the part after the first ":" when there is one
                         $e = explode(":", $d[0]);
                         if (isset($e[1])) {
                             $d[0] = $e[1];
                         }
                     }
                     $this->addRDF(
                         parent::triplify($qname, $this->getVoc() . "x-{$s}", "{$s}:" . $d[0])
                         . parent::describeProperty($this->getVoc() . "x-{$s}", "a relation to {$s}")
                     );
                 }
             } else {
                 // we handle manually
                 unset($rel);
                 $label = $header[$k];
                 switch ($label) {
                     case 'GeneChip Array':
                         $array_id = parent::getRes() . str_replace(" ", "-", $v);
                         parent::addRDF(
                             parent::triplify($qname, $this->getVoc() . "genechip-array", $array_id)
                             . parent::describeIndividual($array_id, "Affymetrix {$v} GeneChip array", $this->getVoc() . "Genechip-Array")
                             . parent::describeClass($this->getVoc() . "Genechip-Array", "Affymetrix GeneChip array")
                         );
                         break;

                     // The three GO cases deliberately fall through: the first one
                     // that matches sets $rel, the later isset() guards leave it alone.
                     case 'Gene Ontology Biological Process':
                         if (!isset($rel)) {
                             $rel = 'go-process';
                             $prefix = "go";
                         }
                         // no break
                     case 'Gene Ontology Cellular Component':
                         if (!isset($rel)) {
                             $rel = 'go-location';
                             $prefix = "go";
                         }
                         // no break
                     case 'Gene Ontology Molecular Function':
                         if (!isset($rel)) {
                             $rel = 'go-function';
                             $prefix = "go";
                         }
                         $b = explode(" /// ", $v);
                         foreach ($b as $c) {
                             $d = explode(" // ", $c);
                             parent::addRDF(
                                 $this->triplify($qname, $this->getVoc() . $rel, "{$prefix}:" . $d[0])
                                 . $this->describeProperty($this->getVoc() . $rel, "{$rel}")
                             );
                         }
                         break;

                     case 'Transcript Assignments':
                         $b = explode(" /// ", $v);
                         foreach ($b as $c) {
                             $d = explode(" // ", $c);
                             if (!isset($d[2])) {
                                 // no source token: a prefixed identifier cannot be built
                                 continue;
                             }
                             $transcriptId = $d[0];
                             $prefix = $d[2];
                             if ($prefix == '---' || $transcriptId == '---') {
                                 continue;
                             }
                             if ($prefix == 'gi|53267') {
                                 // the identifier is embedded in the source token
                                 $prefix = 'gi';
                                 $transcriptId = '53267';
                             } elseif ($prefix == 'broad-tcup') {
                                 // prefix is kept; only the part before the first "-" is the id
                                 $e = explode("-", $transcriptId);
                                 $transcriptId = $e[0];
                             } elseif (isset($prefixMap[$prefix])) {
                                 $prefix = $prefixMap[$prefix];
                             }
                             parent::addRDF(
                                 parent::triplify($qname, $this->getVoc() . "transcript-assignment", "{$prefix}:{$transcriptId}")
                                 . parent::describeProperty($this->getVoc() . "transcript-assignment", "transcript assignment")
                             );
                         }
                         break;

                     case 'Annotation Transcript Cluster':
                         // intentionally not converted
                         break;

                     case 'Annotation Date':
                         // e.g. "Jun 9, 2011"
                         $rel = "annotation-date";
                         if (preg_match("/^([A-Za-z]+) ([0-9]+), ([0-9]{4})\$/", $v, $m) === 1) {
                             $month = $this->getMonth($m[1]);
                             $day = $m[2];
                             $year = $m[3];
                             if (!$day || $day == "0") {
                                 $day = "01";
                             }
                             $date = $year . "-" . $month . "-" . str_pad($day, 2, "0", STR_PAD_LEFT) . "T00:00:00Z";
                             parent::addRDF(
                                 parent::triplifyString($qname, $this->getVoc() . $rel, $date, "xsd:dateTime")
                                 . parent::describeProperty($this->getVoc() . $rel, "{$rel}")
                             );
                         } else {
                             trigger_error("could not match date from {$v}", E_USER_ERROR);
                         }
                         break;

                     case 'Species Scientific Name':
                         break;

                     case 'Transcript ID(Array Design)':
                         if (!isset($rel)) {
                             $rel = 'transcript';
                         }
                         // no break: emitted by the generic branch below
                     case 'Sequence type':
                     default:
                         // generic column: the relation name is derived from the header label
                         if (!isset($rel)) {
                             $rel = str_replace(" ", "-", strtolower($label));
                         }
                         $b = explode(" /// ", $v);
                         foreach ($b as $c) {
                             parent::addRDF(
                                 parent::triplifyString($qname, $this->getVoc() . $rel, stripslashes($c))
                                 . parent::describeProperty($this->getVoc() . $rel, "{$rel}")
                             );
                         }
                         break;
                 }
                 //  switch
             }
             // else
         }
         $this->WriteRDFBufferToWriteFile();
     }
 }
Beispiel #4
0
 /**
  * Convert a dbSNP-style XML document into RDF, one resource per child of the XML root.
  *
  * For every child element the method describes the SNP itself (class, molecule
  * type, taxon), its optional genotype flag, its validation flags, its HGVS
  * names and - for the reference assembly only - the contig placement and the
  * functional annotation sets ("fxnsets") found under Component/MapLoc.
  *
  * @param string $file location of the XML document, handed to CXML as-is
  * @return false|null false when the document yields no usable root element;
  *                    nothing is returned after a successful pass
  */
 function parse($file)
 {
     $xml = new CXML($file);
     $xml->parse();
     $entry = $xml->getXMLRoot();
     if (!isset($entry) or !$entry) {
         return false;
     }
     $voc = parent::getVoc();
     // Validation attribute => predicate (local name inside the vocabulary)
     $validation_map = array(
         "byCluster" => "validation-by-cluster",
         "byFrequency" => "validation-by-frequency",
         "by2Hit2Allele" => "validation-by-2hit2allele",
         "by1000G" => "validation-by-1000G",
     );
     // fxnset attribute => array(predicate, object prefix); a null prefix means
     // the value is written as a string literal instead of a resource
     $fxnset_map = array(
         "geneId" => array("gene", "ncbigene:"),
         "symbol" => array("gene-symbol", null),
         "mrnaAcc" => array("mrna", "refseq:"),
         "protAcc" => array("protein", "refseq:"),
         "fxnClass" => array("fxn-class", null),
         "allele" => array("allele", null),
         "residue" => array("residue", null),
         "readingFrame" => array("reading-frame", null),
         "aaPosition" => array("position", null),
     );
     foreach ($entry->children() as $o) {
         $attrs = $o->attributes();
         $snp_class = (string) $attrs->snpClass;
         $mol_type = (string) $attrs->molType;
         $rsid = "rs" . $attrs->rsId;
         $id = parent::getNamespace() . $rsid;
         $type = $voc . ucfirst(str_replace(" ", "-", $snp_class));
         $moltype = $voc . $mol_type;
         // attributes
         parent::addRDF(
             parent::describeIndividual($id, $rsid, $type)
             . parent::describeClass($type, ucfirst($snp_class))
             . parent::triplify($id, $voc . "mol-type", $moltype)
             . parent::describeClass($moltype, $mol_type, $voc . "Moltype")
             . parent::describeClass($voc . "Moltype", "Moltype")
             . parent::triplify($id, $voc . "taxid", "taxonomy:" . (string) $attrs->taxId)
         );
         $genotype = (string) $attrs->genoType;
         if ($genotype) {
             // NOTE(review): the literal is prefixed with the vocabulary IRI and typed
             // "xsd:bool" (XSD names the type xsd:boolean) - kept as-is, confirm intent.
             parent::addRDF(parent::triplifyString($id, $voc . "genotype", $voc . $genotype, "xsd:bool"));
         }
         // validation: the element and each of its flags are optional, so only
         // emit what is really present instead of dereferencing a missing node
         if (isset($o->Validation)) {
             $validation = $o->Validation->attributes();
             foreach ($validation_map as $attribute => $predicate) {
                 if (isset($validation->{$attribute})) {
                     parent::addRDF(parent::triplifyString($id, $voc . $predicate, (string) $validation->{$attribute}));
                 }
             }
         }
         // hgvs names
         foreach ($o->hgvs as $name) {
             parent::addRDF(parent::triplifyString($id, $voc . "hgvs-name", (string) $name));
         }
         // assembly: only the one flagged reference="true" is converted
         $assembly = $o->Assembly;
         if (!$assembly or $assembly->attributes()->reference != "true") {
             continue;
         }
         $assembly_attrs = $assembly->attributes();
         parent::addRDF(
             parent::triplifyString($id, $voc . "dbsnp-build", (string) $assembly_attrs->dbSnpBuild)
             . parent::triplifyString($id, $voc . "genome-build", (string) $assembly_attrs->genomeBuild)
         );
         $component = $assembly->Component;
         if (!$component) {
             continue;
         }
         $component_attrs = $component->attributes();
         parent::addRDF(
             parent::triplify($id, $voc . "contig-accession", "genbank:" . (string) $component_attrs->accession)
             . parent::triplify($id, $voc . "contig-gi", "gi:" . (string) $component_attrs->gi)
             . parent::triplifyString($id, $voc . "chromosome", (string) $component_attrs->chromosome)
         );
         $maploc = $component->MapLoc;
         if (!$maploc) {
             continue;
         }
         foreach ($maploc->children() as $fxnset) {
             // the fxnset has no identifier of its own, so hash its serialisation
             $fxnset_id = parent::getRes() . md5($fxnset->asXML());
             parent::addRDF(
                 parent::triplify($id, $voc . "maps-to", $fxnset_id)
                 . parent::triplify($fxnset_id, "rdf:type", $voc . "Fxnset")
                 . parent::describeClass($voc . "Fxnset", "Fxnset")
             );
             $fxnset_attrs = $fxnset->attributes();
             foreach ($fxnset_map as $attribute => $spec) {
                 if (!isset($fxnset_attrs->{$attribute})) {
                     continue;
                 }
                 list($predicate, $prefix) = $spec;
                 $value = (string) $fxnset_attrs->{$attribute};
                 if ($prefix === null) {
                     parent::addRDF(parent::triplifyString($fxnset_id, $voc . $predicate, $value));
                 } else {
                     parent::addRDF(parent::triplify($fxnset_id, $voc . $predicate, $prefix . $value));
                 }
             }
         }
     }
     unset($xml);
 }
Beispiel #5
0
 /**
  * Emit the RDF description of one MeSH supplementary record.
  *
  * The record is typed and labelled first; afterwards every field of the record
  * is looked up in the map returned by getSupplementaryConceptRecords() and
  * written out as a date literal, a plain literal or (for "PA") a linked
  * resource. A field that is missing from that map raises an E_USER_ERROR.
  * The RDF buffer is flushed after each field and once more at the end.
  *
  * @param array $sup_record_arr field code => list of values for a single record;
  *                              records lacking UI[0] or NM[0] are ignored
  */
 private function makeSupplementaryRecord($sup_record_arr)
 {
     // without an identifier and a name there is nothing to describe
     if (!isset($sup_record_arr['UI'][0]) or !isset($sup_record_arr['NM'][0])) {
         return;
     }
     $record_label = $sup_record_arr['NM'][0];
     $record_iri = $this->getNamespace() . $sup_record_arr["UI"][0];
     $record_class = $this->getVoc() . "Supplementary-Descriptor";
     parent::addRDF(parent::describeIndividual($record_iri, $record_label, $record_class, $record_label) . parent::describeClass($this->getVoc() . "Supplementary-Descriptor", "MeSH Supplementary Descriptor"));
     // field code => predicate local name
     $field_map = $this->getSupplementaryConceptRecords();
     // field code => array(how the value is written, tail of the property description)
     $handlers = array(
         "DA" => array("date", "date of entry"),
         "FR" => array("text", "frequency"),
         "HM" => array("text", "heading mapping"),
         "II" => array("text", "indexing information"),
         "MR" => array("date", "major revision date"),
         "N1" => array("text", "cas 1 name"),
         "NM" => array("text", "name of substance"),
         "NM_TH" => array("text", "term thesaurus id"),
         "NO" => array("text", "note"),
         "PA" => array("resource", "pharmacological action"),
         "PI" => array("text", "previous indexing"),
         "RECTYPE" => array("text", "record type"),
         "RN" => array("text", "cas registry number or ec number"),
         "RR" => array("text", "related cas registry number"),
         "SO" => array("text", "source"),
         "ST" => array("text", "semantic type"),
         "SY" => array("text", "synonym"),
         "TH" => array("text", "thesaurus id"),
     );
     $description_stem = "Relationship between a supplementary record and its ";
     foreach ($sup_record_arr as $field => $values) {
         if (!array_key_exists($field, $field_map)) {
             trigger_error("Please add key to descriptor record map: " . $field . PHP_EOL, E_USER_ERROR);
         } elseif (isset($handlers[$field])) {
             list($kind, $what) = $handlers[$field];
             foreach ($values as $value) {
                 $predicate = $this->getVoc() . $field_map[$field];
                 if ($kind === "resource") {
                     // the value becomes a resource of its own, keyed on a hash of the raw text
                     $target_label = utf8_encode(htmlspecialchars($value));
                     $target_iri = parent::getRes() . md5($value);
                     $statement = parent::describeIndividual($target_iri, $target_label, parent::getVoc() . "Pharmacological-Action", $target_label) . parent::triplify($record_iri, $predicate, $target_iri);
                 } elseif ($kind === "date") {
                     $statement = parent::triplifyString($record_iri, $predicate, $this->formatDate($value), "xsd:date");
                 } else {
                     $statement = parent::triplifyString($record_iri, $predicate, utf8_encode(htmlspecialchars($value)));
                 }
                 parent::AddRDF($statement . parent::describeProperty($this->getVoc() . $field_map[$field], $description_stem . $what));
             }
         }
         // flush after every field of the record
         $this->WriteRDFBufferToWriteFile();
     }
     $this->WriteRDFBufferToWriteFile();
 }
Beispiel #6
0
 /**
  * Stream a GenBank-format flat file and write one RDF description per record.
  *
  * Lines are accumulated until a line consisting solely of "//" is read; the
  * accumulated text is then split into its sections (LOCUS, DEFINITION,
  * ACCESSION, VERSION, KEYWORDS, REFERENCE, SOURCE, FEATURES), converted to
  * triples and flushed to the write file before the next record is started.
  *
  * Reference resources get an IRI derived from the record IRI and the position
  * of the reference within the record, so repeated runs over the same input
  * produce the same IRIs and two references can never share one by chance.
  */
 function process()
 {
     $refseq_record_str = "";
     while ($aLine = $this->getReadFile()->Read(40960)) {
         // anything but the "//" terminator belongs to the current record
         if (!preg_match("/^\\/\\/\$/", $aLine)) {
             // lines made of a lone newline are dropped
             if (!preg_match("/^\n\$/", $aLine)) {
                 $refseq_record_str .= $aLine . PHP_EOL;
             }
             continue;
         }
         // end of record: remove the header if it is there, then split into sections
         $refseq_record_str = $this->removeHeader($refseq_record_str);
         $sectionsRaw = $this->parseGenbankRaw($refseq_record_str);
         $parsed_locus_arr = $this->parseLocus($this->retrieveSections("LOCUS", $sectionsRaw));
         $parsed_definition_arr = $this->parseDefinition($this->retrieveSections("DEFINITION", $sectionsRaw));
         $parsed_accession_arr = $this->parseAccession($this->retrieveSections("ACCESSION", $sectionsRaw));
         $parsed_version_arr = $this->parseVersion($this->retrieveSections("VERSION", $sectionsRaw));
         $parsed_keyword_arr = $this->parseKeywords($this->retrieveSections("KEYWORDS", $sectionsRaw));
         $parsed_refs_arr = $this->parseReferences($this->retrieveSections("REFERENCE", $sectionsRaw));
         $parsed_source_arr = $this->parseSource($this->retrieveSections("SOURCE", $sectionsRaw));
         $parsed_features_arr = $this->parseFeatures($this->retrieveSections("FEATURES", $sectionsRaw));
         // the record itself
         $refseq_res = $this->getNamespace() . $parsed_version_arr['versioned_accession'];
         $refseq_label = utf8_encode(htmlspecialchars($parsed_definition_arr[0]));
         $fasta_url = 'https://www.ncbi.nlm.nih.gov/sviewer/viewer.cgi?sendto=on&db=nucest&dopt=fasta&val=' . $parsed_version_arr['gi'];
         parent::AddRDF(
             parent::describeIndividual($refseq_res, $refseq_label, $this->getVoc() . 'refseq-record')
             . parent::triplifyString($refseq_res, $this->getVoc() . 'sequence-length', $parsed_locus_arr[0]['sequence_length'])
             . parent::triplifyString($refseq_res, $this->getVoc() . 'chromosome-shape', $parsed_locus_arr[0]['chromosome_shape'])
             . parent::triplifyString($refseq_res, $this->getVoc() . 'date-of-entry', $parsed_locus_arr[0]['date'])
             . parent::triplifyString($refseq_res, $this->getVoc() . 'source', utf8_encode($parsed_source_arr[0]))
             . parent::triplify($refseq_res, $this->getVoc() . 'fasta-seq', $fasta_url)
             . parent::triplify($fasta_url, "rdf:type", $this->getVoc() . 'fasta-sequence')
         );
         // features
         foreach ($parsed_features_arr as $aFeature) {
             $type = $aFeature['type'];
             $feat_desc = $this->getFeatures($type);
             $label = $type;
             $def = '';
             if (isset($feat_desc['definition'])) {
                 $def = preg_replace('/\\s\\s*/', ' ', $feat_desc['definition']);
             }
             // imperfect solution: qualifiers are split on "/", so the one known
             // database name containing a slash is rewritten first
             $value = str_replace("UniProtKB/Swiss-Prot", "UniProt", $aFeature['value']);
             $value_arr = explode("/", $value);
             // the text before the first "/" is the feature location
             $location = preg_replace('/\\n/', '', $value_arr[0]);
             $class_id = parent::getVoc() . md5($type);
             $feat_res = parent::getRes() . md5($type . $location . $refseq_res);
             $feat_label = utf8_encode($type . " " . $location . " for " . $refseq_res);
             if (isset($feat_desc['comment'])) {
                 $label .= " " . preg_replace('/\\s\\s*/', ' ', $feat_desc['comment']);
             }
             parent::AddRDF(
                 parent::describeClass($class_id, $label, parent::getVoc() . "Feature", $label, $def)
                 . parent::describeIndividual($feat_res, $feat_label, $class_id)
                 . parent::triplify($refseq_res, $this->getVoc() . "has-feature", $feat_res)
             );
             foreach ($value_arr as $aL) {
                 // only "key=value" qualifiers are converted
                 if (!preg_match("/(\\S+)\\=(.*)/", $aL, $m)) {
                     continue;
                 }
                 $qualifier_value = str_replace("\"", "", $m[2]);
                 if ($m[1] == "db_xref") {
                     parent::AddRDF(parent::triplify($feat_res, "rdfs:seeAlso", $qualifier_value));
                 } else {
                     parent::AddRDF(parent::triplifyString($feat_res, $this->getVoc() . $m[1], utf8_encode($qualifier_value)));
                 }
             }
         }
         // accessions
         foreach ($parsed_accession_arr[0] as $acc) {
             parent::AddRDF(parent::triplifyString($refseq_res, $this->getVoc() . "accession", $acc));
         }
         // versioned accession
         if (isset($parsed_version_arr['versioned_accession'])) {
             parent::AddRDF(parent::triplifyString($refseq_res, $this->getVoc() . "versioned-accession", $parsed_version_arr['versioned_accession']));
         }
         // keywords
         foreach ($parsed_keyword_arr as $akw) {
             parent::AddRDF(parent::triplifyString($refseq_res, $this->getVoc() . "keyword", $akw));
         }
         // references
         $ref_index = 0;
         foreach ($parsed_refs_arr as $aRef) {
             $ref_index++;
             // reproducible and unique within the record, unlike a hash of rand()
             $ref_res = $this->getRes() . md5($refseq_res . " reference " . $ref_index);
             $ref_label = "reference for " . $refseq_res;
             if (isset($aRef['TITLE'])) {
                 parent::AddRDF(parent::describeIndividual($ref_res, $ref_label, $this->getVoc() . "reference") . parent::triplifyString($ref_res, $this->getVoc() . "title", $aRef['TITLE']));
             }
             if (isset($aRef['PUBMED'])) {
                 parent::AddRDF(parent::triplify($ref_res, $this->getVoc() . "x-pubmed", 'pubmed:' . $aRef['PUBMED']));
             }
             if (isset($aRef['AUTHORS'])) {
                 parent::AddRDF(parent::triplifyString($ref_res, $this->getVoc() . "authors", $aRef['AUTHORS']));
             }
             // the record always links to its reference; the details are optional
             parent::AddRDF(parent::triplify($refseq_res, $this->getVoc() . "reference", $ref_res));
             if (isset($aRef['COORDINATES'])) {
                 parent::AddRDF(parent::triplifyString($ref_res, $this->getVoc() . "coordinates", $aRef['COORDINATES']));
             }
             if (isset($aRef['JOURNAL'])) {
                 parent::AddRDF(parent::triplifyString($ref_res, $this->getVoc() . "citation", $aRef['JOURNAL']));
             }
         }
         // start the next record with an empty buffer
         $refseq_record_str = "";
         $this->WriteRDFBufferToWriteFile();
     }
 }
Beispiel #7
0
 /**
  * Turn each row of the tab-separated chemical / enriched-pathway file into a
  * "Chemical-Pathway-Association" resource linking a MeSH chemical to a pathway.
  *
  * Lines starting with "#" are skipped. The first remaining line must split
  * into exactly 11 tab-separated fields, otherwise a warning is raised and the
  * conversion is abandoned. The buffer is written out after every row.
  *
  * @return bool FALSE when the column check fails, TRUE once the file is exhausted
  */
 function CTD_chem_pathways_enriched()
 {
     $columns_verified = false;
     while ($line = $this->GetReadFile()->Read()) {
         // comment line
         if ($line[0] === '#') {
             continue;
         }
         $fields = explode("\t", trim($line));
         // the column count is verified once, on the first data line (untrimmed)
         if (!$columns_verified) {
             $c = count(explode("\t", $line));
             if ($c != 11) {
                 trigger_error("CTD_chem_pathways_enriched function expects 11 fields, found {$c}!" . PHP_EOL, E_USER_WARNING);
                 return FALSE;
             }
             $columns_verified = true;
         }
         $mesh_id = $fields[1];
         // split the pathway identifier (field 5) into prefix and local id
         $this->getRegistry()->parseQName($fields[4], $prefix, $local_id);
         if ($prefix == "react") {
             $prefix = "reactome";
         }
         $association = parent::getRes() . md5($mesh_id . $prefix . $local_id . $fields[6]);
         $association_label = "Chemical-pathway association between mesh:{$mesh_id} and {$prefix}:{$local_id} with p-value {$fields[6]}";
         $triples = parent::describeIndividual($association, $association_label, parent::getVoc() . "Chemical-Pathway-Association");
         $triples .= parent::describeClass(parent::getVoc() . "Chemical-Pathway-Association", "Chemical-Pathway Association");
         $triples .= parent::triplify($association, $this->getVoc() . "pathway", $prefix . ":" . $local_id);
         $triples .= parent::triplify($association, parent::getVoc() . "chemical", "mesh:" . $mesh_id);
         $triples .= parent::triplifyString($association, $this->getVoc() . "p-value", $fields[6], "xsd:double");
         $this->AddRDF($triples);
         parent::WriteRDFBufferToWriteFile();
     }
     return TRUE;
 }
Beispiel #8
0
 function parseEntry($lfile)
 {
     $fp = fopen($lfile, "r");
     while ($l = fgets($fp, 100000)) {
         $k_t = trim(substr($l, 0, 12));
         $v = trim(substr($l, 12));
         if (!$k_t and $v == '') {
             continue;
         }
         // set the key to the current key if not empty, else keep using what was there before
         if (!isset($k)) {
             $k = $k_t;
         } else {
             if (!empty($k_t)) {
                 $k = $k_t;
             }
         }
         if ($k == "///" or $k == "ENTRY1") {
             break;
         }
         if ($k == "ENTRY") {
             $a = explode("  ", $v, 2);
             $e['id'] = str_replace(array("EC ", " "), "", $a[0]);
             if (isset($this->org)) {
                 $e['id'] = $this->org . "_" . $e['id'];
             }
             $e['type'] = trim(str_replace(array("Complete ", "Pathway   Module"), array("", "Pathway Module"), $a[1]));
             $e['type_label'] = str_replace(" ", "-", $e['type']);
             $uri = parent::getNamespace() . $e['id'];
             continue;
         }
         // key with value
         if (in_array($k, array("NAME", "DESCRIPTION", "DEFINITION", "EQUATION", "COMMENT"))) {
             if ($k == "NAME") {
                 parent::addRDF(parent::describeIndividual($uri, $v, parent::getVoc() . $e['type']) . parent::describeClass(parent::getVoc() . $e['type'], $e['type_label']) . parent::triplify($uri, "rdfs:seeAlso", "http://www.kegg.jp/dbget-bin/www_bget?" . $e['id']));
                 if ($e['type'] == 'Genome') {
                     $a = explode(",", $v);
                     parent::addRDF(parent::triplify($uri, "owl:sameAs", "kegg:" . $a[0]));
                 }
             } else {
                 if ($k == "DESCRIPTION") {
                     parent::addRDF(parent::triplifyString($uri, "dc:description", $v));
                 } else {
                     if ($k == "DEFINITION" and $e['type'] == "KO") {
                         preg_match("/\\[([^\\]]+)\\]/", $v, $m);
                         if (isset($m[1])) {
                             parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-ec", $m[1]));
                         }
                     } else {
                         if ($k == "COMMENT") {
                             preg_match("/ICD-O: ([^,]+),/", $v, $m);
                             if (isset($m[1])) {
                                 parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-icdo", "icdo:" . $m[1]));
                                 continue;
                             }
                         } else {
                             parent::addRDF(parent::triplifyString($uri, parent::getVoc() . strtolower($k), $v));
                         }
                     }
                 }
             }
             continue;
         }
         if ($k == "RPAIR" and $e['type'] == "Reaction") {
             $list = explode(" ", $v);
             $id = parent::getRes() . $e['id'] . "." . $list[2] . "." . $list[3];
             $rc = '';
             if (isset($list[4])) {
                 $rc = "kegg:" . substr($list[4], 4, -1);
             }
             parent::addRDF(parent::describeIndividual($id, $e['id'] . " " . $v, parent::getVoc() . "RPair-Role") . parent::describeClass(parent::getVoc() . "RPair-Role", "RPair Role") . parent::triplify($id, parent::getVoc() . "rpair", "kegg:" . $list[0]) . parent::triplifyString($id, parent::getVoc() . "role", $list[3]) . ($rc != '' ? parent::triplify($id, parent::getVoc() . "reaction-center", $rc) : '') . parent::triplify($uri, parent::getVoc() . "rpair-role", $id));
             continue;
         }
         // list of entries
         if (in_array($k, array("ENZYME", "RPAIR", "RELATEDPAIR")) or in_array($e['type'], array("Compound", "RClass", "RPair")) and $k == "REACTION") {
             $list = explode(" ", $v);
             foreach ($list as $id) {
                 if (!$id) {
                     continue;
                 }
                 parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:{$id}"));
             }
             continue;
         }
         // key with semi-colon separated values
         if (in_array($k, array("CLASS", "CATEGORY", "KEYWORDS", "CHROMOSOME", "ANNOTATION", "ACTIVITY", "TYPE"))) {
             $a = explode(";", $v);
             foreach ($a as $c) {
                 parent::addRDF(parent::triplifyString($uri, parent::getVoc() . strtolower($k), trim($c)));
             }
             continue;
         }
         // kegg seems to make a prefix mistake with the pathway identifiers...
         if ($k == "PATHWAY") {
             $a = explode("  ", $v, 2);
             preg_match("/[a-z]+([0-9]{5})/", $a[0], $m);
             if (isset($m[1])) {
                 parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:map" . $m[1]));
             } else {
                 echo "pathway problem: " . $v . PHP_EOL;
             }
             continue;
         }
         // multi-line header with key-value pair
         if (in_array($k, array("PATHWAY_MAP", "STR_MAP", "MODULE", "DISEASE", "KO_PATHWAY", "COMPOUND"))) {
             // PATHWAY_MAP map00010  Glycolysis / Gluconeogenesis
             $a = explode("  ", $v, 2);
             $mid = $a[0];
             if (strpos($a[0], '(') !== FALSE) {
                 $mid = substr($a[0], 0, strpos($a[0], '('));
             }
             if (isset($this->org) and $k == "MODULE") {
                 $mid = substr($mid, strpos($v, "_") + 1);
             }
             parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:" . $mid));
             continue;
         }
         // REACTION parsing
         if (preg_match("/\\[RN:([^\\]]+)]/", $v, $m) != FALSE) {
             $list = explode(" ", $m[1]);
             foreach ($list as $item) {
                 parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:" . $item));
             }
             continue;
         }
         if ($k == "DRUG") {
             preg_match("/\\[DR:([^\\]]+)]/", $v, $m);
             if (isset($m[1])) {
                 $list = explode(" ", $m[1]);
                 foreach ($list as $item) {
                     parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:" . $item));
                 }
                 continue;
             }
         }
         if ($k == "TAXONOMY") {
             parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:" . str_replace("TAX", "taxonomy", $v)));
             continue;
         }
         // a list of objects to parse out that are defined within square brackets
         if (in_array($k, array("SOURCE", "COMPONENT"))) {
             preg_match_all("/\\[([^\\]]+)\\]/", $v, $m);
             if (isset($m[1])) {
                 foreach ($m[1] as $id) {
                     $myid = str_replace(array("TAX", "CPD", "DR"), array("taxonomy", "kegg", "kegg"), $id);
                     parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), $myid));
                 }
                 continue;
             }
         }
         // multi-line header with multi-key single value pair
         if (in_array($k, array("ORTHOLOGY", "REACTION"))) {
             // K00844,K12407,K00845  hexokinase/glucokinase [EC:2.7.1.1 2.7.1.2] [RN:R01786]
             // R01786,R02189,R09085  C00267 -> C00668
             $a = explode(" ", $v, 2);
             $ids = explode(",", $a[0]);
             if ($k == "REACTION" and $ids[0][0] != "R") {
                 echo "unable to parse {$k}" . PHP_EOL;
                 continue;
             }
             if (!isset($a[1])) {
                 if ($e['type'] == "Reaction") {
                     parent::addRDF(parent::triplify($uri, parent::getVoc() . "orthology", "kegg:" . trim($a[0])));
                     continue;
                 }
                 echo "parse error: " . $k . " " . $v . PHP_EOL;
                 continue;
             }
             $str = $a[1];
             foreach ($ids as $id) {
                 $o = '';
                 $o['id'] = $id;
                 $o['label'] = $str;
                 $o['type'] = strtolower($k);
                 parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:{$id}"));
             }
             continue;
         }
         if ($k == "DBLINKS") {
             // DBLINKS     GO: 0006096 0006094
             $a = explode(": ", $v, 2);
             $ns = str_replace(array("ncbi-geneid", "ncbi-gi", "rn", "pubchem", "pdb-ccd", "icd-10", "um-bbd", "iubmb enzyme nomenclature", "explorenz - the enzyme database", "expasy - enzyme nomenclature database", "umbbd (biocatalysis/biodegradation database)", "brenda, the enzyme database"), array("ncbigene", "gi", "kegg", "pubchem.compound", "ccd", "icd10", "umbbd", "ec", "ec", "ec", "ec", "ec"), strtolower($a[0]));
             $ids = explode(" ", $a[1]);
             foreach ($ids as $id) {
                 if (!$id) {
                     continue;
                 }
                 parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-{$ns}", "{$ns}:{$id}"));
             }
             continue;
         }
         if ($k == "REMARK") {
             preg_match("/Same as: ([A-Z0-9]+)/", $v, $m);
             if (isset($m[1])) {
                 parent::addRDF(parent::triplify($uri, parent::getVoc() . "same-as", "kegg:" . $m[1]));
                 continue;
             }
             preg_match("/ATC code: (.*)/", $v, $m);
             if (isset($m[1])) {
                 $list = explode(" ", $m[1]);
                 foreach ($list as $item) {
                     parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-atc", "atc:" . $item));
                 }
                 continue;
             }
             preg_match("/Therapeutic category: (.*)/", $v, $m);
             if (isset($m[1])) {
                 $list = explode(" ", $m[1]);
                 foreach ($list as $item) {
                     parent::addRDF(parent::triplifyString($uri, parent::getVoc() . "therapeutic-category", $item));
                 }
                 continue;
             }
             preg_match("/Drug group: (.*)/", $v, $m);
             if (isset($m[1])) {
                 $list = explode(" ", $m[1]);
                 foreach ($list as $item) {
                     parent::addRDF(parent::triplify($uri, parent::getVoc() . "drug-group", "kegg:" . $item));
                 }
                 continue;
             }
         }
         if ($k == "PRODUCT" or $k == "SUBSTRATE") {
             preg_match("/([a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})/", $v, $m);
             if (isset($m[1])) {
                 parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-dailymed", "dailymed:" . $m[1]) . parent::triplifyString("dailymed:" . $m[1], "rdfs:label", $v));
                 continue;
             }
             preg_match("/\\[CPD:([^\\]]+)\\]/", $v, $m);
             if (isset($m[1])) {
                 parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:" . $m[1]));
                 continue;
             }
         }
         if ($k == "STATISTICS") {
             $a = explode(": ", $v);
             parent::addRDF(parent::triplifyString($uri, parent::getVoc() . str_replace(" ", "-", strtolower($a[0])), $a[1]));
             continue;
         }
         if ($k == "ORGANISM") {
             $a = explode(" ", $v);
             parent::addRDF(parent::triplify($uri, parent::getVoc() . "organism", "kegg:" . $a[0]));
             continue;
         }
         if ($k == "REFERENCE") {
             if (!isset($ref)) {
                 $ref = 1;
             } else {
                 if (!isset($e['reference'][$ref]['title'])) {
                     continue;
                 }
                 // this is a bug where the reference declaration is split onto two lines
                 $ref++;
             }
             if (strstr($v, "PMID")) {
                 // PMID:11529849 (marker)
                 preg_match("/(PMID:[0-9]+) /", $v, $m);
                 if (isset($m[1])) {
                     $e['reference'][$ref]['pubmed'] = $m[1];
                 }
             }
             continue;
         }
         if ($k == "AUTHORS") {
             $e['reference'][$ref]['authors'] = $v;
             continue;
         }
         if ($k == "TITLE") {
             $e['reference'][$ref]['title'] = $v;
             continue;
         }
         if ($k == "JOURNAL") {
             $e['reference'][$ref]['journal'] = $v;
             continue;
         }
         if ($e['type'] == "Disease" and ($k == "GENE" or $k == "MARKER")) {
             // BCR-ABL (translocation) [HSA:613 25] [KO:K08878 K06619]
             preg_match_all("/ \\[([^\\]]+)\\]/", $v, $m);
             if (isset($m[1])) {
                 foreach ($m[1] as $idlist) {
                     $a = explode(":", $idlist);
                     $ns = $a[0];
                     $b = explode(" ", $a[1]);
                     foreach ($b as $id) {
                         if ($ns == "KO") {
                             $rel = "ko-" . strtolower($k);
                             $gene = $id;
                         } else {
                             $rel = strtolower($k);
                             $gene = $ns . "_" . $id;
                         }
                         parent::addRDF(parent::triplify($uri, parent::getVoc() . $rel, "kegg:{$gene}"));
                     }
                 }
             } else {
                 echo $v;
             }
             continue;
         }
         if ($k == "GENES") {
             // ATH: AT1G32780 AT1G64710 AT1G77120(ADH1) AT5G24760
             $a = explode(": ", $v);
             $org = $a[0];
             $b = explode(" ", $a[1]);
             foreach ($b as $id) {
                 $c = explode("(", $id);
                 $gene = parent::getNamespace() . $org . "_" . $c[0];
                 parent::addRDF(parent::triplify($uri, parent::getVoc() . "gene", $gene));
             }
             //echo parent::getRDF();exit;
             continue;
         }
         if ($k == "DRUG_TARGET") {
             // Afatinib: D09724 D09733
             $s = substr($v, strpos($v, ":") + 2);
             $list = explode(" ", $s);
             foreach ($list as $item) {
                 parent::addRDF(parent::triplify($uri, parent::getVoc() . "drug-target", "kegg:{$item}"));
             }
             continue;
         }
         if ($k == "STRUCTURE") {
             $list = explode(" ", $v);
             foreach ($list as $item) {
                 if (trim($item) == '') {
                     continue;
                 }
                 parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-pdb", "pdb:{$item}"));
             }
             continue;
         }
         if ($k == "MOTIF") {
             $list = explode(" ", $v);
             foreach ($list as $item) {
                 parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-pfam", "pfam:{$item}"));
             }
             continue;
         }
         if (in_array($k, array("INTERACTION", "METABOLISM", "TARGET"))) {
             // dopamine D2-receptor antagonist [HSA:1813] [KO:K04145]
             $id = parent::getRes() . md5($uri . $v);
             $type = ucfirst(strtolower($k));
             if (in_array($k, array("INTERACTION", "METABOLISM"))) {
                 $a = explode(":", $v, 2);
                 $modifier = $a[0];
             } else {
                 $modifier = '';
                 $s = substr($v, 0, strpos($v, "[") + 1);
                 // dopamine D2-receptor antagonist [
                 preg_match("/ ([a-z]+) \\[/", $s, $m);
                 if (isset($m[1])) {
                     $modifier = $m[1];
                 }
             }
             parent::addRDF(parent::describeIndividual($id, $v, parent::getVoc() . $type) . parent::describeClass(parent::getVoc() . $type, $type) . parent::triplifyString($id, parent::getVoc() . "modifier", $modifier) . parent::triplify($uri, parent::getVoc() . strtolower($k), $id));
             preg_match_all("/ \\[([^\\]]+)\\]/", $v, $m);
             if (isset($m[1])) {
                 foreach ($m[1] as $item) {
                     if (!strstr($item, "KO")) {
                         $item = "kegg:" . str_replace(":", "_", $item);
                     } else {
                         $item = str_replace("KO:", "kegg:", $item);
                     }
                     parent::addRDF(parent::triplify($id, parent::getVoc() . "link", $item));
                 }
             }
             continue;
         }
         // skip these
         if (in_array($k, array("ATOM", "BOND", "BRITE", "AASEQ", "NTSEQ", "SEQUENCE"))) {
             continue;
         }
         // simple strings to keep as is
         if (in_array($k, array("EXACT_MASS", "FORMULA", "MOL_WEIGHT", "LINEAGE", "LENGTH", "MASS", "COMPOSITION", "NODE", "EDGE", "POSITION"))) {
             parent::addRDF(parent::triplifyString($uri, parent::getVoc() . strtolower($k), $v));
             continue;
         }
         // default catchall
         parent::addRDF(parent::triplifyString($uri, parent::getVoc() . strtolower($k), $v . " [script:default]"));
     }
     if (isset($e['reference'])) {
         foreach ($e['reference'] as $i => $r) {
             $ref = parent::getRes() . $e['id'] . ".ref.{$i}";
             parent::addRDF(parent::describeIndividual($ref, $r['title'], parent::getVoc() . "Reference") . parent::describeClass(parent::getVoc() . "Reference", "Reference") . parent::triplifyString($ref, parent::getVoc() . "authors", $r['authors']) . parent::triplifyString($ref, parent::getVoc() . "journal", $r['journal']) . parent::triplify($uri, parent::getVoc() . "reference", $ref));
             if (isset($r['pubmed'])) {
                 parent::addRDF(parent::triplify($ref, parent::getVoc() . "x-pubmed", $r['pubmed']));
             }
         }
     }
     fclose($fp);
 }
Beispiel #9
0
 /**
  * Converts rows of the MGI genotype / "not disease" report into RDF.
  *
  * Each row must have 8 tab-separated columns. Of these the method reads:
  *   [0] genotype string, [2] "|"-separated allele ids, [5] ","-separated
  *   PubMed ids (optional), [7] ","-separated OMIM ids.
  * For every (disease, allele) pair it emits one negated
  * "Allele-Disease-Non-Association" individual, links the PubMed ids to it,
  * and flushes the RDF buffer after each row.
  *
  * Rows with a different column count raise an E_USER_WARNING and are skipped.
  */
 function MGI_Geno_NotDisease()
 {
     while ($l = $this->getReadFile()->read(248000)) {
         // Strip only the line terminator so it cannot leak into the last
         // column (the OMIM ids). A full trim() would also drop trailing tabs
         // and thereby change the column count of rows with empty last fields.
         // NOTE(review): assumes read() returns the raw line, as the sibling
         // parsers that trim() it suggest - harmless if it is already stripped.
         $a = explode("\t", rtrim($l, "\r\n"));
         if (count($a) != 8) {
             trigger_error("Incorrect number of columns", E_USER_WARNING);
             continue;
         }
         $genotype = $a[0];
         $alleles = explode("|", strtolower($a[2]));
         $diseases = explode(",", $a[7]);
         // The PubMed list is identical for every pair on this row: split it once.
         $pmids = $a[5] ? explode(",", $a[5]) : array();
         foreach ($diseases as $d) {
             $d = trim($d);
             if ($d === '') {
                 // an empty OMIM column would otherwise yield a bare "omim:" resource
                 continue;
             }
             $disease = "omim:{$d}";
             foreach ($alleles as $allele) {
                 // the association id is derived from the allele/disease pair
                 $id = parent::getRes() . md5($allele . $disease);
                 $label = "{$allele} {$disease} absent association";
                 parent::addRDF(parent::describeIndividual($id, $label, $this->getVoc() . "Allele-Disease-Non-Association") . parent::describeClass($this->getVoc() . "Allele-Disease-Non-Association", "MGI Allele-Disease Non-Association") . parent::triplify($id, $this->getVoc() . "allele", $allele) . parent::triplifyString($id, $this->getVoc() . "genotype-string", $genotype) . parent::triplify($id, $this->getVoc() . "disease", $disease) . parent::triplifyString($id, $this->getVoc() . "is-negated", "true"));
                 foreach ($pmids as $pmid) {
                     parent::addRDF(parent::triplify($id, $this->getVoc() . "x-pubmed", "pubmed:" . $pmid));
                 }
             }
         }
         $this->writeRDFBufferToWriteFile();
     }
 }
Beispiel #10
0
 /**
  * Converts one decoded OMIM entry into RDF and adds it to the RDF buffer.
  *
  * The entry is read from $obj["omim"]["entryList"][0]["entry"]. Depending on
  * which keys are present the method emits: the typed entry itself with its
  * titles, text sections (plus "refers-to" links for six-digit numbers in
  * braces found in the text), allelic variants, the clinical synopsis with its
  * coded phenotypes, the gene map, phenotype maps, literature references and
  * external database links.
  *
  * @param array  $obj  decoded OMIM response holding the entry
  * @param string $type entry type; used as the class label and, with spaces and
  *                     slashes replaced by "-", as the class name in the vocabulary
  */
 function ParseEntry($obj, $type)
 {
     $o = $obj["omim"]["entryList"][0]["entry"];
     $omim_id = $o['mimNumber'];
     $omim_uri = parent::getNamespace() . $o['mimNumber'];
     if (isset($o['version'])) {
         parent::setDatasetVersion($o['version']);
     }
     // add the links
     parent::addRDF($this->QQuadO_URL($omim_uri, "rdfs:seeAlso", "http://omim.org/entry/" . $omim_id));
     parent::addRDF($this->QQuadO_URL($omim_uri, "owl:sameAs", "http://identifiers.org/omim/" . $omim_id));
     // parse titles
     $titles = $o['titles'];
     // read the preferred title defensively: it is used as the entry label
     // before the isset() check further down
     $preferred_title = isset($titles['preferredTitle']) ? $titles['preferredTitle'] : null;
     $type_uri = parent::getVoc() . str_replace(array(" ", "/"), "-", ucfirst($type));
     parent::addRDF(parent::describeIndividual($omim_uri, $preferred_title, $type_uri) . parent::describeClass($type_uri, $type));
     if (isset($titles['preferredTitle'])) {
         parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "preferred-title", $titles['preferredTitle']));
     }
     if (isset($titles['alternativeTitles'])) {
         // alternative titles are separated by ";;"
         $b = explode(";;", $titles['alternativeTitles']);
         foreach ($b as $title) {
             parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "alternative-title", trim($title)));
         }
     }
     // parse text sections
     if (isset($o['textSectionList'])) {
         foreach ($o['textSectionList'] as $section) {
             $text_section = $section['textSection'];
             $content = $text_section['textSectionContent'];
             if ($text_section['textSectionTitle'] == "Description") {
                 parent::addRDF(parent::triplifyString($omim_uri, "dc:description", $content));
             } else {
                 // any other section becomes a predicate named after its title
                 $p = str_replace(" ", "-", strtolower($text_section['textSectionTitle']));
                 parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "{$p}", $content));
             }
             // parse the omim references: six-digit numbers wrapped in braces
             preg_match_all("/\\{([0-9]{6})\\}/", $content, $m);
             if (isset($m[1][0])) {
                 foreach ($m[1] as $oid) {
                     parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "refers-to", "omim:{$oid}"));
                 }
             }
         }
     }
     // allelic variants
     if (isset($o['allelicVariantList'])) {
         foreach ($o['allelicVariantList'] as $i => $v) {
             $v = $v['allelicVariant'];
             // the variant resource is keyed by its position in the list
             $uri = parent::getRes() . "{$omim_id}" . "_allele_" . $i;
             $label = str_replace("\n", " ", $v['name']);
             parent::addRDF(parent::describeIndividual($uri, $label, parent::getVoc() . "Allelic-Variant") . parent::describeClass(parent::getVoc() . "Allelic-Variant", "Allelic Variant"));
             if (isset($v['alternativeNames'])) {
                 $names = explode(";;", $v['alternativeNames']);
                 foreach ($names as $name) {
                     $name = str_replace("\n", " ", $name);
                     parent::addRDF(parent::triplifyString($uri, parent::getVoc() . "alternative-names", $name));
                 }
             }
             if (isset($v['text'])) {
                 parent::addRDF(parent::triplifyString($uri, "dc:description", $v['text']));
             }
             if (isset($v['mutations'])) {
                 parent::addRDF(parent::triplifyString($uri, parent::getVoc() . "mutation", $v['mutations']));
             }
             if (isset($v['dbSnps'])) {
                 $snps = explode(",", $v['dbSnps']);
                 foreach ($snps as $snp) {
                     parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-dbsnp", "dbsnp:" . $snp));
                 }
             }
             parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "variant", $uri));
         }
     }
     // clinical synopsis
     if (isset($o['clinicalSynopsis'])) {
         $cs = $o['clinicalSynopsis'];
         $cs_uri = parent::getRes() . "" . $omim_id . "_cs";
         parent::addRDF(parent::describeIndividual($cs_uri, "Clinical synopsis for omim {$omim_id}", parent::getVoc() . "Clinical-Synopsis") . parent::describeClass(parent::getVoc() . "Clinical-Synopsis", "Clinical Synopsis") . parent::triplify($omim_uri, parent::getVoc() . "clinical-synopsis", $cs_uri));
         // @todo provenance is ignored for now
         $provenance_keys = array('contributors', 'creationDate', 'editHistory', 'epochCreated', 'dateCreated', 'epochUpdated', 'dateUpdated');
         foreach ($cs as $k => $v) {
             // ignore the boolean "...Exists" assertions
             if (strstr($k, "Exists")) {
                 continue;
             }
             if (in_array($k, $provenance_keys)) {
                 continue;
             }
             // normalise a scalar value to the same shape as a nested section
             if (!is_array($v)) {
                 $v = array($k => $v);
             }
             foreach ($v as $k1 => $v1) {
                 $phenotypes = explode(";", $v1);
                 foreach ($phenotypes as $coded_phenotype) {
                     // parse out the codes
                     $coded_phenotype = trim($coded_phenotype);
                     if (!$coded_phenotype) {
                         continue;
                     }
                     // the label is the phenotype text with its {code} annotations removed
                     $phenotype = preg_replace("/\\{.*\\}/", "", $coded_phenotype);
                     $phenotype_id = parent::getRes() . "" . md5(strtolower($phenotype));
                     $entity_id = parent::getRes() . "" . $k1;
                     parent::addRDF(parent::describeIndividual($phenotype_id, $phenotype, parent::getVoc() . 'Characteristic') . parent::describeClass(parent::getVoc() . 'Characteristic', 'Characteristic') . parent::triplify($cs_uri, parent::getVoc() . "feature", $phenotype_id) . parent::describeIndividual($entity_id, $k1, parent::getVoc() . "Entity") . parent::describeClass(parent::getVoc() . "Entity", "Entity") . parent::triplify($phenotype_id, parent::getVoc() . "characteristic-of", $entity_id));
                     // parse out the vocab references
                     preg_match_all("/\\{([0-9A-Za-z \\:\\-\\.]+)\\}|;/", $coded_phenotype, $codes);
                     if (!isset($codes[1][0])) {
                         continue;
                     }
                     foreach ($codes[1] as $entry) {
                         $entries = explode(" ", trim($entry));
                         foreach ($entries as $e) {
                             // "HPO" and "EOM" are bare vocabulary words, not identifiers
                             if ($e == "HPO" || $e == "EOM") {
                                 continue;
                             }
                             $this->getRegistry()->parseQName($e, $ns, $id);
                             if (!isset($ns) || $ns == '') {
                                 // no prefix: treated as an OMIM number, keeping the part before the first "."
                                 $b = explode(".", $id);
                                 $ns = "omim";
                                 $id = $b[0];
                             } else {
                                 $ns = str_replace(array("hpo", "id", "icd10cm", "icd9cm", "snomedct"), array("hp", "eom", "icd10", "icd9", "snomed"), $ns);
                             }
                             parent::addRDF(parent::triplify($phenotype_id, parent::getVoc() . "x-{$ns}", "{$ns}:{$id}"));
                         }
                     }
                 }
             }
         }
     }
     // genemap
     if (isset($o['geneMap'])) {
         $map = $o['geneMap'];
         if (isset($map['chromosome'])) {
             parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "chromosome", (string) $map['chromosome']));
         }
         if (isset($map['cytoLocation'])) {
             parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "cytolocation", (string) $map['cytoLocation']));
         }
         if (isset($map['geneSymbols'])) {
             // PREG_SPLIT_NO_EMPTY: a leading/trailing separator must not
             // produce an empty token and thus a bare "symbol:" resource
             $b = preg_split("/[,;\\. ]+/", $map['geneSymbols'], -1, PREG_SPLIT_NO_EMPTY);
             foreach ($b as $symbol) {
                 parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "gene-symbol", "symbol:" . trim($symbol)));
             }
         }
         if (isset($map['geneName'])) {
             $b = explode(",", $map['geneName']);
             foreach ($b as $name) {
                 parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "gene-name", trim($name)));
             }
         }
         if (isset($map['mappingMethod'])) {
             $b = explode(",", $map['mappingMethod']);
             foreach ($b as $c) {
                 $mapping_method = trim($c);
                 $method_uri = $this->get_method_type($mapping_method);
                 // a false result is skipped rather than emitted
                 if ($method_uri !== false) {
                     parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "mapping-method", $method_uri));
                 }
             }
         }
         if (isset($map['mouseGeneSymbol'])) {
             $b = explode(",", $map['mouseGeneSymbol']);
             foreach ($b as $c) {
                 parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "mouse-gene-symbol", "symbol:" . strtoupper($c)));
             }
         }
         if (isset($map['mouseMgiID'])) {
             $b = explode(",", $map['mouseMgiID']);
             foreach ($b as $c) {
                 // the value is used as the object name as-is
                 parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-mgi", $c));
             }
         }
         if (isset($map['geneInheritance']) && $map['geneInheritance'] != '') {
             parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "gene-inheritance", $map['geneInheritance']));
         }
     }
     // phenotype maps
     if (isset($o['phenotypeMapList'])) {
         foreach ($o['phenotypeMapList'] as $i => $phenotypeMap) {
             $phenotypeMap = $phenotypeMap['phenotypeMap'];
             // phenotype map resources are numbered from 1
             $pm_uri = parent::getRes() . $omim_id . "_pm_" . ($i + 1);
             parent::addRDF(parent::describeIndividual($pm_uri, "phenotype mapping for {$omim_id}", parent::getVoc() . "Phenotype-Map") . parent::describeClass(parent::getVoc() . "Phenotype-Map", "OMIM Phenotype-Map") . parent::triplify($omim_uri, parent::getVoc() . "phenotype-map", $pm_uri));
             foreach (array_keys($phenotypeMap) as $k) {
                 if (in_array($k, array("mimNumber", "phenotypeMimNumber", "phenotypicSeriesMimNumber"))) {
                     // MIM numbers become links to other OMIM entries
                     parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . $k, "omim:" . $phenotypeMap[$k]));
                 } elseif ($k == "geneSymbols") {
                     $genes = explode(", ", $phenotypeMap[$k]);
                     foreach ($genes as $gene) {
                         parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . "gene-symbol", "hgnc.symbol:" . $gene));
                     }
                 } elseif ($k == "phenotypeMappingKey") {
                     $method = $this->get_phenotype_mapping_method_type($phenotypeMap[$k]);
                     parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . "mapping-method", $method));
                 } else {
                     // everything else is kept as a plain literal under its own key
                     parent::addRDF(parent::triplifyString($pm_uri, parent::getVoc() . $k, $phenotypeMap[$k]));
                 }
             }
         }
     }
     // references
     if (isset($o['referenceList'])) {
         foreach ($o['referenceList'] as $r) {
             $r = $r['reference'];
             // only references that carry a PubMed id are emitted
             if (!isset($r['pubmedID'])) {
                 continue;
             }
             $pubmed_uri = "pubmed:" . $r['pubmedID'];
             parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "article", $pubmed_uri));
             $title = 'article';
             if (isset($r['title'])) {
                 $title = $r['title'];
             }
             // use the fallback-aware $title: reading $r['title'] directly
             // fails with an undefined index when a reference has no title
             parent::addRDF(parent::describe($pubmed_uri, addslashes($title)));
             if (isset($r['articleUrl'])) {
                 parent::addRDF($this->QQuadO_URL($pubmed_uri, "rdfs:seeAlso", htmlentities($r['articleUrl'])));
             }
         }
     }
     // external ids
     if (isset($o['externalLinks'])) {
         foreach ($o['externalLinks'] as $k => $id) {
             if ($id === false) {
                 continue;
             }
             // $ns stays empty for link types that are recognised but not converted
             $ns = '';
             switch ($k) {
                 case 'approvedGeneSymbols':
                     $ns = 'symbol';
                     break;
                 case 'geneIDs':
                     $ns = 'ncbigene';
                     break;
                 case 'ncbiReferenceSequences':
                     $ns = 'gi';
                     break;
                 case 'genbankNucleotideSequences':
                     $ns = 'gi';
                     break;
                 case 'proteinSequences':
                     $ns = 'gi';
                     break;
                 case 'uniGenes':
                     $ns = 'unigene';
                     break;
                 case 'ensemblIDs':
                     $ns = 'ensembl';
                     break;
                 case 'swissProtIDs':
                     $ns = 'uniprot';
                     break;
                 case 'mgiIDs':
                     $ns = 'mgi';
                     // Strip the "PREFIX:" part from every comma-separated id.
                     // Exploding the whole value on ":" only works for a single
                     // id and mangles a list such as "MGI:1,MGI:2".
                     $mgi_ids = array();
                     foreach (explode(",", $id) as $mgi) {
                         $b = explode(":", $mgi);
                         $mgi_ids[] = isset($b[1]) ? $b[1] : $b[0];
                     }
                     $id = implode(",", $mgi_ids);
                     break;
                 case 'flybaseIDs':
                     $ns = 'flybase';
                     break;
                 case 'zfinIDs':
                     $ns = 'zfin';
                     break;
                 case 'hprdIDs':
                     $ns = 'hprd';
                     break;
                 case 'orphanetDiseases':
                     $ns = 'orphanet';
                     break;
                 case 'refSeqAccessionIDs':
                     $ns = 'refseq';
                     break;
                 case 'ordrDiseases':
                     $ns = 'ordr';
                     // keep only the part before the first ";;"
                     $b = explode(";;", $id);
                     $id = $b[0];
                     break;
                 case 'snomedctIDs':
                     $ns = 'snomed';
                     break;
                 case 'icd10cmIDs':
                     $ns = 'icd10';
                     break;
                 case 'icd9cmIDs':
                     $ns = 'icd9';
                     break;
                 case 'umlsIDs':
                     $ns = 'umls';
                     break;
                 case 'wormbaseIDs':
                     $ns = 'wormbase';
                     break;
                 case 'diseaseOntologyIDs':
                     $ns = 'do';
                     break;
                 // specifically ignoring the link types below
                 case 'geneTests':
                 case 'cmgGene':
                 case 'geneticAllianceIDs':
                 case 'nextGxDx':
                 case 'nbkIDs':
                     // e.g. NBK1207;;Alport Syndrome and Thin Basement Membrane Nephropathy
                 case 'newbornScreeningUrls':
                 case 'decipherUrls':
                 case 'geneReviewShortNames':
                 case 'locusSpecificDBs':
                 case 'geneticsHomeReferenceIDs':
                 case 'omiaIDs':
                 case 'coriellDiseases':
                 case 'clinicalDiseaseIDs':
                 case 'possumSyndromes':
                 case 'keggPathways':
                 case 'gtr':
                 case 'gwasCatalog':
                 case 'mgiHumanDisease':
                 case 'wormbaseDO':
                 case 'dermAtlas':
                     // true/false
                     break;
                 default:
                     echo "unhandled external link {$k} {$id}" . PHP_EOL;
             }
             // nothing is emitted without a namespace, so skip before splitting
             // (ignored link types may not even hold a string)
             if (!$ns) {
                 continue;
             }
             $ids = explode(",", $id);
             foreach ($ids as $id) {
                 if (strstr($id, ";;") === FALSE) {
                     parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-{$ns}", $ns . ':' . $id));
                 } else {
                     // multiple ids//names separated by ";;": keep only the parts
                     // without lowercase letters, dropping the name-like ones
                     $b = explode(";;", $id);
                     foreach ($b as $c) {
                         preg_match("/([a-z])/", $c, $m);
                         if (!isset($m[1])) {
                             parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-{$ns}", $ns . ':' . $c));
                         }
                     }
                 }
             }
         }
     }
     //external links
 }
Beispiel #11
0
 function Parse()
 {
     $l = parent::getReadFile()->read(100000);
     $header = explode("\t", trim(substr($l, 1)));
     if (($c = count($header)) != 54) {
         trigger_erorr("Expecting 54 columns, found {$c}!");
         return FALSE;
     }
     // check # of columns
     while ($l = parent::getReadFile()->read(500000)) {
         $a = explode("\t", trim($l));
         // irefindex identifiers
         $rigid = "irefindex." . $a[34];
         # checksum for interaction
         $rogida = "irefindex." . $a[32];
         # checksum for A
         $rogidb = "irefindex." . $a[33];
         # checksum for B
         $irigid = "irefindex.irigid:" . $a[44];
         # integer id for interaction
         $irogida = "irefindex.irogid:" . $a[42];
         # integer id for A
         $irogidb = "irefindex.irogid:" . $a[43];
         # integer id for B
         $crigid = "irefindex.crigid:" . $a[47];
         # checksum for canonical interaction
         $icrigid = "irefindex.icrigid:" . $a[50];
         # integer id for canonical interaction
         $crogida = "irefindex.crogid:" . $a[45];
         # checksum for A's canonical group
         $crogidb = "irefindex.crogid:" . $a[46];
         # checksum for B's canonical group
         $icrogida = "irefindex.icrogid:" . $a[48];
         # integer for A's canonical group
         $icrogidb = "irefindex.icrogid:" . $a[49];
         # integer for B's canonical group
         // 13 contains the original identifier, the rigid, and the edgetype
         $ids = explode("|", $a[13]);
         if (count($ids) != 3) {
             trigger_error("Expecting 3 entries in column 14");
             print_r($ids);
             exit;
         }
         parent::getRegistry()->parseQName($ids[0], $ns, $id);
         if ($id == '-') {
             // this happens with hprd
             $iid = "hprd:" . substr($ids[1], 6);
         } else {
             $iid = $ns . ":" . $id;
         }
         // get the type
         if ($a[52] == "X") {
             $label = "{$a['0']} - {$a['1']} Interaction";
             $type = "Pairwise-Interaction";
         } else {
             if ($a[52] == "C") {
                 $label = $a[53] . " component complex";
                 #num of participants
                 $type = "Multimeric-Complex";
             } else {
                 if ($a[52] == "Y") {
                     $label = "{$a['0']} homomeric complex";
                     $type = "Homopolymeric-Complex";
                 }
             }
         }
         parent::addRDF(parent::describeIndividual($iid, $label, parent::getVoc() . $type) . parent::describeClass(parent::getVoc() . $type, str_replace("-", " ", $type)));
         // interaction type[52] by method[6]
         unset($method);
         if ($a[6] != '-') {
             $data = $this->ParseStringArray($a[6]);
             $method = trim($data["label"]);
             $qname = trim($data["ns"]) . ":" . trim($data["id"]);
             if ($qname) {
                 parent::addRDF(parent::triplify($iid, parent::getVoc() . "method", $qname) . parent::describeClass($qname, $data['label']));
             }
         }
         parent::addRDF(parent::triplify($iid, "rdfs:seeAlso", "http://wodaklab.org/iRefWeb/interaction/show/" . $a[50]));
         // set the interactors
         for ($i = 0; $i <= 1; $i++) {
             $p = 'a';
             if ($i == 1) {
                 $p = 'b';
             }
             $data = $this->ParseStringArray($a[$i]);
             $interactor = trim($data["ns"]) . ":" . trim($data["id"]);
             parent::addRDF(parent::triplify($iid, parent::getVoc() . "interactor_{$p}", $interactor));
             // biological role
             $role = $a[16 + $i];
             if ($role != '-') {
                 $data = $this->ParseStringArray($role);
                 $qname = trim($data["ns"]) . ":" . trim($data["id"]);
                 if ($qname != "mi:0000") {
                     parent::addRDF(parent::triplify($iid, parent::getVoc() . "interactor_{$p}" . "_biological_role", $qname) . parent::describeClass($qname, $data['label']));
                 }
             }
             // experimental role
             $role = $a[18 + $i];
             if ($role != '-') {
                 $data = $this->ParseStringArray($role);
                 $qname = trim($data["ns"]) . ":" . trim($data["id"]);
                 if ($qname != "mi:0000") {
                     parent::addRDF(parent::triplify($iid, parent::getVoc() . "interactor_{$p}" . "_experimental_role", $qname) . parent::describeClass($qname, $data['label']));
                 }
             }
             // interactor type
             $type = $a[20 + $i];
             if ($type != '-') {
                 $data = $this->ParseStringArray($type);
                 $qname = trim($data["ns"]) . ":" . trim($data["id"]);
                 parent::addRDF(parent::triplify($interactor, "rdf:type", $qname) . parent::describeClass($qname, $data['label']));
             }
         }
         // add the alternatives through the taxon + seq redundant group
         for ($i = 2; $i <= 3; $i++) {
             $taxid = '';
             $rogid = "irefindex." . $a[32 + ($i - 2)];
             parent::addRDF(parent::describeIndividual($rogid, "", parent::getVoc() . "Taxon-Sequence-Identical-Group") . parent::describeClass(parent::getVoc() . "Taxon-Sequence-Identical-Group", "Taxon + Sequence Identical Group"));
             $tax = $a[9 + ($i - 2)];
             if ($tax && $tax != '-' && $tax != '-1') {
                 $data = $this->ParseStringArray($tax);
                 $taxid = trim($data["ns"]) . ":" . trim($data["id"]);
                 parent::addRDF(parent::triplify($rogid, parent::getVoc() . "x-taxonomy", $taxid));
             }
             $list = explode("|", $a[3 + ($i - 2)]);
             foreach ($list as $item) {
                 $data = $this->ParseStringArray($item);
                 $ns = trim($data["ns"]);
                 $id = trim($data["id"]);
                 $qname = $ns . ":" . $id;
                 if ($ns && $ns != 'rogid' && $ns != 'irogid' and $id != '-') {
                     parent::addRDF(parent::triplify($rogid, parent::getVoc() . "has-member", $qname));
                     if ($taxid && $taxid != '-' && $taxid != '-1') {
                         parent::addRDF(parent::triplify($qname, parent::getVoc() . "x-taxonomy", $taxid));
                     }
                 }
             }
         }
         // publications
         $list = explode("|", $a[8]);
         foreach ($list as $item) {
             if ($item == '-' && $item != 'pubmed:0') {
                 continue;
             }
             $data = $this->ParseStringArray($item);
             $qname = trim($data["ns"]) . ":" . trim($data["id"]);
             parent::addRDF(parent::triplify($iid, parent::getVoc() . "article", $qname));
         }
         // MI interaction type
         if ($a[11] != '-' && $a[11] != 'NA') {
             $data = $this->ParseStringArray($a[11]);
             $qname = trim($data["ns"]) . ":" . trim($data["id"]);
             parent::addRDF(parent::triplify($iid, "rdf:type", $qname));
             if (!isset($defined[$qname])) {
                 $defined[$qname] = '';
                 parent::addRDF(parent::triplifyString($qname, "rdfs:label", $data['label']));
             }
         }
         // source
         if ($a[12] != '-') {
             $data = $this->ParseStringArray($a[12]);
             $qname = trim($data["ns"]) . ":" . trim($data["id"]);
             parent::addRDF(parent::triplify($iid, parent::getVoc() . "source", $qname));
         }
         // confidence
         $list = explode("|", $a[14]);
         foreach ($list as $item) {
             $data = $this->ParseStringArray($item);
             $ns = trim($data["ns"]);
             $id = trim($data["id"]);
             if ($ns == 'lpr') {
                 //  lowest number of distinct interactions that any one article reported
                 parent::addRDF(parent::triplifyString($iid, parent::getVoc() . "minimum-number-interactions-reported", $id));
             } else {
                 if ($ns == "hpr") {
                     //  higher number of distinct interactions that any one article reports
                     parent::addRDF(parent::triplifyString($iid, parent::getVoc() . "maximum-number-interactions-reported", $id));
                 } else {
                     if ($ns = 'hp') {
                         //  total number of unique PMIDs used to support the interaction
                         parent::addRDF(parent::triplifyString($iid, parent::getVoc() . "number-supporting-articles", $id));
                     }
                 }
             }
         }
         // expansion method
         if ($a[15]) {
             $id = parent::getRes() . md5($a[15]);
             parent::addRDF(parent::describeIndividual($id, $a[15], parent::getVoc() . "Expansion-Method") . parent::describeClass(parent::getVoc() . "Expansion-Method", "Expansion Method") . parent::triplify($iid, parent::getVoc() . "expansion-method", $id));
         }
         // host organism
         if ($a[28] != '-') {
             $data = $this->ParseStringArray($a[28]);
             $qname = trim($data["ns"]) . ":" . trim($data["id"]);
             parent::addRDF(parent::triplify($iid, parent::getVoc() . "host-organism", $qname));
         }
         // @todo add to record
         // created 2010/05/18
         $date = str_replace("/", "-", $a[30]) . "T00:00:00Z";
         parent::addRDF(parent::triplifyString($iid, "dc:created", $date, "xsd:dateTime"));
         // taxon-sequence identical interaction group
         parent::addRDF(parent::triplify($iid, parent::getVoc() . "taxon-sequence-identical-interaction", $rigid) . parent::triplify($rigid, "rdf:type", parent::getVoc() . "Taxon-Sequence-Identical-Interaction") . parent::describeClass(parent::getVoc() . "Taxon-Sequence-Identical-Interaction", "Taxon + Sequence Identical Interaction") . parent::triplify($rigid, parent::getVoc() . "irigid", $irigid) . parent::triplify($rigid, parent::getVoc() . "interactor-a", $rogida) . parent::triplify($rogida, parent::getVoc() . "irogid", $irogida) . parent::triplify($rigid, parent::getVoc() . "interactor-b", $rogidb) . parent::triplify($rogidb, parent::getVoc() . "irogid", $irogidb) . parent::triplify($rogida, parent::getVoc() . "canonical-group", $crogida) . parent::triplify($rogidb, parent::getVoc() . "canonical-group", $crogidb) . parent::triplify($rigid, parent::getVoc() . "taxon-sequence-similar-interaction", $crigid) . parent::triplify($crigid, "rdf:type", parent::getVoc() . "Taxon-Sequence-Canonical-Interaction") . parent::describeClass(parent::getVoc() . "Taxon-Sequence-Canonical-Interaction", "Taxon + Sequence Canonical Interaction") . parent::triplify($crigid, parent::getVoc() . "icrigid", $icrigid) . parent::triplify($crigid, parent::getVoc() . "interactor-a-canonical-group", $crogida) . parent::triplify($crogida, "rdf:type", parent::getVoc() . "Taxon-Sequence-Similar-Group") . parent::triplify($crogida, parent::getVoc() . "icrogid", $icrogida) . parent::triplify($crigid, parent::getVoc() . "interactor-b-canonical-group", $crogidb) . parent::triplify($crogidb, "rdf:type", parent::getVoc() . "Taxon-Sequence-Similar-Group") . parent::triplify($crogidb, parent::getVoc() . "icrogid", $icrogidb) . parent::describeClass(parent::getVoc() . "Taxon-Sequence-Similar-Group", "Taxon + Sequence Similar Group"));
         parent::writeRDFBufferToWriteFile();
     }
 }
Beispiel #12
0
 /**
  * Convert the citations file into RDF.
  *
  * Each record read from the input is split on the "\t|\t" delimiter after
  * stripping the trailing tab / pipe / newline characters. The fields used are:
  *   [0] citation id, [1] citation key, [2] PubMed id ("0" means none),
  *   [4] URL-ish "see also" value, [5] free text, [6] space separated taxon ids.
  * Records that lack a citation key or a PubMed column are skipped.
  * The RDF buffer is flushed to the write file after every record.
  */
 private function citations()
 {
     while ($l = $this->getReadFile()->read(2000000)) {
         $a = explode("\t|\t", rtrim($l, "\t|\n"));
         if (!isset($a[1]) or !isset($a[2])) {
             continue;
         }
         $c = parent::getRes() . "citation-id-" . $a[0];

         // normalise the optional link into something that looks like a URL
         $seealso = isset($a[4]) ? trim($a[4]) : "";
         if ($seealso) {
             $seealso = str_replace(array("lx: DOI ", "http;//"), array("http://dx.doi.org/", "http://"), $seealso);
             if (strlen($seealso) > 2 and !strstr($seealso, "http")) {
                 $seealso = "http://" . $seealso;
             }
             $seealso = parent::triplify($c, "rdfs:seeAlso", $seealso);
         }

         parent::addRDF(
             parent::describeIndividual($c, $a[1], $this->getVoc() . "Citation")
             . parent::describeClass($this->getVoc() . "Citation", "Citation")
             . parent::triplifyString($c, parent::getVoc() . "citation-key", $a[1])
             . ($a[2] == "0" ? "" : parent::triplify($c, parent::getVoc() . "x-pubmed", "pubmed:" . $a[2]))
             . $seealso
             . ((isset($a[5]) and $a[5]) ? parent::triplifyString($c, parent::getVoc() . "text", str_replace("\"", "", $a[5])) : "")
         );

         if (isset($a[6])) {
             foreach (explode(" ", trim($a[6])) as $taxid) {
                 // explode() always yields at least one element, so a blank
                 // field or a doubled space would otherwise produce a
                 // "taxonomy:" resource without an identifier.
                 if ($taxid === '') {
                     continue;
                 }
                 parent::addRDF(parent::triplify("taxonomy:{$taxid}", $this->getVoc() . "citation", $c));
             }
         }
         $this->writeRDFBufferToWriteFile();
     }
 }
Beispiel #13
0
 /**
  * Emit one text resource per value found under $x->{$list_name}->{$item_name}
  * and link it to $id through $predicate.
  *
  * @param object      $x              node that may hold the $list_name property (passed by reference)
  * @param string      $id             subject the generated resources are attached to
  * @param string      $list_name      name of the list property on $x
  * @param string      $item_name      name of the item property on each list entry; also used,
  *                                    with its first letter upper-cased, to build the class name
  * @param string      $predicate      predicate linking $id to each generated resource
  * @param string|null $list_item_name optional name of a nested property holding several values;
  *                                    when omitted or absent the item itself is the single value
  */
 function AddText(&$x, $id, $list_name, $item_name, $predicate, $list_item_name = null)
 {
     if (!isset($x->{$list_name})) {
         return;
     }
     $class_id = parent::getVoc() . ucfirst($item_name);
     foreach ($x->{$list_name} as $item) {
         if (!isset($item->{$item_name}) || $item->{$item_name} == '') {
             continue;
         }
         $l = $item->{$item_name};

         // Only look for the nested list when a name was actually supplied,
         // instead of probing the item with an empty property name.
         if ($list_item_name !== null && isset($l->{$list_item_name})) {
             $values = $l->{$list_item_name};
         } else {
             $values = array($l);
         }

         foreach ($values as $value) {
             // the resource identifier is derived from the text itself
             $kid = parent::getRes() . md5($value);
             $this->addRDF(
                 $this->describeIndividual($kid, "{$item_name} for {$id}", $class_id)
                 . $this->describeClass($class_id, $item_name)
                 . $this->triplifyString($kid, "rdf:value", $value)
                 . $this->triplify($id, $predicate, $kid)
             );
         }
     }
 }
Beispiel #14
0
 /**
  * Convert the disorder/gene association file into RDF.
  *
  * For every Disorder in each parsed "DisorderList" chunk, and for every
  * DisorderGeneAssociation below it, this emits:
  *   - the gene (label, symbol, synonyms, external references),
  *   - an association resource typed by the association type, carrying the
  *     association status and linked to both the disorder and the gene.
  * The RDF buffer is flushed after each disorder.
  *
  * @param mixed $file passed straight to the CXML constructor
  */
 function genes($file)
 {
     $xml = new CXML($file);
     while ($xml->parse("DisorderList") == TRUE) {
         $x = $xml->GetXMLRoot();
         foreach ($x->Disorder as $d) {
             $orphanet_id = parent::getNamespace() . (string) $d->OrphaNumber;
             $disorder_name = (string) $d->Name;
             foreach ($d->DisorderGeneAssociationList->DisorderGeneAssociation as $dga) {
                 // gene
                 $gene = $dga->Gene;
                 $gene_id = parent::getNamespace() . (string) $gene->OrphaNumber;
                 $gene_label = (string) $gene->Name;
                 $gene_symbol = (string) $gene->Symbol;
                 parent::addRDF(
                     parent::describeIndividual($gene_id, $gene_label, parent::getVoc() . "Gene")
                     . parent::describeClass(parent::getVoc() . "Gene", "orphanet gene")
                     . parent::triplifyString($gene_id, parent::getVoc() . "symbol", $gene_symbol)
                 );

                 // Iterate the Synonym children rather than the list container,
                 // so every synonym is emitted (not just the first) and an
                 // empty list no longer produces an empty literal.
                 if (isset($gene->SynonymList->Synonym)) {
                     foreach ($gene->SynonymList->Synonym as $synonym_node) {
                         $synonym = (string) $synonym_node;
                         if ($synonym === '') {
                             continue;
                         }
                         parent::addRDF(parent::triplifyString($gene_id, parent::getVoc() . "synonym", $synonym));
                     }
                 }

                 // Same reasoning for external references: walk each
                 // ExternalReference child of the list.
                 if (isset($gene->ExternalReferenceList->ExternalReference)) {
                     foreach ($gene->ExternalReferenceList->ExternalReference as $er) {
                         $db = (string) $er->Source;
                         $db = parent::getRegistry()->getPreferredPrefix($db);
                         $id = (string) $er->Reference;
                         $xref = "{$db}:{$id}";
                         parent::addRDF(parent::triplify($gene_id, parent::getVoc() . "x-{$db}", $xref));
                     }
                 }

                 // association resource: disorder number + hash of the association XML
                 $dga_id = parent::getRes() . (string) $d->OrphaNumber . "_" . md5($dga->asXML());
                 $ga = $dga->DisorderGeneAssociationType;
                 $ga_id = parent::getNamespace() . (string) $ga->attributes()->id;
                 $ga_label = (string) $ga->Name;
                 $status = $dga->DisorderGeneAssociationStatus;
                 $s_id = parent::getNamespace() . (string) $status->attributes()->id;
                 $s_label = (string) $status->Name;
                 parent::addRDF(
                     parent::describeIndividual($dga_id, "{$ga_label} {$gene_label} in {$disorder_name} ({$s_label})", $ga_id)
                     . parent::describeClass($ga_id, $ga_label, parent::getVoc() . "Disorder-Gene-Association")
                     . parent::triplify($dga_id, parent::getVoc() . "status", $s_id)
                     . parent::describeClass($s_id, $s_label, parent::getVoc() . "Disorder-Gene-Association-Status")
                     . parent::triplify($dga_id, parent::getVoc() . "disorder", $orphanet_id)
                     . parent::describeIndividual($orphanet_id, $disorder_name, parent::getVoc() . "Disorder")
                     . parent::triplify($dga_id, parent::getVoc() . "gene", $gene_id)
                 );
             }
             parent::writeRDFBufferToWriteFile();
         }
     }
     unset($xml);
 }
Beispiel #15
0
 /**
  * Convert the tab-delimited product file into RDF.
  *
  * The first line is treated as a header and discarded. Every following line
  * must have exactly 18 tab separated columns; other lines are reported and
  * skipped. Columns used:
  *   [0] product resource id      [1] product-id literal
  *   [2] product type             [3] proprietary name (also the label)
  *   [4] trade name suffix        [5] non-proprietary names (";" separated)
  *   [6] dosage forms ("," separated)   [7] routes ("; " separated)
  *   [8] start marketing date     [9] end marketing date
  *   [10] marketing category      [11] application number
  *   [12] labeller                [13] substances (";" separated)
  *   [14] strengths               [15] units (parallel to [13])
  *   [16] pharmacological classes ("," separated)
  * The RDF buffer is flushed after every product.
  *
  * @param resource $fpin open, readable file handle
  */
 function product($fpin)
 {
     fgets($fpin);
     // header
     while ($l = fgets($fpin, 100000)) {
         $a = explode("\t", $l);
         if (count($a) != 18) {
             trigger_error("Expected 18 columns, instead found " . count($a));
             continue;
         }
         $product_id = parent::getNamespace() . $a[0];
         $product_label = $a[3];
         $product_type_label = ucfirst(strtolower($a[2]));
         // The type resource is derived from the type label (not from the
         // product's own name), so products of the same type share one resource.
         $product_type = parent::getVoc() . str_replace(" ", "-", $product_type_label);
         parent::addRDF(
             parent::describeIndividual($product_id, $product_label, parent::getVoc() . "Product")
             . parent::describeClass(parent::getVoc() . "Product", "NDC Product")
             . parent::triplify($product_id, parent::getVoc() . "product-type", $product_type)
             . parent::describeIndividual($product_type, $product_type_label, parent::getVoc() . "Product-Type")
             . parent::describeClass(parent::getVoc() . "Product-Type", "Product Type")
             . parent::triplifyString($product_id, parent::getVoc() . "product-id", $a[1])
             . parent::triplifyString($product_id, parent::getVoc() . "proprietary-name", $a[3])
             . parent::triplifyString($product_id, parent::getVoc() . "trade-name-suffix", $a[4])
         );
         if ($a[5]) {
             $b = explode(";", $a[5]);
             foreach ($b as $c) {
                 parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "non-proprietary-name", trim($c)));
             }
         }
         if ($a[6]) {
             $b = explode(",", $a[6]);
             foreach ($b as $c) {
                 $dosageform = strtolower($c);
                 $dosageform_id = parent::getVoc() . str_replace(" ", "-", ucfirst(strtolower($c)));
                 parent::addRDF(
                     parent::describeIndividual($dosageform_id, $dosageform, parent::getVoc() . "Dosage-Form")
                     . parent::describeClass(parent::getVoc() . "Dosage-Form", "NDC Dosage Form")
                     . parent::triplify($product_id, parent::getVoc() . "dosage-form", $dosageform_id)
                 );
             }
         }
         if ($a[7]) {
             //  MV
             $b = explode("; ", $a[7]);
             foreach ($b as $c) {
                 $route = strtolower(trim($c));
                 $route_id = parent::getVoc() . str_replace(" ", "-", ucfirst(strtolower($c)));
                 parent::addRDF(
                     parent::describeIndividual($route_id, $route, parent::getVoc() . "Route")
                     . parent::describeClass(parent::getVoc() . "Route", "NDC Drug Route")
                     . parent::triplify($product_id, parent::getVoc() . "route", $route_id)
                 );
             }
         }
         // Dates are sliced as 4 + 2 + 2 characters and re-joined with dashes
         // (assumes the source uses YYYYMMDD - TODO confirm against the data).
         // substr() takes the string first, then offset and length.
         if ($a[8]) {
             $date = substr($a[8], 0, 4) . "-" . substr($a[8], 4, 2) . "-" . substr($a[8], 6, 2);
             parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "start-marketing-date", $date));
         }
         if ($a[9]) {
             $date = substr($a[9], 0, 4) . "-" . substr($a[9], 4, 2) . "-" . substr($a[9], 6, 2);
             parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "end-marketing-date", $date));
         }
         if ($a[10]) {
             parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "marketing-category", $a[10]));
         }
         if ($a[11]) {
             parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "application-number", $a[11]));
         }
         // create a labeller node
         if ($a[12]) {
             $labeller_id = parent::getRes() . md5($a[12]);
             $label = addslashes($a[12]);
             parent::addRDF(
                 parent::describeIndividual($labeller_id, $label, parent::getVoc() . "Labeller")
                 . parent::describeClass(parent::getVoc() . "Labeller", "NDC Labeller")
                 . parent::triplify($product_id, parent::getVoc() . "labeller", $labeller_id)
             );
         }
         // the next three are together
         if ($a[13]) {
             // MV
             $substances = explode(";", $a[13]);
             $strengths = explode(";", $a[14]);
             $units = explode(";", $a[15]);
             foreach ($substances as $i => $substance) {
                 // list the active ingredient
                 $ingredient_label = strtolower($substance);
                 // strengths and units are parallel lists that may be shorter
                 // than the substance list; fall back to an empty value
                 $strength = isset($strengths[$i]) ? $strengths[$i] : '';
                 $unit = isset($units[$i]) ? $units[$i] : '';
                 $ingredient_id = parent::getRes() . md5($ingredient_label);
                 parent::addRDF(
                     parent::describeIndividual($ingredient_id, $ingredient_label, parent::getVoc() . "Ingredient")
                     . parent::describeClass(parent::getVoc() . "Ingredient", "NDC Ingredient")
                     . parent::triplify($product_id, parent::getVoc() . "ingredient", $ingredient_id)
                 );
                 // describe the substance composition
                 $substance_label = "{$strength} {$unit} {$ingredient_label}";
                 $substance_id = parent::getRes() . md5($substance_label);
                 parent::addRDF(
                     parent::describeIndividual($substance_id, $substance_label, parent::getVoc() . "Substance")
                     . parent::triplifyString($substance_id, parent::getVoc() . "amount", $strength)
                     . parent::describeClass(parent::getVoc() . "Substance", "NDC Substance")
                 );
                 $unit_id = parent::getVoc() . md5($unit);
                 parent::addRDF(
                     parent::describeIndividual($unit_id, $unit, parent::getVoc() . "Unit")
                     . parent::describeClass(parent::getVoc() . "Unit", "NDC Unit")
                     . parent::triplify($substance_id, parent::getVoc() . "amount_unit", $unit_id)
                     . parent::triplify($product_id, parent::getVoc() . "has-part", $substance_id)
                 );
             }
         }
         if ($a[16]) {
             // MV
             $b = explode(",", $a[16]);
             foreach ($b as $c) {
                 $cat_id = parent::getVoc() . md5($c);
                 parent::addRDF(
                     parent::describeIndividual($cat_id, $c, parent::getVoc() . "Pharmacological-Class")
                     . parent::describeClass(parent::getVoc() . "Pharmacological-Class", "NDC Pharmacological Class")
                     . parent::triplify($product_id, parent::getVoc() . "pharmacological-class", $cat_id)
                 );
             }
         }
         parent::WriteRDFBufferToWriteFile();
     }
 }
Beispiel #16
0
 /**
  * Read the input line by line, accumulate the text of one record until a
  * line consisting of "//" is seen, then parse that record's sections and
  * emit RDF for it.
  *
  * Sections parsed per record: LOCUS, DEFINITION, ACCESSION, VERSION,
  * KEYWORDS, SEGMENT (optional), FEATURES, SOURCE, CONTIG (optional) and
  * REFERENCE. Lines that are just a newline are not added to the record text.
  * The RDF buffer is flushed after every record.
  */
 function process()
 {
     $gb_record_str = "";
     while ($aLine = $this->getReadFile()->Read(4096)) {
         // a line holding only "//" terminates the current record
         preg_match("/^\\/\\/\$/", $aLine, $matches);
         if (count($matches)) {
             //now remove the header if it is there
             $gb_record_str = $this->removeHeader($gb_record_str);
             $sectionsRaw = $this->parseGenbankRaw($gb_record_str);
             /**
              * SECTIONS being parsed:
              * locus, definition, accession, version, keywords, segment, source, reference, features
              */
             //get locus section(s)
             $locus = $this->retrieveSections("LOCUS", $sectionsRaw);
             $parsed_locus_arr = $this->parseLocus($locus);
             //get the definition section
             $definition = $this->retrieveSections("DEFINITION", $sectionsRaw);
             $parsed_definition_arr = $this->parseDefinition($definition);
             //get the accession
             $accessions = $this->retrieveSections("ACCESSION", $sectionsRaw);
             $parsed_accession_arr = $this->parseAccession($accessions);
             //get the version
             $versions = $this->retrieveSections("VERSION", $sectionsRaw);
             $parsed_version_arr = $this->parseVersion($versions);
             //get the keywords
             $keywords = $this->retrieveSections("KEYWORDS", $sectionsRaw);
             $parsed_keyword_arr = $this->parseKeywords($keywords);
             //may not be any segment section
             // Reset first: otherwise a record without a SEGMENT section would
             // reuse the segments parsed for an earlier record.
             $parsed_segments_arr = null;
             $segments = $this->retrieveSections("SEGMENT", $sectionsRaw);
             if (!empty($segments)) {
                 $parsed_segments_arr = $this->parseSegment($segments);
             }
             $features = $this->retrieveSections("FEATURES", $sectionsRaw);
             $parsed_features_arr = $this->parseFeatures($features);
             //get the source section
             $source = $this->retrieveSections("SOURCE", $sectionsRaw);
             $parsed_source_arr = $this->parseSource($source);
             // Same per-record reset for the optional CONTIG section.
             $parsed_contig_arr = null;
             $contig = $this->retrieveSections("CONTIG", $sectionsRaw);
             if (!empty($contig)) {
                 $parsed_contig_arr = $this->parseContig($contig);
             }
             //get the reference section
             $references = $this->retrieveSections("REFERENCE", $sectionsRaw);
             $parsed_refs_arr = $this->parseReferences($references);

             // the record resource is keyed on the gi taken from the VERSION section
             $gb_res = "gi:" . $parsed_version_arr['gi'];
             $gb_label = utf8_encode(htmlspecialchars($parsed_definition_arr[0]));
             parent::AddRDF(
                 parent::describeIndividual($gb_res, $gb_label, $this->getVoc() . "genbank-record")
                 . parent::triplifyString($gb_res, $this->getVoc() . 'sequence-length', $parsed_locus_arr[0]['sequence_length'])
                 . parent::triplifyString($gb_res, $this->getVoc() . 'strandedness', $parsed_locus_arr[0]['strandedness'])
                 . parent::triplify($gb_res, "rdf:type", $this->getRes() . $parsed_locus_arr[0]['mol_type'])
                 . parent::triplifyString($gb_res, $this->getVoc() . 'chromosome-shape', $parsed_locus_arr[0]['chromosome_shape'])
                 . parent::triplifyString($gb_res, $this->getVoc() . 'division-name', $parsed_locus_arr[0]['division_name'])
                 . parent::triplifyString($gb_res, $this->getVoc() . 'date-of-entry', $parsed_locus_arr[0]['date'])
                 . parent::triplifyString($gb_res, $this->getVoc() . 'source', utf8_encode($parsed_source_arr[0]))
                 . parent::QQuadO_URL($gb_res, $this->getVoc() . 'fasta-seq', 'https://www.ncbi.nlm.nih.gov/sviewer/viewer.cgi?sendto=on&db=nucest&dopt=fasta&val=' . $parsed_version_arr['gi'])
             );

             foreach ($parsed_features_arr as $aFeature) {
                 //getFeatures
                 $type = $aFeature['type'];
                 $feat_desc = $this->getFeatures($type);
                 $label = preg_replace('/\\s\\s*/', ' ', $feat_desc['definition']);
                 $comment = null;
                 $value = $aFeature['value'];
                 // the text before the first "/" is used as the location;
                 // the remaining "/"-separated pieces are scanned for key=value below
                 $value_arr = explode("/", $value);
                 $location = preg_replace('/\\n/', '', $value_arr[0]);
                 $class_id = parent::getVoc() . md5($type);
                 $feat_res = parent::getRes() . md5($type . $location . $gb_res);
                 $feat_label = utf8_encode($type . " " . $location . " for " . $gb_res);
                 if (isset($feat_desc['comment'])) {
                     $comment = $feat_desc['comment'];
                     $comment = preg_replace('/\\s\\s*/', ' ', $comment);
                     $label .= " " . $comment;
                 }
                 parent::AddRDF(
                     parent::describeClass($class_id, $label, parent::getVoc() . "Feature")
                     . parent::describeIndividual($feat_res, $feat_label, $class_id)
                     . parent::triplify($gb_res, $this->getVoc() . "has-feature", $feat_res)
                 );
                 foreach ($value_arr as $aL) {
                     //check if aL has an equals in it
                     $p = "/(\\S+)\\=(.*)/";
                     preg_match($p, $aL, $m);
                     if (count($m)) {
                         if ($m[1] == "db_xref") {
                             // cross references become links, everything else a literal
                             parent::AddRDF(parent::triplify($feat_res, "rdfs:seeAlso", str_replace("\"", "", $m[2])));
                         } else {
                             parent::AddRDF(parent::triplifyString($feat_res, $this->getVoc() . $m[1], utf8_encode(str_replace("\"", "", $m[2]))));
                         }
                     }
                 }
             }
             foreach ($parsed_accession_arr[0] as $acc) {
                 parent::AddRDF(parent::triplifyString($gb_res, $this->getVoc() . "accession", $acc));
             }
             if (isset($parsed_version_arr['versioned_accession'])) {
                 parent::AddRDF(parent::triplifyString($gb_res, $this->getVoc() . "versioned-accession", $parsed_version_arr['versioned_accession']));
             }
             if (isset($parsed_contig_arr)) {
                 foreach ($parsed_contig_arr as $aContig) {
                     parent::AddRDF(parent::triplifyString($gb_res, $this->getVoc() . "contig", parent::safeLiteral($aContig)));
                 }
             }
             foreach ($parsed_keyword_arr as $akw) {
                 parent::AddRDF(parent::triplifyString($gb_res, $this->getVoc() . "keyword", $akw));
             }
             if (isset($parsed_segments_arr)) {
                 foreach ($parsed_segments_arr as $aSeg) {
                     parent::AddRDF(
                         parent::triplifyString($gb_res, $this->getVoc() . "segment-number", $aSeg['segment_number'])
                         . parent::triplifyString($gb_res, $this->getVoc() . "total-segments", $aSeg['total_segments'])
                     );
                 }
             }
             foreach ($parsed_refs_arr as $aRef) {
                 // NOTE(review): the reference resource is named from rand(), so
                 // identifiers differ from run to run - confirm this is intended.
                 $r = rand();
                 $ref_res = $this->getRes() . md5($r);
                 $ref_label = "reference for " . $gb_res;
                 if (isset($aRef['TITLE'])) {
                     parent::AddRDF(
                         parent::describeIndividual($ref_res, $ref_label, $this->getVoc() . "reference")
                         . parent::triplifyString($ref_res, $this->getVoc() . "title", $aRef['TITLE'])
                     );
                 }
                 if (isset($aRef['PUBMED'])) {
                     parent::AddRDF(parent::triplify($ref_res, $this->getVoc() . "x-pubmed", 'pubmed:' . $aRef['PUBMED']));
                 }
                 if (isset($aRef['AUTHORS'])) {
                     parent::AddRDF(parent::triplifyString($ref_res, $this->getVoc() . "authors", $aRef['AUTHORS']));
                 }
                 parent::AddRDF(
                     parent::triplify($gb_res, $this->getVoc() . "reference", $ref_res)
                     . parent::triplifyString($ref_res, $this->getVoc() . "coordinates", $aRef['COORDINATES'])
                     . parent::triplifyString($ref_res, $this->getVoc() . "citation", $aRef['JOURNAL'])
                 );
             }
             // start accumulating the next record
             $gb_record_str = "";
             $this->WriteRDFBufferToWriteFile();
             continue;
         }
         // skip lines that are only a newline; append everything else
         preg_match("/^\n\$/", $aLine, $matches);
         if (count($matches) == 0) {
             $gb_record_str .= $aLine;
         }
     }
     //while
 }
Beispiel #17
0
 /**
  * Convert the 16-column, tab-delimited gene2accession file into RDF.
  *
  * The first line is read and discarded, and lines starting with "#" are
  * skipped. When a taxon filter ($this->taxids) is set, only rows whose first
  * column is in it are processed. For each remaining row this emits:
  *   - a FALDO region with start and stop positions when both genomic
  *     coordinates are present, and
  *   - one triple per non-"-" column, as a resource when the column has a
  *     namespace and as a literal otherwise.
  * Every 10000 rows a counter is echoed and parent::clear() is called.
  */
 private function gene2accession()
 {
     // throw away the first line of the file
     $this->getReadFile()->read(200000);

     // column index => relation name and, for identifier columns, the namespace
     $columns = array(
         0 => array('rel' => "x-taxonomy", 'ns' => "taxonomy"),
         1 => array('rel' => "ncbigene", 'ns' => "ncbigene"),
         2 => array('rel' => "status"),
         3 => array('rel' => "rna-nucleotide-accession.version", 'ns' => "genbank"),
         4 => array('rel' => "rna-nucleotide-gi", 'ns' => "gi"),
         5 => array('rel' => "protein-accession.version", 'ns' => "genbank"),
         6 => array('rel' => "protein-gi", 'ns' => "gi"),
         7 => array('rel' => "genomic-nucleotide-accession.version", 'ns' => "genbank"),
         8 => array('rel' => "genomic-nucleotide-gi", 'ns' => "gi"),
         9 => array('rel' => "genomic-start-position"),
         10 => array('rel' => "genomic-end-position"),
         11 => array('rel' => "orientation"),
         12 => array('rel' => "assembly"),
         13 => array('rel' => "mature-peptide-accession.version", 'ns' => "genbank"),
         14 => array('rel' => "mature-peptide-gi", 'ns' => "gi"),
         15 => array('rel' => "symbol"),
     );

     $row_counter = 1;
     while ($line = $this->getReadFile()->read(200000)) {
         // comment line
         if ($line[0] == "#") {
             continue;
         }
         if ($row_counter++ % 10000 == 0) {
             echo $row_counter . PHP_EOL;
             parent::clear();
         }

         $fields = explode("\t", rtrim($line));
         if (count($fields) != 16) {
             trigger_error("gene2accession: expecting 16 columns, found " . count($fields) . " instead", E_USER_ERROR);
         }

         // honour the optional taxon filter
         $taxon = $fields[0];
         if (isset($this->taxids) and !isset($this->taxids[$taxon])) {
             continue;
         }

         $gene_uri = parent::getNamespace() . $fields[1];
         // a status other than "-" switches genbank accessions to the refseq namespace
         $is_refseq = ($fields[2] != '-');

         if ($fields[9] != '-' and $fields[10] != '-') {
             $prefix = parent::getRes() . $fields[7] . "/";
             $region_uri = $prefix . $fields[9] . "-" . $fields[10];
             $begin_uri = $prefix . $fields[9];
             $end_uri = $prefix . $fields[10];

             if ($fields[11] == "+") {
                 $strand_type = "faldo:ForwardStrandPosition";
             } elseif ($fields[11] == "-") {
                 $strand_type = "faldo:ReverseStrandPosition";
             } else {
                 $strand_type = "faldo:StrandedPosition";
             }

             $gene_ref = "ncbigene:" . $fields[1] . " on " . $fields[7];
             $sequence_ref = "refseq:" . $fields[7];
             parent::addRDF(
                 parent::describeIndividual($region_uri, "location of " . $gene_ref, "faldo:Region")
                 . parent::describeIndividual($begin_uri, "start of " . $gene_ref, "faldo:ExactPosition")
                 . parent::describeIndividual($end_uri, "stop position of " . $gene_ref, "faldo:ExactPosition")
                 . parent::triplify($gene_uri, "faldo:location", $region_uri)
                 . parent::triplify($region_uri, "faldo:begin", $begin_uri)
                 . parent::triplify($begin_uri, "rdf:type", $strand_type)
                 . parent::triplifyString($begin_uri, "faldo:position", $fields[9], "xsd:integer")
                 . parent::triplify($begin_uri, "faldo:reference", $sequence_ref)
                 . parent::triplify($region_uri, "faldo:end", $end_uri)
                 . parent::triplify($end_uri, "rdf:type", $strand_type)
                 . parent::triplifyString($end_uri, "faldo:position", $fields[10], "xsd:integer")
                 . parent::triplify($end_uri, "faldo:reference", $sequence_ref)
             );
         }

         foreach ($columns as $index => $spec) {
             // "-" marks an empty cell
             if ($fields[$index] == "-") {
                 continue;
             }
             // the gene id is the subject itself; positions and orientation
             // are only expressed through the FALDO block above
             if (in_array($index, array(1, 9, 10, 11), true)) {
                 continue;
             }
             $predicate = parent::getVoc() . $spec['rel'];
             if (!isset($spec['ns'])) {
                 parent::addRDF(parent::triplifyString($gene_uri, $predicate, $fields[$index]));
                 continue;
             }
             $prefix_ns = $spec['ns'];
             if ($prefix_ns == 'genbank' and $is_refseq == true) {
                 $prefix_ns = 'refseq';
             }
             parent::addRDF(parent::triplify($gene_uri, $predicate, "{$prefix_ns}:" . $fields[$index]));
         }
         parent::writeRDFBufferToWriteFile();
     }
 }
Beispiel #18
0
 /**
  * Converts a comma-separated gene expression file into RDF.
  *
  * The first line read is treated as the header and must split into exactly
  * 8 comma-separated columns. Every following record is parsed as CSV and
  * turned into one expression individual plus one evidence individual, both
  * identified by an md5 over the record's gene id, dataset counts, p-value
  * and expression direction.
  *
  * Records with fewer than 8 fields (blank or truncated lines) are reported
  * with a warning and skipped instead of being indexed blindly.
  *
  * @return bool|null false when the header does not have 8 columns;
  *                   nothing is returned otherwise.
  */
 function gene_expression()
 {
     $h = explode(",", parent::getReadFile()->read());
     $expected_columns = 8;
     if (($n = count($h)) != $expected_columns) {
         trigger_error("Found {$n} columns in gene file - expecting {$expected_columns}!", E_USER_WARNING);
         return false;
     }
     while ($l = parent::getReadFile()->read(200000)) {
         $data = str_getcsv($l);
         // Guard against blank/truncated records: the fields below are read by
         // fixed offset, so a short row would yield undefined offsets and junk RDF.
         if (($n = count($data)) < $expected_columns) {
             trigger_error("Found {$n} columns in gene record - expecting {$expected_columns}; skipping record", E_USER_WARNING);
             continue;
         }
         $mgi_symbol = $data[0];
         $mgi_description = $data[1];
         $geneid = $data[2];
         $total_datasets = $data[3];
         $total_ovexp = $data[4];
         $total_underexp = $data[5];
         $p_value = $data[6];
         $expression = $data[7];

         // Both identifiers hash the same record signature; the evidence node
         // only differs by the "_evidence" suffix.
         $signature = $geneid . $total_datasets . $total_ovexp . $total_underexp . $p_value . $expression;
         $id = parent::getRes() . md5($signature);
         $evidence_id = parent::getRes() . md5($signature . "_evidence");

         $label = "Dietary restriction induced " . $expression . "-expression of " . $mgi_symbol . " based on microarray results from " . $total_datasets . " datasets, with p-value " . $p_value;
         $type_label = "Gene " . ucfirst($expression) . " Expression";
         $type = parent::getVoc() . str_replace(" ", "-", $type_label);
         $gene_uri = "ncbigene:" . $geneid;

         parent::addRDF(
             parent::describeIndividual($id, $label, $type)
             . parent::describeClass($type, $type_label)
             . parent::triplify($id, parent::getVoc() . "gene", $gene_uri)
             . parent::triplifyString($gene_uri, parent::getVoc() . "mgi-gene-symbol", $mgi_symbol)
             . parent::triplifyString($gene_uri, parent::getVoc() . "mgi-gene-description", $mgi_description)
             . parent::triplify($id, parent::getVoc() . "evidence", $evidence_id)
             . parent::triplifyString($id, parent::getVoc() . "perturbation-context", "dietary restriction")
             . parent::triplifyString($evidence_id, parent::getVoc() . "total-number-datasets", $total_datasets)
             . parent::triplifyString($evidence_id, parent::getVoc() . "total-number-datasets-overexpressed", $total_ovexp)
             . parent::triplifyString($evidence_id, parent::getVoc() . "total-number-datasets-underexpressed", $total_underexp)
             . parent::triplifyString($evidence_id, parent::getVoc() . "p-value", $p_value)
         );
         parent::writeRDFBufferToWriteFile();
     }
     //while
 }
Beispiel #19
0
 /**
  * Converts a tab-separated gene interaction file into RDF.
  *
  * Lines starting with '#' are skipped. Every other line must have exactly
  * 11 tab-separated columns, otherwise a notice is raised and the line is
  * skipped. Columns used: 0 = interaction id, 1 = interaction type,
  * 2 = additional info, 5 = first gene, 8 = second gene.
  *
  * For each row an interaction individual is described together with its
  * class and the two genes it involves. Rows flagged "No_interaction" also
  * produce an owl:NegativeObjectPropertyAssertion; all other rows produce a
  * direct gene-to-gene triple using the predicate for the interaction type.
  *
  * The predicate is recomputed for every row. When the interaction type is
  * not one of the known types a warning is raised and the predicate-based
  * triples are omitted, rather than silently reusing the predicate of the
  * previous row.
  */
 function gene_interactions()
 {
     // Local vocabulary name of the gene-to-gene predicate per interaction type.
     $predicates = array(
         "Genetic" => "genetically-interacts-with",
         "Physical" => "physically-interacts-with",
         "Predicted" => "predicted-to-interact-with",
         "Regulatory" => "regulates",
     );
     while ($l = parent::getReadFile()->Read()) {
         if ($l[0] == '#') {
             continue;
         }
         $data = explode("\t", $l);
         if (count($data) != 11) {
             trigger_error("Found " . count($data) . " columns, expecting 11");
             continue;
         }
         $interaction = $data[0];
         $interaction_type = str_replace("_", "-", $data[1]);
         $interaction_type_label = str_replace("_", " ", $data[1]);
         $int_additional_info = $data[2];
         $gene1 = $data[5];
         $gene2 = $data[8];
         $interaction_id = parent::getNamespace() . $interaction;

         // Reset per row so an unknown type can never inherit a stale predicate.
         $int_pred = null;
         if (isset($predicates[$interaction_type])) {
             $int_pred = parent::getVoc() . $predicates[$interaction_type];
         } else {
             trigger_error("Unknown interaction type '" . $interaction_type . "' for interaction " . $interaction . "; no interaction predicate asserted", E_USER_WARNING);
         }

         $gene1_uri = parent::getNamespace() . $gene1;
         $gene2_uri = parent::getNamespace() . $gene2;
         $base_class = parent::getVoc() . $interaction_type . "-Interaction";

         if ($int_additional_info == "No_interaction") {
             $interaction_label = "No " . strtolower($interaction_type) . " interaction between " . $gene1 . " and " . $gene2;
             $non_class = parent::getVoc() . $interaction_type . "-Non-Interaction";
             parent::addRDF(
                 parent::describeIndividual($interaction_id, $interaction_label, $non_class)
                 . parent::describeClass($non_class, $interaction_type_label . " non-interaction")
                 . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene1_uri)
                 . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene2_uri)
             );
             // A negative property assertion without its property is meaningless,
             // so it is only emitted when the predicate is known.
             if ($int_pred !== null) {
                 $npa_id = parent::getRes() . md5($interaction_id . "negative property assertion");
                 $npa_label = "Negative property assertion stating that " . $gene1 . " and " . $gene2 . " do not have a " . $interaction_type_label . " interaction";
                 parent::addRDF(
                     parent::describeIndividual($npa_id, $npa_label, "owl:NegativeObjectPropertyAssertion")
                     . parent::triplify($npa_id, "owl:sourceIndividual", $gene1_uri)
                     . parent::triplify($npa_id, "owl:targetIndividual", $gene2_uri)
                     . parent::triplify($npa_id, "owl:assertionProperty", $int_pred)
                 );
             }
         } elseif ($int_additional_info == "N/A" || $int_additional_info == "Genetic_interaction") {
             $interaction_label = $interaction_type . " interaction between " . $gene1 . " and " . $gene2;
             parent::addRDF(
                 parent::describeIndividual($interaction_id, $interaction_label, $base_class)
                 . parent::describeClass($base_class, $interaction_type_label . " Interaction")
                 . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene1_uri)
                 . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene2_uri)
             );
             if ($int_pred !== null) {
                 parent::addRDF(parent::triplify($gene1_uri, $int_pred, $gene2_uri));
             }
         } else {
             // Any other additional-info value becomes a subclass of the base
             // interaction class, prefixed with that value.
             $prefix_label = $int_additional_info != "" ? $int_additional_info . " " : "";
             $prefix_uri = $int_additional_info != "" ? $int_additional_info . "-" : "";
             $interaction_label = $prefix_label . strtolower($interaction_type) . " interaction between " . $gene1 . " and " . $gene2;
             $type = parent::getVoc() . $prefix_uri . $interaction_type . "-Interaction";
             $type_label = $prefix_label . $interaction_type_label . " Interaction";
             parent::addRDF(
                 parent::describeIndividual($interaction_id, $interaction_label, $type)
                 . parent::describeClass($type, $type_label, $base_class)
                 // Same label as the base class gets in the branch above
                 // (previously misspelled "Interation" here).
                 . parent::describeClass($base_class, $interaction_type_label . " Interaction")
                 . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene1_uri)
                 . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene2_uri)
             );
             if ($int_pred !== null) {
                 parent::addRDF(parent::triplify($gene1_uri, $int_pred, $gene2_uri));
             }
         }
         //else
         parent::WriteRDFBufferToWriteFile();
     }
     //while
 }
Beispiel #20
0
 /**
  * Converts a tab-separated GO annotation file into RDF.
  *
  * Lines starting with "!" are skipped. Every other line must have exactly
  * 17 tab-separated columns; otherwise an E_USER_ERROR is raised and the
  * method returns false. The column layout read here (db, id, symbol,
  * qualifier, GO id, references, evidence code, aspect, name, synonyms,
  * taxon, date, assigned-by) matches the GO Annotation File (GAF) 2.x
  * format - NOTE(review): confirm against the actual input files.
  *
  * For each row the annotated entity is described and linked to the GO term
  * through a relation derived from the aspect, and a separate annotation
  * node (numbered per file) records the evidence, category, assigner, entry
  * date and PubMed references.
  *
  * @param string $file name used as prefix for the per-row annotation ids
  *
  * @return bool|null false on a malformed row; nothing is returned otherwise.
  */
 function process($file)
 {
     $z = 1;
     while ($l = parent::getReadFile()->read(100000)) {
         if ($z % 100000 == 0) {
             parent::clear();
         }
         if ($l[0] == "!") {
             continue;
         }
         $fields = explode("\t", $l);
         if (count($fields) != 17) {
             trigger_error("Expected 17 columns, but found " . count($fields), E_USER_ERROR);
             return false;
         }
         //get the Go id
         $db = $fields[0];
         $id = $fields[1];
         $symbol = $fields[2];
         $qualifier = $fields[3];
         // drops the first three characters (presumably the "GO:" prefix)
         $goid = substr($fields[4], 3);
         $refs = $this->getDbReferences($fields[5]);
         $eco = $this->getEvidenceCodeLabelArr($fields[6]);
         $aspect = $this->getAspect($fields[8]);
         $label = $fields[9];
         $synonyms = explode("|", $fields[10]);
         $taxid = $fields[12];
         $date = $this->parseDate($fields[13]);
         $assignedBy = $fields[14];
         //entity id
         $eid = $this->getdbURI($db, $id);
         if (!$eid) {
             print_r($fields);
             continue;
         }
         parent::addRDF(
             parent::describeIndividual($eid, $label, parent::getVoc() . "GO-Annotation")
             . parent::describeClass(parent::getVoc() . "GO-Annotation", "GO Annotation")
             . parent::triplifyString($eid, parent::getVoc() . "symbol", $symbol)
         );
         parent::addRDF(parent::triplify($eid, parent::getVoc() . "x-taxonomy", $taxid));
         foreach ($synonyms as $s) {
             if (!empty($s)) {
                 parent::addRDF(parent::triplifyString($eid, parent::getVoc() . "synonym", $s));
             }
         }
         $rel = $aspect;
         // The qualifier column may carry several pipe-separated values
         // (e.g. "NOT|contributes_to"), so look for the NOT token instead of
         // requiring the whole field to equal "NOT"; otherwise a negated
         // annotation would be emitted as a positive relation.
         if (in_array('NOT', explode('|', $qualifier), true)) {
             if ($aspect == 'process') {
                 $rel = 'not-in-process';
             }
             if ($aspect == 'function') {
                 $rel = 'not-has-function';
             }
             if ($aspect == 'component') {
                 $rel = 'not-in-component';
             }
         }
         parent::addRDF(
             parent::describeObjectProperty(parent::getVoc() . $rel, str_replace("-", " ", $rel))
             . parent::triplify($eid, parent::getVoc() . $rel, "go:" . $goid)
         );
         // first key of the evidence-code array selects the entry to use
         $type = key($eco);
         // annotation ids are numbered per input file; $z only advances here
         $aid = parent::getRes() . $file . "_" . $z++;
         parent::addRDF(
             parent::describeObjectProperty(parent::getVoc() . "go-annotation", "GO annotation")
             . parent::triplify($eid, parent::getVoc() . "go-annotation", $aid)
         );
         $cat = parent::getRes() . md5($aspect);
         parent::addRDF(
             parent::describeIndividual($aid, "{$id}-go:{$goid} association", parent::getVoc() . "GO-Annotation")
             . parent::triplify($aid, parent::getVoc() . "target", $eid)
             . parent::triplify($aid, parent::getVoc() . "go-term", "go:" . $goid)
             . parent::triplify($aid, parent::getVoc() . "evidence", "eco:" . $eco[$type][1])
             . parent::triplify($aid, parent::getVoc() . "go-category", $cat)
             . parent::describeClass($cat, $aspect)
             . parent::triplifyString($aid, parent::getVoc() . "assigned-by", $assignedBy)
         );
         if ($date != '') {
             parent::addRDF(parent::triplifyString($aid, parent::getVoc() . "entry-date", $date . "T00:00:00Z", "xsd:dateTime"));
         }
         foreach ($refs as $ref) {
             $b = explode(":", $ref);
             if ($b[0] == 'PMID') {
                 parent::addRDF(parent::triplify($aid, parent::getVoc() . "article", "pubmed:" . $b[1]));
             }
         }
         //write RDF to file
         parent::writeRDFBufferToWriteFile();
     }
 }
 /**
  * Describes a titled individual of the given type and returns its URI.
  *
  * The individual's URI is the resource namespace plus the md5 of the title;
  * the type URI is the vocabulary namespace plus the type name with spaces
  * replaced by hyphens. Both the individual and its class are added to the
  * RDF buffer.
  *
  * @param string $title label of the individual; a falsy title yields null
  * @param string $type  human-readable type name, also used as class label
  *
  * @return string|null the individual's URI, or null when no title is given
  */
 public function makeDescription($title, $type)
 {
     if ($title) {
         $individual = parent::getRes() . md5($title);
         $class = parent::getVoc() . str_replace(" ", "-", $type);
         $rdf = parent::describeIndividual($individual, $title, $class);
         $rdf .= parent::describeClass($class, $type);
         parent::addRDF($rdf);
         return $individual;
     }
     return null;
 }
Beispiel #22
0
 /**
  * Converts one registry entry (given as a nested array) into RDF.
  *
  * Reads the entry id from $item['@attributes']['id'] and its label from
  * $item['name'], then emits triples for every optional section that is
  * present: attributes, comment, definition, synonyms, uris, resources,
  * tags, documentations, restrictions and annotation.
  *
  * Child collections may arrive either as a single value or as a list
  * (presumably an artefact of converting XML to arrays - confirm with the
  * caller); each section normalises its input to a list before iterating.
  *
  * @param array $item nested array describing a single entry
  */
 function parseItem($item)
 {
     $id = $item['@attributes']['id'];
     $label = $item['name'];
     parent::addRDF(
         parent::describeIndividual($id, $label, parent::getVoc() . "Entry")
         . parent::describeClass(parent::getVoc() . "Entry", "MIRIAM database entry")
         . parent::triplifyString($id, parent::getVoc() . "namespace", $item['namespace'])
     );
     if (isset($item['@attributes'])) {
         foreach ($item['@attributes'] as $k => $v) {
             parent::addRDF(parent::triplifyString($id, parent::getVoc() . $k, $v));
         }
     }
     if (isset($item['comment'])) {
         parent::addRDF(parent::triplifyString($id, parent::getVoc() . "comment", $item['comment']));
     }
     if (isset($item['definition'])) {
         parent::addRDF(parent::triplifyString($id, parent::getVoc() . "definition", $item['definition']));
     }
     if (isset($item['synonyms'])) {
         $mylist = $item['synonyms']['synonym'];
         if (!is_array($mylist)) {
             $mylist = array($mylist);
         }
         foreach ($mylist as $myitem) {
             parent::addRDF(parent::triplifyString($id, "skos:altLabel", $myitem));
         }
     }
     if (isset($item['uris'])) {
         // A single uri arrives as a scalar; wrap it like the other sections
         // do so foreach does not choke on a string.
         $mylist = $item['uris']['uri'];
         if (!is_array($mylist)) {
             $mylist = array($mylist);
         }
         foreach ($mylist as $uri) {
             parent::addRDF(parent::triplifyString($id, parent::getVoc() . "uri", $uri));
         }
     }
     if (isset($item['resources'])) {
         // A lone resource is itself an associative array, so detect it by
         // one of its own keys rather than with is_array().
         if (!isset($item['resources']['resource']['dataEntry'])) {
             $mylist = $item['resources']['resource'];
         } else {
             $mylist = array($item['resources']['resource']);
         }
         foreach ($mylist as $myitem) {
             $rid = $myitem['@attributes']['id'];
             parent::addRDF(
                 parent::describeIndividual($rid, $myitem['dataInfo'], parent::getVoc() . "Resource")
                 . parent::describeClass(parent::getVoc() . "Resource", "MIRIAM Resource")
                 . parent::triplify($rid, parent::getVoc() . "url", $myitem['dataResource'])
                 . parent::triplifyString($rid, parent::getVoc() . "urlTemplate", $myitem['dataEntry'])
                 // an array here means the element was empty; emit an empty literal
                 . parent::triplifyString($rid, parent::getVoc() . "organization", is_array($myitem['dataInstitution']) ? "" : $myitem['dataInstitution'])
                 . parent::triplifyString($rid, parent::getVoc() . "location", is_array($myitem['dataLocation']) ? "" : $myitem['dataLocation'])
                 . parent::triplify($id, parent::getVoc() . "resource", $rid)
             );
         }
     }
     if (isset($item['tags'])) {
         $mylist = $item['tags']['tag'];
         if (!is_array($mylist)) {
             $mylist = array($mylist);
         }
         foreach ($mylist as $myitem) {
             parent::addRDF(parent::triplifyString($id, parent::getVoc() . "tag", $myitem));
         }
     }
     if (isset($item['documentations'])) {
         $mylist = $item['documentations']['documentation'];
         if (!is_array($mylist)) {
             $mylist = array($mylist);
         }
         foreach ($mylist as $myitem) {
             // Only rewrite identifiers that really carry a "pubmed:" or "doi:"
             // scheme; plain URLs that merely contain those words are kept as-is.
             $pubmed_pos = strpos($myitem, "pubmed:");
             $doi_pos = strpos($myitem, "doi:");
             if ($pubmed_pos !== false) {
                 $uri = "pubmed:" . substr($myitem, $pubmed_pos + strlen("pubmed:"));
             } elseif ($doi_pos !== false) {
                 // skip past the "doi:" marker so the resolver URL holds the bare DOI
                 $uri = "http://dx.doi.org/" . substr($myitem, $doi_pos + strlen("doi:"));
             } else {
                 $uri = $myitem;
             }
             parent::addRDF(parent::triplify($id, parent::getVoc() . "documentation", $uri));
         }
     }
     if (isset($item['restrictions'])) {
         if (!isset($item['restrictions']['restriction']['statement'])) {
             $mylist = $item['restrictions']['restriction'];
         } else {
             $mylist = array($item['restrictions']['restriction']);
         }
         foreach ($mylist as $i => $myitem) {
             $rid = parent::getRes() . str_replace(":", "", $id) . "_" . ($i + 1);
             $a = $myitem['@attributes'];
             $rid_type = parent::getVoc() . 'restriction_type_' . $a['type'];
             parent::addRDF(
                 parent::describeIndividual($rid, $a['desc'], parent::getVoc() . "Restriction")
                 . parent::describeClass(parent::getVoc() . "Restriction", "Resource Restriction")
                 . parent::triplify($rid, "rdf:type", $rid_type)
                 . parent::describeClass($rid_type, $a['desc'], parent::getVoc() . "Restriction")
                 . parent::triplifyString($rid, "dct:description", $myitem['statement'])
                 . parent::triplify($id, parent::getVoc() . "restriction", $rid)
             );
             // Only link a page when one is given; an empty object is not a valid resource.
             if (isset($myitem['link']) && $myitem['link'] != "") {
                 parent::addRDF(parent::triplify($rid, "foaf:page", $myitem['link']));
             }
         }
     }
     /*
     <annotation>
     	<format name="SBML">
     		<elements>
     			<element>reaction</element>
     			<element>event</element>
     			<element>rule</element>
     			<element>species</element>
     		</elements>
     	</format>
     */
     if (isset($item['annotation'])) {
         if (!isset($item['annotation']['format']['elements'])) {
             $mylist = $item['annotation']['format'];
         } else {
             $mylist = array($item['annotation']['format']);
         }
         foreach ($mylist as $i => $myitem) {
             $name = $myitem['@attributes']['name'];
             $myid = str_replace("MIR:", parent::getRes(), $id) . "_annotation_" . ($i + 1) . "_" . urlencode($name);
             parent::addRDF(
                 parent::describeIndividual($myid, "{$label} used by {$name}", parent::getVoc() . "ValueSet")
                 . parent::describeClass(parent::getVoc() . "ValueSet", "MIRIAM Value Set")
                 . parent::triplifyString($myid, parent::getVoc() . "used-in", $name)
                 . parent::triplify($myid, parent::getVoc() . "uses", $id)
             );
             $elements = $myitem['elements']['element'];
             if (!is_array($elements)) {
                 $elements = array($elements);
             }
             foreach ($elements as $e) {
                 parent::addRDF(parent::triplifyString($myid, parent::getVoc() . "used-for", $e));
             }
         }
     }
 }