Beispiel #1
0
 /**
  * Convert the FDA National Drug Code (NDC) directory archive to RDF.
  *
  * Downloads the zip archive named by the 'download_url' parameter when it is not
  * present in 'indir' (or when the 'download' parameter is 'true'), opens it, and for
  * every requested name passes a stream of "<name>.txt" to the method of the same
  * name on this object. The RDF buffer is flushed to "ndc.<output_format>" in
  * 'outdir' after each member, and a dataset description file is written at the end.
  *
  * @return bool|null FALSE when a requested member cannot be read from the archive;
  *                   otherwise no value is returned.
  */
 function Run()
 {
     $ldir = $this->GetParameterValue('indir');
     $odir = $this->GetParameterValue('outdir');
     $rfile = $this->GetParameterValue('download_url');
     // local file name = last path segment of the download URL
     $lfile = substr($rfile, strrpos($rfile, "/") + 1);
     $archive = $ldir . $lfile;

     // fetch the archive when it is missing locally or a download was requested
     if (!file_exists($archive) or parent::getParameterValue('download') == 'true') {
         echo "dowloading {$rfile} ...";
         trigger_error("Will attempt to download ", E_USER_NOTICE);
         Utils::DownloadSingle($rfile, $archive);
         echo "done" . PHP_EOL;
     }

     // make sure we have the zip archive.
     // ZipArchive::open() returns TRUE on success and an integer error code on
     // failure (never FALSE), so the result must be compared against TRUE.
     $zin = new ZipArchive();
     $status = $zin->open($archive);
     if ($status !== TRUE) {
         trigger_error("Unable to open {$ldir}{$lfile} (ZipArchive error code {$status})");
         exit;
     }

     // get the work: either the full parameter list (minus its first entry) or the
     // pipe-separated names supplied by the user
     if ($this->GetParameterValue('files') == 'all') {
         $files = explode("|", $this->GetParameterList('files'));
         array_shift($files);
     } else {
         $files = explode("|", $this->GetParameterValue('files'));
     }

     $gz = false;
     if (strstr(parent::getParameterValue('output_format'), "gz")) {
         $gz = true;
     }
     $outfile = "ndc." . parent::getParameterValue('output_format');
     parent::setWriteFile($odir . $outfile, $gz);

     // now go through each item in the zip file and process
     foreach ($files as $file) {
         echo "Processing {$file}... ";
         $fpin = $zin->getStream($file . ".txt");
         if (!$fpin) {
             trigger_error("Unable to get pointer to {$file} in {$ldir}{$lfile}", E_USER_ERROR);
             $zin->close();
             return FALSE;
         }
         // dispatch to the handler method named after the archive member
         $this->{$file}($fpin);
         parent::writeRDFBufferToWriteFile();
         echo "done!" . PHP_EOL;
     }
     parent::getWriteFile()->close();
     // release the archive handle now that every member has been processed
     $zin->close();

     echo "Generating dataset description for {$outfile}... ";
     // Timestamps carry a literal "Z" suffix, so they are produced in UTC (gmdate)
     // with a zero-padded hour ("H"); "G" would yield e.g. "T9:05:00Z".
     $timestamp_format = 'Y-m-d\TH:i:s\Z';

     $source_file = (new DataResource($this))
         ->setURI($rfile)
         ->setTitle("FDA National Drug Code Directory")
         ->setRetrievedDate(gmdate($timestamp_format, filemtime($archive)))
         ->setFormat("text/tab-separated-value")
         ->setFormat("application/zip")
         ->setPublisher("http://www.fda.gov")
         ->setHomepage("http://www.fda.gov/Drugs/InformationOnDrugs/ucm142438.htm")
         ->setRights("use-share")
         ->setLicense(null)
         ->setDataset("http://identifiers.org/ndc/");

     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = gmdate($timestamp_format);
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outfile}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ndc/ndc.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr(parent::getParameterValue('output_format'), "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description = $source_file->toRDF() . $output_file->toRDF();

     // write dataset description to file
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Beispiel #2
0
 /**
  * Convert an Affymetrix probeset annotation CSV to RDF.
  *
  * Input is pulled line by line from parent::getReadFile(). The first line is
  * discarded. Lines starting with "#" are treated as "name=value" dataset attributes.
  * The first remaining line is the header, and every following line is one probeset
  * record; both must contain exactly 41 columns. The RDF buffer is flushed to the
  * write file after every record.
  *
  * Columns for which $this->Map() returns a value become cross-references; all other
  * columns are handled by header name in the switch below.
  *
  * @param mixed $file Not used by this method; kept for interface compatibility.
  */
 function Parse($file)
 {
     // header name => relation name for the three Gene Ontology columns
     $go_relations = array(
         'Gene Ontology Biological Process' => 'go-process',
         'Gene Ontology Cellular Component' => 'go-location',
         'Gene Ontology Molecular Function' => 'go-function',
     );
     // source tag found in "Transcript Assignments" => prefix used in the output
     $source_prefixes = array(
         'gb' => 'genbank',
         'gb_htc' => 'genbank',
         'ncbibacterial' => 'gi',
         'ncbi_bacterial' => 'gi',
         'ens' => 'ensembl',
         'ncbi_mito' => 'refseq',
         'ncbi_organelle' => 'refseq',
         'organelle' => 'refseq',
         'affx' => 'affymetrix',
         'unknown' => 'affymetrix',
         'prop' => 'affymetrix',
         'tigr_2004_08' => 'tigr',
         'tigr-plantta' => 'genbank',
         'newrs.gi' => 'gi',
         'newRS.gi' => 'gi',
         'primate_viral' => 'genbank',
         'jgi-bacterial' => 'ncbigene',
         'tb' => 'tuberculist',
         'pa' => 'pseudomonas',
     );

     // skip the first comment line
     parent::getReadFile()->read();
     $line = 1;
     $header = null;
     while ($l = parent::getReadFile()->read(500000)) {
         // keep the counter in step with the input so error messages name the right line
         $line++;

         if ($l[0] == "#") {
             // dataset attribute: "name=value"; the first two characters of the name are dropped
             $a = explode('=', trim($l));
             $r = $this->getVoc() . substr($a[0], 2);
             if (isset($a[1])) {
                 $v = $a[1];
                 if ($r == "affymetrix_vocabulary:genome-version-create_date") {
                     // a day component of "00" is replaced by "01"
                     $x = explode("-", $a[1]);
                     if (isset($x[2]) && $x[2] == "00") {
                         $x[2] = "01";
                     }
                     $v = implode("-", $x);
                 }
                 parent::addRDF(parent::triplifyString(parent::getDatasetURI(), $r, $v) . parent::describe($r, "{$r}"));
             }
             continue;
         }

         if ($header === null) {
             // first non-attribute line is the header
             $header = explode(",", str_replace('"', '', trim($l)));
             $n = count($header);
             if ($n != 41) {
                 trigger_error("Expecting 41 columns, found {$n} in header on line {$line}!", E_USER_ERROR);
                 exit;
             }
             continue;
         }

         // strip the leading quote and the trailing two characters, then split on '","'
         $a = explode('","', substr($l, 1, -2));
         $n = count($a);
         if ($n != 41) {
             trigger_error("Expecting 41 columns, found {$n} on line {$line}!", E_USER_ERROR);
             exit;
         }
         parent::writeRDFBufferToWriteFile();

         $id = $a[0];
         $qname = "affymetrix:{$id}";
         $label = "probeset {$a[0]} on GeneChip {$a[1]} ({$a[2]})";
         parent::addRDF(parent::describeIndividual($qname, $label, $this->getVoc() . "Probeset") . parent::describeClass($this->getVoc() . "Probeset", "Affymetrix probeset"));
         trigger_error($id, E_USER_NOTICE);

         // now process the entries
         foreach ($a as $k => $v) {
             // '---' marks an empty cell
             if (trim($v) == '---') {
                 continue;
             }
             // multi-valued entries are separated by " /// ", sub-fields by " // "
             $b = explode(" /// ", $v);
             $r = $this->Map($k);
             if (isset($r)) {
                 // mapped column: emit one cross-reference per value
                 foreach ($b as $c) {
                     $d = explode(" // ", $c);
                     if ($r == 'symbol') {
                         $d[0] = str_replace(" ", "-", $d[0]);
                     }
                     $s = $this->getRegistry()->getPreferredPrefix($r);
                     if ($s == "ec") {
                         // keep only the part after the first ":"; leave the value
                         // untouched when it contains no ":" at all
                         $e = explode(":", $d[0]);
                         if (isset($e[1])) {
                             $d[0] = $e[1];
                         }
                     }
                     $this->addRDF(parent::triplify($qname, $this->getVoc() . "x-{$s}", "{$s}:" . $d[0]) . parent::describeProperty($this->getVoc() . "x-{$s}", "a relation to {$s}"));
                 }
                 continue;
             }

             // unmapped column: handled by header name
             $column = $header[$k];
             switch ($column) {
                 case 'GeneChip Array':
                     $array_id = parent::getRes() . str_replace(" ", "-", $v);
                     parent::addRDF(parent::triplify($qname, $this->getVoc() . "genechip-array", $array_id) . parent::describeIndividual($array_id, "Affymetrix {$v} GeneChip array", $this->getVoc() . "Genechip-Array") . parent::describeClass($this->getVoc() . "Genechip-Array", "Affymetrix GeneChip array"));
                     break;

                 case 'Gene Ontology Biological Process':
                 case 'Gene Ontology Cellular Component':
                 case 'Gene Ontology Molecular Function':
                     $rel = $go_relations[$column];
                     foreach ($b as $c) {
                         $d = explode(" // ", $c);
                         parent::addRDF($this->triplify($qname, $this->getVoc() . $rel, "go:" . $d[0]) . $this->describeProperty($this->getVoc() . $rel, "{$rel}"));
                     }
                     break;

                 case 'Transcript Assignments':
                     foreach ($b as $c) {
                         $d = explode(" // ", $c);
                         if (!isset($d[2])) {
                             // no source tag: nothing valid can be emitted for this entry
                             if ($d[0] != '---') {
                                 trigger_error("Skipping malformed transcript assignment '{$c}' on line {$line}", E_USER_NOTICE);
                             }
                             continue;
                         }
                         $xref_id = $d[0];
                         $prefix = $d[2];
                         if ($prefix == '---' || $xref_id == '---') {
                             continue;
                         }
                         if (isset($source_prefixes[$prefix])) {
                             $prefix = $source_prefixes[$prefix];
                         } elseif ($prefix == 'gi|53267') {
                             // source tag that carries the identifier itself
                             $prefix = 'gi';
                             $xref_id = '53267';
                         } elseif ($prefix == 'broad-tcup') {
                             // prefix is kept; the identifier is cut at the first "-"
                             $e = explode("-", $xref_id);
                             $xref_id = $e[0];
                         }
                         parent::addRDF(parent::triplify($qname, $this->getVoc() . "transcript-assignment", "{$prefix}:{$xref_id}") . parent::describeProperty($this->getVoc() . "transcript-assignment", "transcript assignment"));
                     }
                     break;

                 case 'Annotation Transcript Cluster':
                 case 'Species Scientific Name':
                     // intentionally not converted
                     break;

                 case 'Annotation Date':
                     // e.g. "Jun 9, 2011"
                     $rel = "annotation-date";
                     preg_match("/^([A-Za-z]+) ([0-9]+), ([0-9]{4})\$/", $v, $m);
                     if (count($m) == 4) {
                         $month = $this->getMonth($m[1]);
                         $day = $m[2];
                         $year = $m[3];
                         if (!$day || $day == "0") {
                             $day = "01";
                         }
                         $date = $year . "-" . $month . "-" . str_pad($day, 2, "0", STR_PAD_LEFT) . "T00:00:00Z";
                         parent::addRDF(parent::triplifyString($qname, $this->getVoc() . $rel, $date, "xsd:dateTime") . parent::describeProperty($this->getVoc() . $rel, "{$rel}"));
                     } else {
                         trigger_error("could not match date from {$v}", E_USER_ERROR);
                     }
                     break;

                 default:
                     // generic literal column; the relation name is derived from the
                     // header unless a fixed name applies
                     if ($column == 'Transcript ID(Array Design)') {
                         $rel = 'transcript';
                     } else {
                         $rel = str_replace(" ", "-", strtolower($column));
                     }
                     foreach ($b as $c) {
                         parent::addRDF(parent::triplifyString($qname, $this->getVoc() . $rel, stripslashes($c)) . parent::describeProperty($this->getVoc() . $rel, "{$rel}"));
                     }
                     break;
             }
         }
         $this->WriteRDFBufferToWriteFile();
     }
 }
Beispiel #3
0
 /**
  * Convert a tab-separated drug-drug interaction table to RDF.
  *
  * The first line of the read file is discarded. Every following line yields one
  * "twosides:<n>" association linking two PubChem compounds (columns 0 and 1, names
  * in columns 2 and 3) to a UMLS event (column 4, name in column 5) with the value
  * of column 7 as p-value. Each compound and event is described only once. The RDF
  * buffer is flushed every 10000 rows and once more at the end.
  */
 function twosides()
 {
     // qnames that have already been described
     $seen = array();
     $row_number = 0;

     // discard the first line
     $this->GetReadFile()->Read();

     while ($record = $this->GetReadFile()->Read()) {
         $fields = explode("\t", $record);
         $row_number++;
         if ($row_number % 10000 == 0) {
             $this->WriteRDFBufferToWriteFile();
         }

         $association = "twosides:{$row_number}";
         // the compound number starts at offset 4 of the first two columns
         $drug1 = "pubchemcompound:" . (int) sprintf("%d", substr($fields[0], 4));
         $drug1_name = $fields[2];
         $drug2 = "pubchemcompound:" . (int) sprintf("%d", substr($fields[1], 4));
         $drug2_name = $fields[3];
         $event = "umls:" . $fields[4];
         $event_name = strtolower($fields[5]);
         $association_label = "DDI between {$drug1_name} and {$drug2_name} leading to {$event_name}";

         // describe both drugs and the event the first time each is encountered
         $participants = array(
             array($drug1, $drug1_name, "Chemical", "PharmGKB Chemical"),
             array($drug2, $drug2_name, "Chemical", "PharmGKB Chemical"),
             array($event, $event_name, "Event", "PharmGKB side effect event"),
         );
         foreach ($participants as $participant) {
             list($qname, $name, $type, $type_label) = $participant;
             if (isset($seen[$qname])) {
                 continue;
             }
             parent::addRDF(parent::describeIndividual($qname, $name, parent::getVoc() . $type) . parent::describeClass(parent::getVoc() . $type, $type_label));
             $seen[$qname] = '';
         }

         // the association itself
         $rdf = parent::describeIndividual($association, $association_label, parent::getVoc() . "Drug-Drug-Association");
         $rdf .= parent::describeClass(parent::getVoc() . "Drug-Drug-Association", "PharmGKB Twosides Drug-Drug Association");
         $rdf .= parent::triplify($association, parent::getVoc() . "chemical", $drug1);
         $rdf .= parent::triplify($association, parent::getVoc() . "chemical", $drug2);
         $rdf .= parent::triplify($association, parent::getVoc() . "event", $event);
         $rdf .= parent::triplifyString($association, parent::getVoc() . "p-value", $fields[7]);
         parent::addRDF($rdf);
     }

     parent::writeRDFBufferToWriteFile();
 }
Beispiel #4
0
 /**
  * Convert BioModels BioPAX level 3 files to RDF.
  *
  * The 'files' parameter selects the models:
  *   - 'all' / 'curated': the identifier list is fetched from the BioModels SOAP
  *     service and cached as JSON in 'indir' (the cache is reused when present);
  *   - "<from>-<to>": a numeric range;
  *   - otherwise: a comma-separated list of model numbers.
  * Each model is downloaded when missing (or when 'download' is 'true'), first from
  * the curated path and then from the non-curated path, converted with
  * BioPAX2Bio2RDF, and appended to "biomodels.<output_format>" in 'outdir'. A dataset
  * description file is written at the end.
  *
  * @return bool|null FALSE when the identifier list cannot be retrieved from the web
  *                   service; otherwise no value is returned.
  */
 function Run()
 {
     // directory shortcuts
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');

     // get the work specified
     $list = trim(parent::getParameterValue('files'));
     $entries = array();

     // list keyword => (cache file name, web service operation)
     $services = array(
         'all' => array('all_models.json', 'getAllModelsId'),
         'curated' => array('curated_models.json', 'getAllCuratedModelsId'),
     );

     if (isset($services[$list])) {
         list($cache_name, $operation) = $services[$list];
         $file = $ldir . $cache_name;
         if (!file_exists($file)) {
             // Client creation and the call share one try block: if either fails
             // there is no list to work with, so stop instead of calling a method on
             // an undefined client.
             try {
                 $client = @new SoapClient("http://www.ebi.ac.uk/biomodels-main/services/BioModelsWebServices?wsdl");
                 $entries = $client->{$operation}();
             } catch (Exception $ex) {
                 trigger_error("Unable to retrieve the BioModels identifier list: " . $ex->getMessage(), E_USER_ERROR);
                 return FALSE;
             }
             file_put_contents($file, json_encode($entries));
         } else {
             $entries = json_decode(file_get_contents($file));
         }
     } elseif (($pos = strpos($list, "-")) !== FALSE) {
         // hyphenated range
         $start_range = substr($list, 0, $pos);
         $end_range = substr($list, $pos + 1);
         for ($i = $start_range; $i <= $end_range; $i++) {
             $entries[] = "BIOMD" . str_pad($i, 10, "0", STR_PAD_LEFT);
         }
     } else {
         // comma separated list; items are trimmed so "1, 2" pads correctly
         foreach (explode(",", $list) as $number) {
             $entries[] = "BIOMD" . str_pad(trim($number), 10, "0", STR_PAD_LEFT);
         }
     }

     $graph_uri = parent::getGraphURI();
     // NOTE(review): loose comparison - any non-empty string (including "false")
     // equals true here; confirm how 'dataset_graph' is meant to be supplied.
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }

     // set the write file
     $suffix = parent::getParameterValue('output_format');
     $outfile = 'biomodels' . '.' . $suffix;
     $gz = false;
     if (strstr(parent::getParameterValue('output_format'), "gz")) {
         $gz = true;
     }
     $dataset_description = '';
     parent::setWriteFile($odir . $outfile, $gz);

     // Timestamps carry a literal "Z" suffix, so they are produced in UTC (gmdate)
     // with a zero-padded hour ("H").
     $timestamp_format = 'Y-m-d\TH:i:s\Z';

     // iterate over the entries
     $i = 0;
     $total = count($entries);
     $source_file = null; // last source described; stays null if nothing was processed
     foreach ($entries as $id) {
         echo "processing " . ++$i . " of {$total} - biomodel# " . $id;
         $download_file = $ldir . $id . ".owl.gz";
         $url = parent::getParameterValue('download_url') . "publ/{$id}/{$id}-biopax3.owl";

         // download if the file doesn't exist or we are told to
         if (!file_exists($download_file) || $this->GetParameterValue('download') == 'true') {
             echo " - downloading";
             $ret = utils::downloadsingle($url, 'compress.zlib://' . $download_file, true);
             if ($ret === false) {
                 echo "\nTrying non-curated model";
                 $url = parent::getParametervalue('download_url') . "uncura_publ/{$id}/{$id}-biopax3.owl";
                 $ret = utils::downloadsingle($url, 'compress.zlib://' . $download_file, true);
                 if ($ret === false) {
                     continue;
                 }
             }
             echo " - downloaded";
         }

         // load entry, parse and write to file
         echo " - parsing... ";
         $buf = file_get_contents("compress.zlib://" . $download_file);
         if ($buf === false) {
             trigger_error("Unable to read {$download_file}", E_USER_WARNING);
             continue;
         }
         $converter = new BioPAX2Bio2RDF($this);
         $converter->SetBuffer($buf)
             ->SetBioPAXVersion(3)
             ->SetBaseNamespace("http://identifiers.org/biomodels.db/{$id}/")
             ->SetBio2RDFNamespace("http://bio2rdf.org/biomodels:" . $id . "_")
             ->SetDatasetURI($this->GetDatasetURI());
         $rdf = $converter->Parse();
         parent::addRDF($rdf);
         parent::writeRDFBufferToWriteFile();

         // generate dataset description
         $source_file = (new DataResource($this))
             ->setURI($url)
             ->setTitle("EBI BioModels Database - BioModel # {$id}")
             ->setRetrievedDate(gmdate($timestamp_format, filemtime($download_file)))
             ->setFormat("rdf/xml")
             ->setPublisher("http://www.ebi.ac.uk/")
             ->setHomepage("http://www.ebi.ac.uk/biomodels-main/")
             ->setRights("use-share-modify")
             ->setLicense("http://www.ebi.ac.uk/biomodels-main/termsofuse")
             ->setDataset("http://identifiers.org/biomodels.db/");
         $dataset_description .= $source_file->toRDF();
         echo "done!" . PHP_EOL;
     }
     parent::getWriteFile()->close();

     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = gmdate($timestamp_format);
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})");
     // only reference a source when at least one model was actually processed
     if ($source_file !== null) {
         $output_file->setSource($source_file->getURI());
     }
     $output_file->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/biomodels/biomodels.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr(parent::getParameterValue('output_format'), "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description .= $output_file->toRDF();

     // write dataset description to file
     parent::setGraphURI($graph_uri);
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
 /**
  * Convert Pathway Commons BioPAX level 3 exports to RDF.
  *
  * The 'files' parameter is either 'all' (every entry of the parameter list except
  * its first) or a comma-separated list of source names. Each source is downloaded
  * into 'indir' when missing (or when 'download' is 'true'), decompressed, converted
  * with BioPAX2Bio2RDF and written to "<source>.<output_format>" in 'outdir'. A
  * dataset description file is written at the end.
  *
  * Sources that are unknown or cannot be downloaded are reported with a warning and
  * skipped.
  */
 function Run()
 {
     // get the work
     if ($this->GetParameterValue('files') == 'all') {
         $sources = explode("|", parent::getParameterList('files'));
         array_shift($sources);
     } else {
         // comma separated list
         $sources = explode(",", parent::getParameterValue('files'));
     }

     // source name => remote file name (already URL-encoded)
     $download_files = array(
         "homo-sapiens" => "Pathway%20Commons%202%20homo%20sapiens.BIOPAX.owl.gz",
         "hprd" => "Pathway%20Commons%202%20HPRD.BIOPAX.owl.gz",
         "humancyc" => "Pathway%20Commons%202%20HumanCyc.BIOPAX.owl.gz",
         "nci-nature" => "Pathway%20Commons%202%20NCI_Nature.BIOPAX.owl.gz",
         "panther-pathway" => "Pathway%20Commons%202%20PANTHER%20Pathway.BIOPAX.owl.gz",
         "phosphositeplus" => "Pathway%20Commons%202%20PhosphoSitePlus.BIOPAX.owl.gz",
         "reactome" => "Pathway%20Commons%202%20Reactome.BIOPAX.owl.gz",
     );

     // These do not depend on the source, and are needed after the loop even when no
     // source was processed, so they are resolved once up front.
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     $suffix = parent::getParameterValue('output_format');
     $gz = false;
     if (strstr($suffix, "gz")) {
         $gz = true;
     }

     $graph_uri = parent::getGraphURI();
     // NOTE(review): loose comparison - any non-empty string (including "false")
     // equals true here; confirm how 'dataset_graph' is meant to be supplied.
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }

     // Timestamps carry a literal "Z" suffix, so they are produced in UTC (gmdate)
     // with a zero-padded hour ("H").
     $timestamp_format = 'Y-m-d\TH:i:s\Z';

     $dataset_description = '';
     $source_file = null; // last source described; stays null if nothing was processed

     // iterate over the requested data
     foreach ($sources as $source) {
         echo "processing {$source}... ";
         if (!isset($download_files[$source])) {
             trigger_error("Unknown Pathway Commons source '{$source}'", E_USER_WARNING);
             continue;
         }

         // set the remote and input files
         $zfile = $source . ".owl.gz";
         $rfile = $rdir . $download_files[$source];
         $lfile = $ldir . $zfile;

         // download if the file doesn't exist locally or we are told to
         if (!file_exists($lfile) || $this->GetParameterValue('download') == 'true') {
             echo "downloading... ";
             $contents = file_get_contents($rfile);
             if ($contents === FALSE) {
                 // do not leave an empty file behind that would later be mistaken
                 // for a valid cached download
                 trigger_error("Unable to download {$rfile}", E_USER_WARNING);
                 continue;
             }
             file_put_contents($lfile, $contents);
         }

         // decompress the whole file into a buffer
         echo 'extracting... ';
         if (($fpin = gzopen($lfile, "r")) === FALSE) {
             trigger_error("Unable to open {$lfile}", E_USER_ERROR);
             exit;
         }
         $data = '';
         while (!gzeof($fpin)) {
             $buffer = gzgets($fpin, 4096);
             $data .= $buffer;
         }
         gzclose($fpin);

         // set the output file
         $outfile = $source . '.' . $suffix;
         parent::setWriteFile($odir . $outfile, $gz);

         // send for parsing
         $p = new BioPAX2Bio2RDF($this);
         $p->SetBuffer($data)
             ->SetBioPAXVersion(3)
             ->SetBaseNamespace("http://purl.org/pc2/3/")
             ->SetBio2RDFNamespace("http://bio2rdf.org/pathwaycommons:")
             ->SetDatasetURI(parent::getDatasetURI());
         $rdf = $p->Parse();
         parent::addRDF($rdf);

         // write to output
         parent::writeRDFBufferToWriteFile();
         parent::getWriteFile()->Close();
         echo "done!" . PHP_EOL;

         // generate dataset description
         echo "Generating dataset description for {$zfile}... ";
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("Pathway Commons")
             ->setRetrievedDate(gmdate($timestamp_format, filemtime($lfile)))
             ->setFormat("rdf/xml")
             ->setPublisher("http://www.pathwaycommons.org/")
             ->setHomepage("http://www.pathwaycommons.org/")
             ->setRights("use")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://www.pathwaycommons.org/pc2/home.html#data_sources")
             ->setDataset("http://identifiers.org/pathwaycommons/");
         $dataset_description .= $source_file->toRDF();
         echo "done!" . PHP_EOL;
     }

     echo "Generating dataset description for Bio2RDF Pathways Commons dataset... ";
     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = gmdate($timestamp_format);
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})");
     // only reference a source when at least one was actually processed
     if ($source_file !== null) {
         $output_file->setSource($source_file->getURI());
     }
     $output_file->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/pathwaycommons/pathwaycommons.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr($suffix, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description .= $output_file->toRDF();

     // write dataset description to file
     parent::setGraphURI($graph_uri);
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
 /**
  * process a results xml file from the download directory
  **/
 function process_file($infile)
 {
     $indir = parent::getParameterValue('indir');
     $xml = new CXML($infile);
     $this->setCheckPoint('file');
     while ($xml->Parse("clinical_study") == TRUE) {
         $this->setCheckPoint('record');
         $this->root = $root = $xml->GetXMLRoot();
         $this->nct_id = $nct_id = $this->getString("//id_info/nct_id");
         $this->study_id = $study_id = parent::getNamespace() . "{$nct_id}";
         ### declare
         $label = $this->getString("//brief_title");
         if (!$label) {
             $label = $this->getString("//official_title");
         }
         if (!$label) {
             $label = "Clinical trial #" . $nct_id;
         }
         parent::addRDF(parent::describeIndividual($study_id, $label, parent::getVoc() . "Clinical-Study") . parent::describeClass(parent::getVoc() . "Clinical-Study", "Clinical Study"));
         ##########################################################################################
         #required header
         ##########################################################################################
         parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "download-date", $this->getString('//required_header/download_date')) . parent::triplify($study_id, parent::getVoc() . "url", $this->getString('//required_header/url')));
         ##########################################################################################
         #identifiers
         ##########################################################################################
         parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "nct-id", $this->getString('//id_info/nct_id'), "xsd:string") . parent::triplifyString($study_id, parent::getVoc() . "org-study-id", $this->getString('//id_info/org_study_id'), "xsd:string"));
         $sids = $root->xpath('//id_info/secondary_id');
         if (isset($sids)) {
             foreach ($sids as $id) {
                 parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "secondary-id", (string) $id, "xsd:string"));
             }
         }
         $nctaliases = $root->xpath('//id_info/nct-alias');
         if (isset($nctaliases)) {
             foreach ($nctaliases as $id) {
                 parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "nct-alias", (string) $id, "xsd:string"));
             }
         }
         ##########################################################################################
         #titles
         ##########################################################################################
         parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "brief-title", $this->getString("//brief_title")) . parent::triplifyString($study_id, parent::getVoc() . "official-title", $this->getString("//official_title")));
         ###################################################################################
         #brief summary
         ###################################################################################
         $brief_summary = str_replace(array("\r", "\n", "\t"), array("&#xD;", "&#xA;", "&#x9;"), $this->getString('//brief_summary/textblock'));
         parent::addRDF(parent::triplifyString($study_id, $this->getVoc() . "brief-summary", $brief_summary));
         ####################################################################################
         # detailed description
         ####################################################################################
         $d = str_replace(array("\r", "\n", "\t"), array("&#xD;", "&#xA;", "&#x9;"), $this->getString('//detailed_description/textblock'));
         parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "detailed-description", $d));
         #########################################################################################
         #acronym
         #########################################################################################
         parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "acronym", $this->getString("//acronym")));
         ########################################################################################
         #sponsors
         ########################################################################################
         try {
             $sponsors = array("lead_sponsor", "collaborator");
             foreach ($sponsors as $sponsor) {
                 $a = @array_shift($root->xpath('//sponsors/' . $sponsor));
                 if ($a == null) {
                     break;
                 }
                 $agency = $this->getString("//agency", $a);
                 $agency_id = parent::getRes() . md5($agency);
                 $agency_class = $this->getString("//agency_class", $a);
                 $agency_class_id = parent::getRes() . md5($agency_class);
                 parent::addRDF(parent::describeIndividual($agency_id, $agency, parent::getVoc() . "Organization") . parent::describeClass(parent::getVoc() . "Organization", "Organization") . parent::triplify($study_id, parent::getVoc() . str_replace("_", "-", $sponsor), $agency_id) . parent::describeIndividual($agency_class_id, $agency_class, parent::getVoc() . "Organization") . parent::describeClass(parent::getVoc() . "Organization", "Organization") . parent::triplify($agency_id, parent::getVoc() . "organization", $agency_class_id));
             }
         } catch (Exception $e) {
             echo "There was an error in the lead sponsor element: {$e}\n";
         }
         #################################################################################
         # source
         #################################################################################
         $source = $this->getString('//source');
         if ($source) {
             $source_id = parent::getRes() . md5($source);
             parent::addRDF(parent::describeIndividual($source_id, $source, parent::getVoc() . "Organization") . parent::triplify($study_id, parent::getVoc() . "source", $source_id));
         }
         ######################################################################################
         # oversight
         ######################################################################################
         try {
             $oversight = @array_shift($root->xpath('//oversight_info'));
             $oversight_id = parent::getRes() . md5($oversight->asXML());
             $authority = $this->getString('//authority', $oversight);
             $authority_id = parent::getRes() . md5($authority);
             parent::addRDF(parent::describeIndividual($oversight_id, $authority, parent::getVoc() . "Organization") . parent::triplify($study_id, $this->getVoc() . "oversight", $oversight_id) . parent::triplify($study_id, $this->getVoc() . "authority", $authority_id) . parent::triplifyString($oversight_id, parent::getVoc() . "has-dmc", $this->getString('//has_dmc', $oversight)));
         } catch (Exception $e) {
             echo "There was an error in the oversight info element: {$e}\n";
         }
         #################################################################################
         # overall status
         #################################################################################
         $overall_status = $this->getString('//overall_status');
         if ($overall_status) {
             $status_id = parent::getRes() . md5($overall_status);
             parent::addRDF(parent::describeIndividual($status_id, $overall_status, parent::getVoc() . "Status") . parent::describeClass(parent::getVoc() . "Status", "Status") . parent::triplify($study_id, parent::getVoc() . "overall-status", $status_id));
         }
         #########################################################################################
         #why stopped
         #########################################################################################
         parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "why-stopped", $this->getString("//why_stopped")));
         ##################################################################################
         # dates
         ##################################################################################
         $dates = array("start_date", "end_date", "completion_date", "primary_completion_date", "verification_date", "lastchanged_date", "firstreceived_date", "firstreceived_results_date");
         foreach ($dates as $date) {
             $d = $this->getString('//' . $date);
             if ($d) {
                 $datetime = $this->getDatetimeFromDate($d);
                 if (isset($datetime)) {
                     parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . str_replace("_", "-", $date), $datetime));
                 } else {
                     trigger_error("unable to parse date: {$d}", E_USER_ERROR);
                 }
             }
         }
         ####################################################################################
         # phase
         ####################################################################################
         $phase = $this->getString('//phase');
         if ($phase && $phase != "N/A") {
             $phase_id = $this->getRes() . md5($phase);
             parent::addRDF(parent::describeIndividual($phase_id, $phase, parent::getVoc() . "Phase", $phase) . parent::describeClass(parent::getVoc() . "Phase", $phase) . parent::triplify($study_id, parent::getVoc() . "phase", $phase_id));
         }
         ###################################################################################
         # study type
         ####################################################################################
         $study_type = $this->getString('//study_type');
         if ($study_type) {
             $study_type_id = $this->getRes() . md5($study_type);
             parent::addRDF(parent::describeClass($study_type_id, $study_type, parent::getVoc() . "Study-Type") . parent::describeClass(parent::getVoc() . "Study-Type", "Study Type") . parent::triplify($study_id, parent::getVoc() . "study-type", $study_type_id));
         }
         ###############################################################################
         # study design
         ###############################################################################
         $study_design = $this->getString('//study_design');
         if ($study_design) {
             $study_design_id = parent::getRes() . md5($study_id . $study_design);
             parent::addRDF(parent::describeIndividual($study_design_id, "{$study_id} study design", parent::getVoc() . "Study-Design") . parent::describeClass(parent::getVoc() . "Study-Design", "Study Design") . parent::triplify($study_id, parent::getVoc() . "study-design", $study_design_id));
             // Intervention Model: Parallel Assignment, Masking: Double-Blind, Primary Purpose: Treatment
             foreach (explode(", ", $study_design) as $i => $b) {
                 $c = explode(":  ", $b);
                 if (isset($c[1])) {
                     $sdp = $study_design_id . "-" . ($i + 1);
                     $key = parent::getRes() . md5($c[0]);
                     $value = parent::getRes() . md5($c[1]);
                     parent::addRDF(parent::describeIndividual($sdp, $b, parent::getVoc() . "Study-Design-Parameter") . parent::describeClass(parent::getVoc() . "Study-Design-Parameter", "Study Design Parameter") . parent::triplify($sdp, parent::getVoc() . "key", $key) . parent::describeClass($key, $c[0]) . parent::triplify($sdp, parent::getVoc() . "value", $value) . parent::describeClass($value, $c[1]) . parent::triplify($study_design_id, parent::getVoc() . "study-design-parameter", $sdp));
                 }
             }
         }
         ####################################################################################
         # target duration
         ####################################################################################
         parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "target-duration", $this->getString('//target_duration')));
         ################################################################################
         # outcomes
         ###############################################################################
         $outcomes = array("primary_outcome", "secondary_outcome", "other_outcome");
         foreach ($outcomes as $outcome) {
             $o = $root->xpath('//' . $outcome);
             if ($o) {
                 $os = $o;
                 if (!is_array($o)) {
                     $os = array($o);
                 }
                 foreach ($os as $o) {
                     try {
                         $po_id = parent::getRes() . md5($nct_id . $o->asXML());
                         $po_type = parent::getVoc() . str_replace("_", "-", $outcome);
                         $measure = $this->getString('//measure', $o);
                         $time_frame = $this->getString('//time_frame', $o);
                         $safety_issue = $this->getString('//saftey_issue', $o);
                         $description = $this->getString('//description', $o);
                         parent::addRDF(parent::describeIndividual($po_id, $measure . " " . $time_frame, ucfirst($po_type)) . parent::describeClass(ucfirst($po_type), str_replace("_", " ", ucfirst($outcome))) . parent::triplifyString($po_id, "dc:description", $description) . parent::triplifyString($po_id, parent::getVoc() . "measure", $measure) . parent::triplifyString($po_id, parent::getVoc() . "time-frame", $time_frame) . parent::triplifyString($po_id, parent::getVoc() . "safety-issue", $safety_issue) . parent::triplify($study_id, parent::getVoc() . $po_type, $po_id));
                     } catch (Exception $e) {
                         echo "There was an error parsing the primary outcome element: {$e} \n";
                     }
                 }
             }
         }
         ##############################################################################
         #number of arms
         ##############################################################################
         try {
             parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "number-of-arms", $this->getString('//number_of_arms')));
         } catch (Exception $e) {
             echo "There was an exception parsing the number of arms element: {$e}\n";
         }
         ##############################################################################
         #number of groups
         ##############################################################################
         try {
             parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "number-of-arms", $this->getString('//number_of_groups')));
         } catch (Exception $e) {
             echo "There was an exception parsing the number of groups: {$e}\n";
         }
         ##############################################################################
         #enrollment
         ##############################################################################
         try {
             $e = $root->xpath('//enrollment');
             if ($e) {
                 $type = strtolower((string) $e[0]->attributes()->type);
                 $value = $this->getString('//enrollment');
                 parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . ($type ? $type . "-" : "") . "enrollment", $value));
             }
         } catch (Exception $e) {
             echo "There was an exception parsing the enrollment element: {$e}\n";
         }
         ###############################################################################
         #condition
         ###############################################################################
         try {
             $conditions = $root->xpath('//condition');
             foreach ($conditions as $condition) {
                 $mesh_label_id = parent::getRes() . md5($condition);
                 parent::addRDF(parent::triplify($study_id, parent::getVoc() . "condition", $mesh_label_id) . parent::describeClass($mesh_label_id, $condition, parent::getVoc() . "Condition") . parent::describeClass(parent::getVoc() . "Condition", "Condition"));
             }
         } catch (Exception $e) {
             echo "There was an exception parsing condition element: {$e}\n";
         }
         ################################################################################
         # arm_group
         ################################################################################
         try {
             $arm_groups = $root->xpath('//arm_group');
             foreach ($arm_groups as $arm_group) {
                 $arm_group_id = $this->getString('./arm_group_label', $arm_group);
                 $arm_group_id = md5($arm_group_id);
                 $arm_group_uri = parent::getRes() . $this->nct_id . "/arm-group/" . $arm_group_id;
                 $arm_group_label = $this->nct_id . " arm group " . $arm_group_id;
                 $arm_group_type = ucfirst(str_replace(" ", "_", $this->getString('./arm_group_type', $arm_group)));
                 if (!$arm_group_type) {
                     $arm_group_type = "Clinical-Arm";
                 }
                 $description = $this->getString('./description', $arm_group);
                 parent::addRDF(parent::describeIndividual($arm_group_uri, $arm_group_label, parent::getVoc() . $arm_group_type) . parent::describeClass(parent::getVoc() . $arm_group_type, ucfirst(str_replace("_", " ", $arm_group_type))) . parent::triplifyString($arm_group_uri, parent::getVoc() . "description", $description) . parent::describeIndividual($arm_group_uri, $arm_group, parent::getVoc() . "Arm-Group") . parent::describeClass(parent::getVoc() . "Arm-Group", "Arm Group") . parent::triplify($study_id, parent::getVoc() . "arm-group", $arm_group_uri));
             }
         } catch (Exception $e) {
             echo "There was an exception in arm groups: {$e}\n";
         }
         ##############################################################################
         #intervention
         ##############################################################################
         try {
             $interventions = $root->xpath('//intervention');
             foreach ($interventions as $intervention) {
                 $intervention_id = parent::getRes() . md5($intervention->asXML());
                 $intervention_name = $this->getString('./intervention_name', $intervention);
                 $intervention_type = $this->getString('./intervention_type', $intervention);
                 $intervention_type_uri = parent::getVoc() . ucfirst(str_replace(" ", "_", $intervention_type));
                 $intervention_desc = $this->getString('./description', $intervention);
                 $intervention_on = $this->getString('./other_name', $intervention);
                 parent::addRDF(parent::describeIndividual($intervention_id, $intervention_name, $intervention_type_uri) . parent::describeClass($intervention_type_uri, $intervention_type) . parent::triplifyString($intervention_id, parent::getVoc() . "intervention-name", $intervention_name) . parent::triplifyString($intervention_id, parent::getVoc() . "intervention-desc", $intervention_desc) . parent::triplifyString($intervention_id, parent::getVoc() . "other-name", $intervention_on) . parent::triplify($study_id, parent::getvoc() . "intervention", $intervention_id));
                 $agl = $intervention->xpath("./arm_group_label");
                 foreach ($agl as $a) {
                     $arm_group_id = md5($a);
                     $ag = parent::getRes() . $this->nct_id . "/arm-group/" . $arm_group_id;
                     parent::addRDF(parent::describeIndividual($ag, $a, parent::getVoc() . "Arm-Group") . parent::describeClass(parent::getVoc() . "Arm-Group", "Arm Group") . parent::triplify($intervention_id, parent::getVoc() . "arm-group", $ag));
                 }
             }
         } catch (Exception $e) {
             echo "There was an error in interventions {$e}\n";
         }
         ###############################################################################
         #eligibility
         ################################################################################
         try {
             $eligibility = @array_shift($root->xpath('//eligibility'));
             if ($eligibility !== null) {
                 $eligibility_label = "eligibility for " . $study_id;
                 $eligibility_id = parent::getRes() . md5($eligibility->asXML());
                 parent::addRDF(parent::describeIndividual($eligibility_id, $eligibility_label, parent::getVoc() . "Eligibility") . parent::describeClass(parent::getVoc() . "Eligibility", "Eligibility") . parent::triplify($study_id, parent::getVoc() . "eligibility", $eligibility_id));
                 if ($criteria = @array_shift($eligibility->xpath('./criteria'))) {
                     $text = @array_shift($criteria->xpath('./textblock'));
                     parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . "text", $text));
                     $c = preg_split("/(Inclusion Criteria\\:|Exclusion Criteria\\:)/", $text);
                     //inclusion
                     if (isset($c[1])) {
                         $d = explode(" - ", $c[1]);
                         // the lists are separated by a hyphen
                         foreach ($d as $inclusion) {
                             $inc = trim($inclusion);
                             if ($inc != '') {
                                 $inc_id = parent::getRes() . md5($inc);
                                 parent::addRDF(parent::describeIndividual($inc_id, $inc, parent::getVoc() . "Inclusion-Criteria") . parent::describeClass(parent::getVoc() . "Inclusion-Criteria", "Inclusion Criteria") . parent::triplify($eligibility_id, parent::getVoc() . "inclusion-criteria", $inc_id));
                             }
                         }
                     }
                     //exclusion
                     if (isset($c[2])) {
                         $d = explode(" - ", $c[1]);
                         foreach ($d as $exclusion) {
                             $exc = trim($exclusion);
                             if ($exc != '') {
                                 $exc_id = parent::getRes() . md5($exc);
                                 parent::addRDF(parent::describeIndividual($exc_id, $exc, parent::getVoc() . "Exclusion-Criteria") . parent::describeClass(parent::getVoc() . "Exclusion-Criteria", "Exclusion Criteria") . parent::triplify($eligibility_id, parent::getVoc() . "exclusion-criteria", $exc_id));
                             }
                         }
                     }
                 }
                 parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . "gender", $this->getString('./gender', $eligibility)));
                 parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . "healthy-volunteers", $this->getString('./healthy_volunteers', $eligibility)));
                 $attributes = array('minimum_age', 'maximum_age');
                 foreach ($attributes as $a) {
                     $s = $this->getString('./' . $a, $eligibility);
                     if ($s != 'N/A') {
                         $age = trim(str_replace("Years", "", $s));
                         parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . str_replace("_", "-", $a), $age));
                     }
                 }
                 $attributes = array("study_pop" => "study-population", "sampling_method" => "sampling-method");
                 foreach ($attributes as $a => $r) {
                     $e = @array_shift($eligibility->xpath('./' . $a));
                     if ($s = $this->getString('./' . $a, $eligibility)) {
                         parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . $r, $this->getString('./textblock', $e)));
                     }
                 }
             }
         } catch (Exception $e) {
             echo "There was an error in eligibility: {$e}\n";
         }
         ######################################################################################
         #biospec
         #####################################################################################
         parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "biospec-retention", $this->getString('//biospec_retention')));
         try {
             $b = @array_shift($root->xpath('//biospec_descr'));
             if ($b) {
                 parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "biospec_descr", $this->getString('./textblock', $b)));
             }
         } catch (Exception $e) {
             echo "There was an error in biospec_descr: {$e}\n";
         }
         ###################################################################
         # contacts
         ###################################################################
         $contacts = array("overall_official", "overall_contact", "overall_contact_backup");
         try {
             foreach ($contacts as $c) {
                 $d = @array_shift($root->xpath('//' . $c));
                 if ($d) {
                     parent::addRDF(parent::triplify($study_id, parent::getVoc() . str_replace("_", "-", $c), $this->makeContact($d)));
                 }
             }
         } catch (Exception $e) {
             echo "There was an error parsing overall contact: {$e}" . "\n";
         }
         ##############################################################
         # location of facility doing the testing
         ##############################################################
         try {
             $location = @array_shift($root->xpath('//location'));
             if ($location) {
                 $location_uri = parent::getRes() . md5($location->asXML());
                 $name = $this->getString('//facility/name', $location);
                 $address = @array_shift($location->xpath('//facility/address'));
                 $contact = @array_shift($location->xpath('//contact'));
                 $backups = @array_shift($location->xpath('//contact_backup'));
                 $investigators = @array_shift($location->xpath('//investigator'));
                 parent::addRDF(parent::describeIndividual($location_uri, $name, parent::getVoc() . "Location") . parent::describeClass(parent::getVoc() . "Location", "Location") . parent::triplifyString($location_uri, parent::getVoc() . "status", $this->getString('//status', $location)) . parent::triplify($study_id, parent::getVoc() . "location", $location_uri) . parent::triplify($location_uri, parent::getVoc() . "address", $this->makeAddress($address)) . ($contact != null ? parent::triplify($location_uri, parent::getVoc() . "contact", $this->makeContact($contact)) : ""));
                 if ($backups) {
                     foreach ($backups as $backup) {
                         parent::addRDF(parent::triplify($location_uri, parent::getVoc() . "contact-backup", $this->makeContact($backup)));
                     }
                 }
                 if ($investigators) {
                     foreach ($investigators as $investigator) {
                         parent::addRDF(parent::triplify($location_uri, parent::getVoc() . "investigator", $this->makeContact($investigator)));
                     }
                 }
             }
         } catch (Exception $e) {
             echo "There was an error parsing location: {$e}" . "\n";
         }
         ######################################################################
         #countries
         ######################################################################
         try {
             $a = array("location_countries", "removed_countries");
             foreach ($a as $country) {
                 $lc = @array_shift($root->xpath('//' . $country));
                 if ($lc) {
                     $label = $this->getString('//country', $lc);
                     $cid = parent::getRes() . md5($label);
                     parent::addRDF(parent::describeIndividual($cid, $label, parent::getVoc() . "Country") . parent::describeClass(parent::getVoc() . "Country", "Country") . parent::triplify($study_id, parent::getVoc() . "country", $cid));
                 }
             }
         } catch (Exception $e) {
             echo "There was an error parsing country: {$e}" . "\n";
         }
         ######################################################################
         #reference
         ######################################################################
         try {
             $a = array("reference", "result_reference");
             foreach ($a as $ref_type) {
                 $references = $root->xpath('//' . $ref_type);
                 foreach ($references as $reference) {
                     $p = $this->getString('./PMID', $reference);
                     if ($p) {
                         $pmid = "pubmed:{$p}";
                         parent::addRDF(parent::describeIndividual($pmid, $p, parent::getVoc() . "Reference") . parent::describeClass(parent::getVoc() . "Reference", "Reference") . parent::triplifyString($pmid, parent::getVoc() . "citation", $this->getString('./citation', $reference)) . parent::triplify($study_id, parent::getVoc() . str_replace("_", "-", $ref_type), $pmid));
                     }
                 }
             }
         } catch (Exception $e) {
             echo "There was an error parsing references element: {$e}\n";
         }
         #######################################################################
         #link
         #######################################################################
         try {
             $links = $root->xpath('//link');
             foreach ($links as $i => $link) {
                 $url = $this->getString('./url', $link);
                 $url = preg_replace("/>.*\$/", "", $url);
                 $lid = parent::getRes() . md5($url);
                 parent::addRDF(parent::describeIndividual($lid, $this->getString('./description', $link), parent::getVoc() . "Link") . parent::describeClass(parent::getVoc() . "Link", "Link") . parent::triplify($lid, parent::getVoc() . "url", $url) . parent::triplify($study_id, parent::getVoc() . "link", $lid));
             }
         } catch (Exception $e) {
             echo "There was an error parsing link element: {$e}\n";
         }
         ############################################################################
         #responsible party
         ############################################################################
         try {
             $rp = @array_shift($root->xpath('//responsible_party'));
             if ($rp) {
                 $rp_id = parent::getRes() . md5($rp->asXML());
                 $label = $this->getString('./name_title', $rp);
                 if (!$label) {
                     $label = $this->getString('./organization', $rp);
                 } else {
                     $label .= ", " . $this->getString('./organization', $rp);
                 }
                 if (!$label) {
                     $label = $this->getString('./party_type', $rp);
                 }
                 $org_id = parent::getRes() . md5($this->getString('./organization', $rp));
                 parent::addRDF(parent::describeIndividual($rp_id, $label, parent::getVoc() . "Responsible-Party") . parent::describeClass(parent::getVoc() . "Responsible-Party", "Responsible Party") . parent::triplify($study_id, parent::getVoc() . "responsible-party", $rp_id) . parent::triplify($rp_id, parent::getVoc() . "organization", $org_id) . parent::describeIndividual($org_id, $this->getString('./organization', $rp), parent::getVoc() . "Organization") . parent::describeClass(parent::getVoc() . "Organization", "Organization") . parent::triplifyString($rp_id, parent::getVoc() . "name-title", $this->getString('./name_title', $rp)) . parent::triplifyString($rp_id, parent::getVoc() . "party-type", $this->getString('./party_type', $rp)) . parent::triplifyString($rp_id, parent::getVoc() . "investigator-affiliation", $this->getString('./investigator_affiliation', $rp)) . parent::triplifyString($rp_id, parent::getVoc() . "investigator-full-name", $this->getString('./investigator_full_name', $rp)) . parent::triplifyString($rp_id, parent::getVoc() . "investigator-title", $this->getString('./investigator_title', $rp)));
             }
         } catch (Exception $e) {
             echo "There was an error parsing the responsible_party element: {$e}\n";
         }
         ##############################################################################
         # keywords
         ##############################################################################
         try {
             $keywords = $root->xpath('//keyword');
             foreach ($keywords as $keyword) {
                 parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "keyword", (string) $keyword));
             }
         } catch (Exception $e) {
             echo "There was an error parsing the keywords element: {$e}";
         }
         # mesh terms
         # note: mesh terms are assigned using an imperfect algorithm
         try {
             $mesh_terms = $root->xpath('//condition_browse/mesh_term');
             foreach ($mesh_terms as $mesh_term) {
                 $term = (string) $mesh_term;
                 $mesh_id = parent::getRes() . md5($term);
                 parent::addRDF(parent::triplify($study_id, parent::getVoc() . "condition-mesh", $mesh_id));
                 parent::addRDF(parent::triplifyString($mesh_id, "rdfs:label", $term));
             }
         } catch (Exception $e) {
             echo "There was an error in mesh_terms: {$e}\n";
         }
         ################################################################################
         # regulated by fda?  is section 801? has expanded access?
         ################################################################################
         try {
             parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "is-fda-regulated", $this->getString('is_fda_regulated')) . parent::triplifyString($study_id, parent::getVoc() . "is-section-801", $this->getString('is_section_801')) . parent::triplifyString($study_id, parent::getVoc() . "has-expanded-access", $this->getString('has_expanded_access')));
         } catch (Exception $e) {
             echo "There was an error parsing the is_fda_regulated element: {$e}\n";
         }
         ###############################################################################
         # mesh terms for the intervention browse
         ###############################################################################
         try {
             $a = array("condition_browse", "intervention_browse");
             foreach ($a as $browse_type) {
                 $terms = $root->xpath("//{$browse_type}/mesh_term");
                 foreach ($terms as $term) {
                     $term_label = (string) $term;
                     $term_id = parent::getRes() . md5($term);
                     parent::addRDF(parent::describeIndividual($term_id, $term_label, parent::getVoc() . "Term") . parent::describeClass(parent::getVoc() . "Term", "Term") . parent::triplify($study_id, parent::getVoc() . str_replace("_", "-", $browse_type), $term_id));
                 }
             }
         } catch (Exception $e) {
             echo "There was an error parsing {$browse_type}/mesh_term element: {$e}\n";
         }
         ################################################################################
         # clinical results
         ################################################################################
         try {
             $cr = @array_shift($root->xpath('//clinical_results'));
             if ($cr) {
                 $cr_id = parent::getRes() . md5($study_id . $cr->asXML());
                 parent::addRDF(parent::describeIndividual($cr_id, "clinical results for {$study_id}", parent::getVoc() . "Clinical-Result") . parent::describeClass(parent::getVoc() . "Clinical-Result", "Clinical Result") . parent::triplifyString($cr_id, parent::getVoc() . "description", $this->getString('./desc', $cr)) . parent::triplifyString($cr_id, parent::getVoc() . "restrictive-agreement", $this->getString('./restrictive_agreement', $cr)) . parent::triplifyString($cr_id, parent::getVoc() . "limitations-and-caveats", $this->getString('./limitations_and_caveats', $cr)) . parent::triplify($study_id, parent::getVoc() . "clinical-result", $cr_id));
             }
         } catch (Exception $e) {
             echo "There was an error parsing clinical results: {$e}\n";
         }
         ################################################################################
         # Participant Flow
         ################################################################################
         try {
             $pc = 1;
             $mc = 1;
             $wc = 1;
             $pf = @array_shift($root->xpath('//clinical_results/participant_flow'));
             if ($pf) {
                 $pf_id = parent::getRes() . md5($pf->asXML());
                 parent::addRDF(parent::describeIndividual($pf_id, "participant flow for {$study_id}", parent::getVoc() . "Participant-Flow") . parent::describeClass(parent::getVoc() . "Participant-Flow", "Participant-Flow") . parent::triplify($study_id, parent::getVoc() . "participant-flow", $pf_id) . parent::triplifyString($pf_id, parent::getVoc() . "recruitment-details", $this->getString('./recruitment_details', $pf)) . parent::triplifyString($pf_id, parent::getVoc() . "pre-assignment-details", $this->getString('./pre_assignment_details', $pf)));
                 $groups = @array_shift($pf->xpath('./group_list'));
                 foreach ($groups as $group) {
                     parent::addRDF(parent::triplify($pf_id, parent::getVoc() . "group", $this->makeGroup($group)));
                 }
                 //period_list
                 $periods = @array_shift($pf->xpath('./period_list'));
                 foreach ($periods as $period) {
                     $period_id = parent::getRes() . $nct_id . "/period/" . $pc++;
                     $period_title = $this->getString('./title', $period);
                     parent::addRDF(parent::describeIndividual($period_id, $period_title . " for {$nct_id}", parent::getVoc() . "Period") . parent::describeClass(parent::getVoc() . "Period", "Period") . parent::triplify($pf_id, parent::getVoc() . "period", $period_id));
                     // milestones
                     $milestones = @array_shift($period->xpath('./milestone_list'));
                     if ($milestones) {
                         foreach ($milestones as $milestone) {
                             $milestone_id = parent::getRes() . $nct_id . "/milestone/" . $mc++;
                             $label = $this->getString('./title', $milestone);
                             parent::addRDF(parent::describeIndividual($milestone_id, $label, parent::getVoc() . "Milestone") . parent::describeClass(parent::getVoc() . "Milestone", "Milestone") . parent::triplify($period_id, parent::getVoc() . "milestone", $milestone_id));
                             // participants
                             $p = 1;
                             $ps_list = @array_shift($milestone->xpath('./participants_list'));
                             foreach ($ps_list as $ps) {
                                 $ps_id = $milestone_id . "/p/" . $p++;
                                 $group_id = parent::getRes() . $this->nct_id . "/group/" . $ps->attributes()->group_id;
                                 $count = (string) $ps->attributes()->count;
                                 parent::addRDF(parent::describeIndividual($ps_id, "participant counts in " . $ps->attributes()->group_id . " for milestone {$mc} of {$nct_id}", parent::getVoc() . "Participant-Count") . parent::describeClass(parent::getVoc() . "Participant-Count", "Participant Count") . parent::triplify($ps_id, parent::getVoc() . "group", $group_id) . parent::triplifyString($ps_id, parent::getVoc() . "count", $count) . parent::triplify($milestone_id, parent::getVoc() . "participant-counts", $ps_id));
                             }
                         }
                     }
                     // milestones
                     $withdraws = @array_shift($period->xpath('./drop_withdraw_reason_list'));
                     if ($withdraws) {
                         foreach ($withdraws as $withdraw) {
                             $wid = parent::getRes() . $this->nct_id . "/withdraw/" . $wc++;
                             $label = $this->getString('./title', $withdraw);
                             parent::addRDF(parent::describeIndividual($wid, $label, parent::getVoc() . "Withdraw-Reason") . parent::describeClass(parent::getVoc() . "Withdraw-Reason", "Withdraw Reason"));
                             // participants
                             $ps_list = @array_shift($withdraw->xpath('./participants_list'));
                             foreach ($ps_list as $ps) {
                                 $group_id = parent::getRes() . $nct_id . "/group/" . $ps->attributes()->group_id;
                                 $count = (string) $ps->attributes()->count;
                                 parent::addRDF(parent::triplify($wid, parent::getVoc() . "group", $group_id) . parent::triplifyString($wid, parent::getVoc() . "count", $count));
                             }
                         }
                     }
                 }
             }
         } catch (Exception $e) {
             echo "There was an error parsing participant flow element: {$e}\n";
         }
         ################################################################################
         # baseline
         ################################################################################
         try {
             $baseline = @array_shift($root->xpath('//baseline'));
             if ($baseline) {
                 $b_id = $this->nct_id . "/baseline";
                 $b_uri = parent::getRes() . $b_id;
                 // group list
                 $groups = @array_shift($baseline->xpath('./group_list'));
                 foreach ($groups as $group) {
                     parent::addRDF(parent::describeIndividual($b_uri, "baseline for {$nct_id}", parent::getVoc() . "Baseline") . parent::describeClass(parent::getVoc() . "Baseline", "Baseline") . parent::triplify($b_uri, parent::getVoc() . "group", $this->makeGroup($group)) . parent::triplify($study_id, parent::getVoc() . "baseline", $b_uri));
                 }
                 // measure list
                 $measures = @array_shift($baseline->xpath('./measure_list'));
                 foreach ($measures as $measure) {
                     parent::addRDF(parent::triplify($b_uri, parent::getVoc() . "measure", $this->makeMeasure($measure)));
                 }
             }
         } catch (Exception $e) {
             echo "Error in parsing baseline" . PHP_EOL;
         }
         ################################################################################
         # outcomes
         ################################################################################
         try {
             $outcomes = @array_shift($root->xpath('//outcome_list'));
             if ($outcomes) {
                 foreach ($outcomes as $i => $outcome) {
                     $outcome_id = $this->nct_id . "/outcome/" . ($i + 1);
                     $outcome_uri = parent::getRes() . $outcome_id;
                     $outcome_label = $this->getString("./title", $outcome);
                     if (!$outcome_label) {
                         $outcome_label = "outcome for " . $this->nct_id;
                     }
                     parent::addRDF(parent::describeIndividual($outcome_uri, $outcome_label, parent::getVoc() . "Outcome", $this->getString("./description", $outcome)) . parent::describeClass(parent::getVoc() . "Outcome", "Outcome") . parent::triplify($study_id, parent::getVoc() . "outcome", $outcome_uri) . parent::triplifyString($outcome_uri, parent::getVoc() . "type", $this->getString("./type", $outcome)) . parent::triplifyString($outcome_uri, parent::getVoc() . "time-frame", $this->getString("./time_frame", $outcome)) . parent::triplifyString($outcome_uri, parent::getVoc() . "safety-issue", $this->getString("./safety_issue", $outcome)) . parent::triplifyString($outcome_uri, parent::getVoc() . "posting-date", $this->getString("./posting-date", $outcome)) . parent::triplifyString($outcome_uri, parent::getVoc() . "population", $this->getString("./population", $outcome)));
                     $groups = @array_shift($outcome->xpath('./group_list'));
                     if ($groups) {
                         foreach ($groups as $group) {
                             parent::addRDF(parent::triplify($outcome_uri, parent::getVoc() . "group", $this->makeGroup($group)));
                         }
                     }
                     // measure list
                     $measures = @array_shift($outcome->xpath('./measure_list'));
                     if ($measures) {
                         foreach ($measures as $measure) {
                             parent::addRDF(parent::triplify($outcome_uri, parent::getVoc() . "measure", $this->makeMeasure($measure)));
                         }
                     }
                     // analysis list
                     $analyses = @array_shift($outcome->xpath('./analysis_list'));
                     if ($analyses) {
                         foreach ($analyses as $analysis) {
                             parent::addRDF(parent::triplify($outcome_uri, parent::getVoc() . "analysis", $this->makeAnalysis($analysis)));
                         }
                     }
                 }
             }
         } catch (Exception $e) {
             echo "Error in parsing outcomes" . PHP_EOL;
         }
         ################################################################################
         # events
         ################################################################################
         try {
             $c_ev = $c_c = 1;
             $reported_events = @array_shift($root->xpath('//reported_events'));
             if ($reported_events) {
                 $rp_id = parent::getRes() . md5($reported_events->asXML());
                 $groups = @array_shift($reported_events->xpath('./group_list'));
                 parent::addRDF(parent::describeIndividual($rp_id, "Reported events for {$nct_id}", parent::getVoc() . "Reported-Events") . parent::describeClass(parent::getVoc() . "Reported-Events", "Reported Events") . parent::triplify($study_id, parent::getVoc() . "reported-events", $rp_id));
                 foreach ($groups as $group) {
                     parent::addRDF(parent::triplify($rp_id, parent::getVoc() . "group", $this->makeGroup($group)));
                 }
                 // events
                 $event_list = array("serious_events" => "Serious Event", "other_events" => "Other Event");
                 foreach ($event_list as $ev => $ev_label) {
                     $et = @array_shift($reported_events->xpath('./' . $ev));
                     if (!$et) {
                         continue;
                     }
                     $ev_uri = parent::getVoc() . str_replace(" ", "-", $ev_label);
                     $categories = @array_shift($et->xpath('./category_list'));
                     foreach ($categories as $category) {
                         $major_title = $this->getString('./title', $category);
                         $major_title_uri = parent::getRes() . md5($major_title);
                         $events = @array_shift($category->xpath('./event_list'));
                         foreach ($events as $event) {
                             $e_uri = parent::getRes() . $this->nct_id . "/{$ev}/" . $c_ev++;
                             $subtitle = (string) $this->getString('./sub_title', $event) . " for " . $this->nct_id;
                             $subtitle_uri = parent::getRes() . md5($subtitle);
                             parent::addRDF(parent::describeIndividual($e_uri, $subtitle, $ev_uri) . parent::describeClass($ev_uri, $ev_label) . parent::triplify($e_uri, parent::getVoc() . "sub-title", $subtitle_uri) . parent::describeIndividual($subtitle_uri, $subtitle, parent::getVoc() . "Event") . parent::describeClass(parent::getVoc() . "Event", "Event") . parent::triplify($e_uri, parent::getVoc() . "major-title", $major_title_uri) . parent::describeClass($major_title_uri, $major_title) . parent::triplify($rp_id, parent::getVoc() . str_replace("_", "-", $ev), $e_uri));
                             $counts = $event->xpath('./counts');
                             foreach ($counts as $c) {
                                 $group_id = $c->attributes()->group_id;
                                 $group_uri = parent::getRes() . $nct_id . "/group/" . $group_id;
                                 $c_uri = $e_uri . "/count/" . $c_c++;
                                 parent::addRDF(parent::describeIndividual($c_uri, $subtitle . " for " . $group_id . " in " . $this->nct_id, parent::getVoc() . "Event-Count") . parent::describeClass(parent::getVoc() . "Event-Count", "Event Count") . parent::triplify($c_uri, parent::getVoc() . "group", $group_uri) . parent::triplify($e_uri, parent::getVoc() . "count", $c_uri) . parent::triplifyString($c_uri, parent::getVoc() . "default-vocabulary", $this->getString('./default_vocab', $et)) . parent::triplifyString($c_uri, parent::getVoc() . "frequency-threshold", $this->getString('./frequency_threshold', $et)) . parent::triplifyString($c_uri, parent::getVoc() . "default-assessment", $this->getString('./default_assessment', $et)) . parent::triplifyString($c_uri, parent::getVoc() . "number-events", $c->attributes()->events) . parent::triplifyString($c_uri, parent::getVoc() . "subjects-affected", $c->attributes()->subjects_affected) . parent::triplifyString($c_uri, parent::getVoc() . "subjects-at-risk", $c->attributes()->subjects_at_risk));
                             }
                         }
                     }
                 }
             }
         } catch (Exception $e) {
             echo "Error in parsing reported events" . PHP_EOL;
         }
         parent::writeRDFBufferToWriteFile();
     }
     $this->setCheckPoint('record');
     $this->setCheckPoint('dataset');
 }
Beispiel #7
0
 /**
  * Convert a set of dbSNP records into RDF and write the dataset description.
  *
  * The first value of the 'files' parameter selects the SNP list:
  *  - 'all'      -> $this->getSNPs()
  *  - 'clinical' -> $this->getSNPs(true)
  *  - 'omim'     -> identifiers extracted from snp_omimvar.txt by processOMIMVar()
  *  - 'pharmgkb' -> identifiers extracted from pharmgkb.snp.zip by processPharmGKBSnps()
  *  - otherwise the comma-separated values are used as SNP identifiers as-is.
  *
  * Each SNP is read from "<indir><snp>.xml.gz" (downloaded first when missing
  * or when the 'download' parameter is set), parsed, and flushed to
  * "<outdir>dbsnp.<output_format>".
  */
 function run()
 {
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     // work out which snps to process
     $snps = explode(",", parent::getParameterValue('files'));
     if ($snps[0] == 'all') {
         $snps = $this->getSNPs();
     } elseif ($snps[0] == 'clinical') {
         $snps = $this->getSNPs(true);
     } elseif ($snps[0] == 'omim') {
         $lfile = $ldir . 'snp_omimvar.txt';
         if (!file_exists($lfile) || parent::getParameterValue('download') == true) {
             utils::DownloadSingle('ftp://ftp.ncbi.nlm.nih.gov/snp/Entrez/snp_omimvar.txt', $lfile);
         }
         $snps = $this->processOMIMVar($lfile);
     } elseif ($snps[0] == 'pharmgkb') {
         $lfile = $ldir . 'pharmgkb.snp.zip';
         if (!file_exists($lfile) || parent::getParameterValue('download') == true) {
             utils::DownloadSingle('http://www.pharmgkb.org/download.do?objId=rsid.zip&dlCls=common', $lfile);
         }
         $snps = $this->processPharmGKBSnps($lfile);
     }
     // Keep the bare file name separate from the output directory: the name is
     // reused below to build the public download URI, which must not contain
     // the local directory path.
     $outfile = "dbsnp." . parent::getParameterValue('output_format');
     $gz = strstr(parent::getParameterValue('output_format'), ".gz") !== FALSE;
     parent::setWriteFile($odir . $outfile, $gz);
     // Read the caller's download preference once. A missing input file only
     // triggers a download for that file; it must not switch on re-downloading
     // for every SNP that follows.
     $force_download = parent::getParameterValue('download') == true;
     // Defined up front so the dataset description below still has a source
     // URI when the SNP list turns out to be empty.
     $rfile = parent::getParameterValue('download_url');
     $n = count($snps);
     $z = 0;
     foreach ($snps as $i => $snp) {
         $file = $snp . '.xml.gz';
         $infile = $ldir . $file;
         $rfile = parent::getParameterValue('download_url') . $snp;
         // download when asked to, or when there is no local copy
         if ($force_download || !file_exists($infile)) {
             trigger_error("Downloading {$file}", E_USER_NOTICE);
             $ret = utils::downloadSingle($rfile, "compress.zlib://" . $infile, true);
             if ($ret === false) {
                 // nothing to parse for this snp
                 continue;
             }
         }
         // process
         echo "Processing {$snp} (" . ($i + 1) . "/{$n})" . PHP_EOL;
         $this->parse($infile);
         parent::writeRDFBufferToWriteFile();
         // periodically reset parser state (also happens on the very first snp)
         if ($z++ % 10000 == 0) {
             parent::clear();
         }
     }
     parent::getWriteFile()->close();
     // generate the dataset description file
     $source_file = (new DataResource($this))->setURI($rfile)->setTitle("dbSNP " . parent::getDatasetVersion())->setRetrievedDate(date("Y-m-d\\TG:i:s\\Z"))->setFormat("application/xml")->setPublisher("http://www.ncbi.nlm.nih.gov")->setHomepage("http://www.ncbi.nlm.nih.gov/SNP/")->setRights("use-share-modify")->setLicense("http://www.ncbi.nlm.nih.gov/About/disclaimer.html")->setDataset("http://identifiers.org/dbsnp/");
     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = date("Y-m-d\\TG:i:s\\Z");
     $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outfile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/dbsnp/dbsnp.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr(parent::getParameterValue('output_format'), "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description = $source_file->toRDF() . $output_file->toRDF();
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
 }
Beispiel #8
0
 /**
  * Walk the listing currently open for reading and, for every entry, emit a
  * basic description, fetch (if needed) and parse its flat-file record, and,
  * when $db is "pathway", also fetch and parse its KGML file.
  *
  * Entries are skipped when $this->idlist is set and does not contain the id,
  * or when a required download fails.
  *
  * @param string $db database name; used (capitalised) as the class name and
  *                   in the "KEGG <db>" class label
  */
 function process($db)
 {
     $in_dir = parent::getParameterValue('indir');
     // read here as well, although nothing below uses it
     $out_dir = parent::getParameterValue('outdir');
     while ($line = parent::getReadFile()->read()) {
         // first tab-separated column is "<namespace>:<id>", second is the name
         $columns = explode("\t", $line);
         $nsid = $columns[0];
         $name = $columns[1];
         $parts = explode(":", $nsid);
         $ns = $parts[0];
         $id = $parts[1];
         // honour the optional id filter
         $filtered_out = isset($this->idlist) && !in_array($id, $this->idlist);
         if ($filtered_out) {
             continue;
         }
         if (isset($this->org)) {
             $id = $ns . "_" . $id;
         }
         $uri = $this->getNamespace() . $id;
         $rdf = parent::describeIndividual($uri, $name, parent::getVoc() . ucfirst($db));
         $rdf .= parent::describeClass(parent::getVoc() . ucfirst($db), "KEGG {$db}");
         $rdf .= parent::triplifyString($uri, parent::getVoc() . "internal-id", $nsid);
         parent::addRDF($rdf);
         // flat-file record for this entry
         $entry_file = $in_dir . $id . ".txt";
         $entry_url = parent::getParameterValue("download_url") . "get/{$nsid}";
         if (!file_exists($entry_file) || parent::getParameterValue('download') == 'true') {
             echo "downloading {$nsid} ";
             if (utils::downloadSingle($entry_url, $entry_file) === false) {
                 echo "unable to download " . $nsid . " ... skipping" . PHP_EOL;
                 continue;
             }
             echo "done. ";
         }
         echo "parsing {$nsid} ... ";
         $this->parseEntry($entry_file);
         parent::writeRDFBufferToWriteFile();
         // only pathways have an accompanying KGML file
         if ($db !== "pathway") {
             echo "done!" . PHP_EOL;
             continue;
         }
         $ko = str_replace("map", "ko", $id);
         $kgml_file = $in_dir . $id . ".kgml";
         $kgml_url = "http://www.kegg.jp/kegg-bin/download?entry=" . $ko . "&format=kgml";
         if (!file_exists($kgml_file) || parent::getParameterValue('download') == 'true') {
             echo "downloading KGML for {$nsid} ";
             if (utils::downloadSingle($kgml_url, $kgml_file) === false) {
                 echo "unable to download " . $nsid . " ... skipping" . PHP_EOL;
                 continue;
             }
             echo "done. ";
         }
         $this->parseKGML($kgml_file);
         parent::writeRDFBufferToWriteFile();
         echo "done!" . PHP_EOL;
     }
 }
Beispiel #9
0
 /**
  * Convert the gene expression CSV that is currently open for reading.
  *
  * The first line is treated as a header and must have exactly 8
  * comma-separated columns. Every following record yields one expression
  * resource (typed "Gene <Expression> Expression"), linked to its
  * ncbigene identifier and to an evidence resource carrying the dataset
  * counts and p-value.
  *
  * Blank lines are ignored; records with fewer than 8 fields are reported
  * with a warning and skipped instead of being turned into triples built
  * from missing values.
  *
  * @return false|null false when the header does not have the expected
  *                    number of columns, nothing otherwise
  */
 function gene_expression()
 {
     $h = explode(",", parent::getReadFile()->read());
     $expected_columns = 8;
     if (($n = count($h)) != $expected_columns) {
         trigger_error("Found {$n} columns in gene file - expecting {$expected_columns}!", E_USER_WARNING);
         return false;
     }
     while ($l = parent::getReadFile()->read(200000)) {
         if (trim($l) === '') {
             // nothing on this line
             continue;
         }
         $data = str_getcsv($l);
         if (($n = count($data)) < $expected_columns) {
             trigger_error("Found {$n} columns in gene record - expecting {$expected_columns}! Skipping record.", E_USER_WARNING);
             continue;
         }
         $mgi_symbol = $data[0];
         $mgi_description = $data[1];
         $geneid = $data[2];
         $total_datasets = $data[3];
         $total_ovexp = $data[4];
         $total_underexp = $data[5];
         $p_value = $data[6];
         $expression = $data[7];
         // both identifiers are hashes of the same record fields; the
         // evidence one is distinguished by the "_evidence" suffix
         $record_key = $geneid . $total_datasets . $total_ovexp . $total_underexp . $p_value . $expression;
         $id = parent::getRes() . md5($record_key);
         $evidence_id = parent::getRes() . md5($record_key . "_evidence");
         $gene_uri = "ncbigene:" . $geneid;
         $label = "Dietary restriction induced " . $expression . "-expression of " . $mgi_symbol . " based on microarray results from " . $total_datasets . " datasets, with p-value " . $p_value;
         $type_label = "Gene " . ucfirst($expression) . " Expression";
         $type = parent::getVoc() . str_replace(" ", "-", $type_label);
         parent::addRDF(parent::describeIndividual($id, $label, $type) . parent::describeClass($type, $type_label) . parent::triplify($id, parent::getVoc() . "gene", $gene_uri) . parent::triplifyString($gene_uri, parent::getVoc() . "mgi-gene-symbol", $mgi_symbol) . parent::triplifyString($gene_uri, parent::getVoc() . "mgi-gene-description", $mgi_description) . parent::triplify($id, parent::getVoc() . "evidence", $evidence_id) . parent::triplifyString($id, parent::getVoc() . "perturbation-context", "dietary restriction") . parent::triplifyString($evidence_id, parent::getVoc() . "total-number-datasets", $total_datasets) . parent::triplifyString($evidence_id, parent::getVoc() . "total-number-datasets-overexpressed", $total_ovexp) . parent::triplifyString($evidence_id, parent::getVoc() . "total-number-datasets-underexpressed", $total_underexp) . parent::triplifyString($evidence_id, parent::getVoc() . "p-value", $p_value));
         parent::writeRDFBufferToWriteFile();
     }
 }
Beispiel #10
0
 /**
  * Download (when needed) and convert OMIM entries to RDF, then write the
  * dataset description.
  *
  * The 'files' parameter is either 'all', a hyphenated range
  * ("<start>-<end>", inclusive) or a comma-separated list of MIM numbers;
  * it is intersected with the list returned by GetListOfEntries().
  *
  * Each entry is cached as "<indir><mim>.json.gz". An entry whose download
  * fails, or whose cached content cannot be decoded into an OMIM entry, is
  * reported with a warning and skipped.
  *
  * @return bool TRUE once the output and description files have been written
  */
 function Run()
 {
     // directory shortcuts
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     if (parent::getParameterValue('omim_api_key') == '') {
         $key_file = parent::getParameterValue('omim_api_key_file');
         if (file_exists($key_file)) {
             $key = trim(file_get_contents($key_file));
             if ($key) {
                 parent::setParameterValue('omim_api_key', $key);
             } else {
                 trigger_error("No API key found in the specified omim key file {$key_file}", E_USER_WARNING);
             }
         } else {
             trigger_error("No OMIM key has been provided either by commmand line or in the expected omim key file {$key_file}", E_USER_WARNING);
         }
     }
     // get the list of mim2gene entries
     $entries = $this->GetListOfEntries($ldir);
     // get the work specified
     $list = trim(parent::getParameterValue('files'));
     if ($list != 'all') {
         // always start from an empty selection so that a range or list that
         // matches nothing yields an empty run rather than an undefined value
         $myentries = array();
         // check if a hyphenated list was provided
         if (($pos = strpos($list, "-")) !== FALSE) {
             $start_range = substr($list, 0, $pos);
             $end_range = substr($list, $pos + 1);
             // keep the entries whose number falls inside the range; the
             // list fetched above is reused instead of being fetched again
             foreach ($entries as $e => $type) {
                 if ($e >= $start_range && $e <= $end_range) {
                     $myentries[$e] = $type;
                 }
             }
             $entries = $myentries;
         } else {
             // for comma separated list; tolerate spaces around the numbers
             foreach (explode(",", $list) as $e) {
                 $myentries[trim($e)] = '';
             }
             $entries = array_intersect_key($entries, $myentries);
         }
     }
     // set the write file
     $gz = strstr(parent::getParameterValue('output_format'), ".gz") !== FALSE;
     $outfile = 'omim.' . parent::getParameterValue('output_format');
     parent::setWriteFile($odir . $outfile, $gz);
     // declare the mapping method types
     $this->get_method_type(null, true);
     // iterate over the entries
     $i = 0;
     $total = count($entries);
     foreach ($entries as $omim_id => $type) {
         echo "processing " . ++$i . " of {$total} - omim# ";
         $download_file = $ldir . $omim_id . ".json.gz";
         $gzfile = "compress.zlib://{$download_file}";
         // download if the file doesn't exist or we are told to
         if (!file_exists($download_file) || parent::getParameterValue('download') == true) {
             // download using the api
             $url = parent::getParameterValue('omim_api_url') . '&apiKey=' . parent::getParameterValue('omim_api_key') . '&mimNumber=' . $omim_id;
             $buf = file_get_contents($url);
             if ($buf !== false && strlen($buf) != 0) {
                 // write through the zlib wrapper so the cached file really is
                 // gzip, as its name says; older uncompressed caches remain
                 // readable because zlib reads plain files transparently
                 file_put_contents($gzfile, $buf);
                 usleep(500000);
                 // limit of 4 requests per second
             }
         }
         if (!file_exists($download_file)) {
             trigger_error("Unable to download OMIM entry {$omim_id}", E_USER_WARNING);
             echo PHP_EOL;
             continue;
         }
         // load entry, parse and write to file
         $raw = file_get_contents($gzfile);
         $entry = ($raw === false) ? null : json_decode($raw, true);
         if (!isset($entry["omim"]["entryList"][0]["entry"]['mimNumber'])) {
             // unreadable cache or a response that is not an OMIM entry
             trigger_error("Unable to read an OMIM entry for {$omim_id} from {$download_file}", E_USER_WARNING);
             echo PHP_EOL;
             continue;
         }
         $omim_id = trim((string) $entry["omim"]["entryList"][0]["entry"]['mimNumber']);
         echo $omim_id;
         $this->ParseEntry($entry, $type);
         parent::writeRDFBufferToWriteFile();
         echo PHP_EOL;
     }
     parent::writeRDFBufferToWriteFile();
     parent::getWriteFile()->close();
     // generate the dataset description file
     $source_file = (new DataResource($this))->setURI(parent::getParameterValue('omim_api_url'))->setTitle("OMIM " . parent::getDatasetVersion())->setRetrievedDate(date("Y-m-d\\TG:i:s\\Z"))->setFormat("application/json")->setPublisher("http://omim.org")->setHomepage("http://omim.org")->setRights("use")->setRights("no-commercial")->setRights("registration-required")->setLicense("http://www.omim.org/help/agreement")->setDataset("http://identifiers.org/omim/");
     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = date("Y-m-d\\TG:i:s\\Z");
     $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outfile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/omim/omim.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr(parent::getParameterValue('output_format'), "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description = $source_file->toRDF() . $output_file->toRDF();
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     return TRUE;
 }
Beispiel #11
0
 /**
  * Convert the iRefIndex tab-delimited file on the current read file into RDF.
  *
  * The first line is treated as the header (its first character is dropped
  * before splitting) and must contain exactly 54 tab-separated columns.
  * Every following line is split on tabs and turned into triples describing
  * the interaction, its two interactors, the sequence-identical and canonical
  * groups, and the annotations (method, publications, source, confidence,
  * expansion method, host organism, creation date). The RDF buffer is flushed
  * to the write file after each line.
  *
  * @return bool|null FALSE when the header does not have 54 columns; no value otherwise.
  */
 function Parse()
 {
     $l = parent::getReadFile()->read(100000);
     $header = explode("\t", trim(substr($l, 1)));
     // check # of columns
     if (($c = count($header)) != 54) {
         trigger_error("Expecting 54 columns, found {$c}!");
         return FALSE;
     }
     // MI interaction types whose rdfs:label has already been emitted
     $defined = array();
     while ($l = parent::getReadFile()->read(500000)) {
         $a = explode("\t", trim($l));
         // irefindex identifiers
         $rigid = "irefindex." . $a[34];            // checksum for interaction
         $rogida = "irefindex." . $a[32];           // checksum for A
         $rogidb = "irefindex." . $a[33];           // checksum for B
         $irigid = "irefindex.irigid:" . $a[44];    // integer id for interaction
         $irogida = "irefindex.irogid:" . $a[42];   // integer id for A
         $irogidb = "irefindex.irogid:" . $a[43];   // integer id for B
         $crigid = "irefindex.crigid:" . $a[47];    // checksum for canonical interaction
         $icrigid = "irefindex.icrigid:" . $a[50];  // integer id for canonical interaction
         $crogida = "irefindex.crogid:" . $a[45];   // checksum for A's canonical group
         $crogidb = "irefindex.crogid:" . $a[46];   // checksum for B's canonical group
         $icrogida = "irefindex.icrogid:" . $a[48]; // integer for A's canonical group
         $icrogidb = "irefindex.icrogid:" . $a[49]; // integer for B's canonical group
         // 13 contains the original identifier, the rigid, and the edgetype
         $ids = explode("|", $a[13]);
         if (count($ids) != 3) {
             trigger_error("Expecting 3 entries in column 14");
             print_r($ids);
             exit;
         }
         parent::getRegistry()->parseQName($ids[0], $ns, $id);
         if ($id == '-') {
             // this happens with hprd
             $iid = "hprd:" . substr($ids[1], 6);
         } else {
             $iid = $ns . ":" . $id;
         }
         // get the type
         // NOTE(review): an edge type other than X/C/Y leaves $label and $type at the
         // previous row's values (or unset on the first row) — confirm no other codes occur.
         if ($a[52] == "X") {
             $label = "{$a[0]} - {$a[1]} Interaction";
             $type = "Pairwise-Interaction";
         } elseif ($a[52] == "C") {
             // column 53 holds the number of participants
             $label = $a[53] . " component complex";
             $type = "Multimeric-Complex";
         } elseif ($a[52] == "Y") {
             $label = "{$a[0]} homomeric complex";
             $type = "Homopolymeric-Complex";
         }
         parent::addRDF(parent::describeIndividual($iid, $label, parent::getVoc() . $type) . parent::describeClass(parent::getVoc() . $type, str_replace("-", " ", $type)));
         // interaction type[52] by method[6]
         if ($a[6] != '-') {
             $data = $this->ParseStringArray($a[6]);
             $qname = trim($data["ns"]) . ":" . trim($data["id"]);
             if ($qname) {
                 parent::addRDF(parent::triplify($iid, parent::getVoc() . "method", $qname) . parent::describeClass($qname, $data['label']));
             }
         }
         parent::addRDF(parent::triplify($iid, "rdfs:seeAlso", "http://wodaklab.org/iRefWeb/interaction/show/" . $a[50]));
         // set the interactors (columns 0 and 1 -> interactor a and b)
         for ($i = 0; $i <= 1; $i++) {
             $p = $i == 1 ? 'b' : 'a';
             $data = $this->ParseStringArray($a[$i]);
             $interactor = trim($data["ns"]) . ":" . trim($data["id"]);
             parent::addRDF(parent::triplify($iid, parent::getVoc() . "interactor_{$p}", $interactor));
             // biological role
             $role = $a[16 + $i];
             if ($role != '-') {
                 $data = $this->ParseStringArray($role);
                 $qname = trim($data["ns"]) . ":" . trim($data["id"]);
                 if ($qname != "mi:0000") {
                     parent::addRDF(parent::triplify($iid, parent::getVoc() . "interactor_{$p}" . "_biological_role", $qname) . parent::describeClass($qname, $data['label']));
                 }
             }
             // experimental role
             $role = $a[18 + $i];
             if ($role != '-') {
                 $data = $this->ParseStringArray($role);
                 $qname = trim($data["ns"]) . ":" . trim($data["id"]);
                 if ($qname != "mi:0000") {
                     parent::addRDF(parent::triplify($iid, parent::getVoc() . "interactor_{$p}" . "_experimental_role", $qname) . parent::describeClass($qname, $data['label']));
                 }
             }
             // interactor type
             $interactor_type = $a[20 + $i];
             if ($interactor_type != '-') {
                 $data = $this->ParseStringArray($interactor_type);
                 $qname = trim($data["ns"]) . ":" . trim($data["id"]);
                 parent::addRDF(parent::triplify($interactor, "rdf:type", $qname) . parent::describeClass($qname, $data['label']));
             }
         }
         // add the alternatives through the taxon + seq redundant group
         for ($i = 2; $i <= 3; $i++) {
             $taxid = '';
             $rogid = "irefindex." . $a[32 + ($i - 2)];
             parent::addRDF(parent::describeIndividual($rogid, "", parent::getVoc() . "Taxon-Sequence-Identical-Group") . parent::describeClass(parent::getVoc() . "Taxon-Sequence-Identical-Group", "Taxon + Sequence Identical Group"));
             $tax = $a[9 + ($i - 2)];
             if ($tax && $tax != '-' && $tax != '-1') {
                 $data = $this->ParseStringArray($tax);
                 $taxid = trim($data["ns"]) . ":" . trim($data["id"]);
                 parent::addRDF(parent::triplify($rogid, parent::getVoc() . "x-taxonomy", $taxid));
             }
             $list = explode("|", $a[3 + ($i - 2)]);
             foreach ($list as $item) {
                 $data = $this->ParseStringArray($item);
                 $ns = trim($data["ns"]);
                 $id = trim($data["id"]);
                 $qname = $ns . ":" . $id;
                 if ($ns && $ns != 'rogid' && $ns != 'irogid' and $id != '-') {
                     parent::addRDF(parent::triplify($rogid, parent::getVoc() . "has-member", $qname));
                     if ($taxid && $taxid != '-' && $taxid != '-1') {
                         parent::addRDF(parent::triplify($qname, parent::getVoc() . "x-taxonomy", $taxid));
                     }
                 }
             }
         }
         // publications
         $list = explode("|", $a[8]);
         foreach ($list as $item) {
             // skip the "no value" marker and the pubmed:0 placeholder
             if ($item == '-' || $item == 'pubmed:0') {
                 continue;
             }
             $data = $this->ParseStringArray($item);
             $qname = trim($data["ns"]) . ":" . trim($data["id"]);
             parent::addRDF(parent::triplify($iid, parent::getVoc() . "article", $qname));
         }
         // MI interaction type
         if ($a[11] != '-' && $a[11] != 'NA') {
             $data = $this->ParseStringArray($a[11]);
             $qname = trim($data["ns"]) . ":" . trim($data["id"]);
             parent::addRDF(parent::triplify($iid, "rdf:type", $qname));
             if (!isset($defined[$qname])) {
                 // emit the label only once per type
                 $defined[$qname] = '';
                 parent::addRDF(parent::triplifyString($qname, "rdfs:label", $data['label']));
             }
         }
         // source
         if ($a[12] != '-') {
             $data = $this->ParseStringArray($a[12]);
             $qname = trim($data["ns"]) . ":" . trim($data["id"]);
             parent::addRDF(parent::triplify($iid, parent::getVoc() . "source", $qname));
         }
         // confidence
         $list = explode("|", $a[14]);
         foreach ($list as $item) {
             $data = $this->ParseStringArray($item);
             $ns = trim($data["ns"]);
             $id = trim($data["id"]);
             if ($ns == 'lpr') {
                 //  lowest number of distinct interactions that any one article reported
                 parent::addRDF(parent::triplifyString($iid, parent::getVoc() . "minimum-number-interactions-reported", $id));
             } elseif ($ns == "hpr") {
                 //  higher number of distinct interactions that any one article reports
                 parent::addRDF(parent::triplifyString($iid, parent::getVoc() . "maximum-number-interactions-reported", $id));
             } elseif ($ns == 'hp') {
                 //  total number of unique PMIDs used to support the interaction
                 parent::addRDF(parent::triplifyString($iid, parent::getVoc() . "number-supporting-articles", $id));
             }
         }
         // expansion method
         if ($a[15]) {
             $id = parent::getRes() . md5($a[15]);
             parent::addRDF(parent::describeIndividual($id, $a[15], parent::getVoc() . "Expansion-Method") . parent::describeClass(parent::getVoc() . "Expansion-Method", "Expansion Method") . parent::triplify($iid, parent::getVoc() . "expansion-method", $id));
         }
         // host organism
         if ($a[28] != '-') {
             $data = $this->ParseStringArray($a[28]);
             $qname = trim($data["ns"]) . ":" . trim($data["id"]);
             parent::addRDF(parent::triplify($iid, parent::getVoc() . "host-organism", $qname));
         }
         // @todo add to record
         // created 2010/05/18
         $date = str_replace("/", "-", $a[30]) . "T00:00:00Z";
         parent::addRDF(parent::triplifyString($iid, "dc:created", $date, "xsd:dateTime"));
         // taxon-sequence identical interaction group
         parent::addRDF(
             parent::triplify($iid, parent::getVoc() . "taxon-sequence-identical-interaction", $rigid)
             . parent::triplify($rigid, "rdf:type", parent::getVoc() . "Taxon-Sequence-Identical-Interaction")
             . parent::describeClass(parent::getVoc() . "Taxon-Sequence-Identical-Interaction", "Taxon + Sequence Identical Interaction")
             . parent::triplify($rigid, parent::getVoc() . "irigid", $irigid)
             . parent::triplify($rigid, parent::getVoc() . "interactor-a", $rogida)
             . parent::triplify($rogida, parent::getVoc() . "irogid", $irogida)
             . parent::triplify($rigid, parent::getVoc() . "interactor-b", $rogidb)
             . parent::triplify($rogidb, parent::getVoc() . "irogid", $irogidb)
             . parent::triplify($rogida, parent::getVoc() . "canonical-group", $crogida)
             . parent::triplify($rogidb, parent::getVoc() . "canonical-group", $crogidb)
             . parent::triplify($rigid, parent::getVoc() . "taxon-sequence-similar-interaction", $crigid)
             . parent::triplify($crigid, "rdf:type", parent::getVoc() . "Taxon-Sequence-Canonical-Interaction")
             . parent::describeClass(parent::getVoc() . "Taxon-Sequence-Canonical-Interaction", "Taxon + Sequence Canonical Interaction")
             . parent::triplify($crigid, parent::getVoc() . "icrigid", $icrigid)
             . parent::triplify($crigid, parent::getVoc() . "interactor-a-canonical-group", $crogida)
             . parent::triplify($crogida, "rdf:type", parent::getVoc() . "Taxon-Sequence-Similar-Group")
             . parent::triplify($crogida, parent::getVoc() . "icrogid", $icrogida)
             . parent::triplify($crigid, parent::getVoc() . "interactor-b-canonical-group", $crogidb)
             . parent::triplify($crogidb, "rdf:type", parent::getVoc() . "Taxon-Sequence-Similar-Group")
             . parent::triplify($crogidb, parent::getVoc() . "icrogid", $icrogidb)
             . parent::describeClass(parent::getVoc() . "Taxon-Sequence-Similar-Group", "Taxon + Sequence Similar Group")
         );
         parent::writeRDFBufferToWriteFile();
     }
 }
Beispiel #12
0
 /**
  * Convert tab-delimited PSI-BLAST alignment rows from the current read file into RDF.
  *
  * Columns as used below: 0 = query id, 1-2 = query start/stop,
  * 3-4 = target start/stop, 5 = percent aligned, 6 = score, 7 = target id,
  * 8 = taxon id. One alignment resource is described per row and the RDF
  * buffer is flushed to the write file after each row.
  *
  * @return bool Always TRUE once the read file is exhausted.
  */
 function psiblast()
 {
     while ($l = $this->GetReadFile()->Read(2048)) {
         $a = explode("\t", trim($l));
         $id1 = $a[0];
         $id2 = $a[7];
         // braces keep the underscore out of the variable name, so the id
         // carries both the query and the target identifier
         $id = "aln_{$id1}_{$id2}";
         $this->AddRDF(
             parent::describeIndividual($this->getRes() . $id, "psiblast alignment between {$id1} and {$id2}", $this->getVoc() . "PSI-BLAST-Alignment")
             . parent::describeClass($this->getVoc() . "PSI-BLAST-Alignment", "PSI-Blast Alignment")
             . parent::triplify($this->getRes() . $id, $this->getVoc() . "query", $this->getNamespace() . $id1)
             . parent::triplify($this->getRes() . $id, $this->getVoc() . "target", $this->getNamespace() . $id2)
             . parent::triplifyString($this->getRes() . $id, $this->getVoc() . "query-start", $a[1])
             . parent::triplifyString($this->getRes() . $id, $this->getVoc() . "query-stop", $a[2])
             . parent::triplifyString($this->getRes() . $id, $this->getVoc() . "target-start", $a[3])
             . parent::triplifyString($this->getRes() . $id, $this->getVoc() . "target-stop", $a[4])
             . parent::triplifyString($this->getRes() . $id, $this->getVoc() . "percent-aligned", $a[5])
             . parent::triplifyString($this->getRes() . $id, $this->getVoc() . "score", $a[6])
             . parent::triplifyString($this->getRes() . $id, $this->getVoc() . "is-encoded-by", "taxon:" . $a[8])
             . parent::describeProperty($this->getVoc() . "target-start", "Relationship between an SGD sequence alignment and its target sequence start position")
             . parent::describeProperty($this->getVoc() . "target-stop", "Relationship between an SGD sequence alignment and its target sequence stop position")
             . parent::describeProperty($this->getVoc() . "score", "Relationship between an SGD sequence alignment and its score")
             . parent::describeProperty($this->getVoc() . "percent-aligned", "Relationship between an SGD sequence alignment and its percent-aligned value")
             . parent::describeProperty($this->getVoc() . "is-encoded-by", "Relationship between an SGD sequence alignment and the taxon the aligned sequences are encoded by")
         );
         parent::writeRDFBufferToWriteFile();
     }
     return TRUE;
 }
Beispiel #13
0
 /**
  * Convert one DrugBank drug record into RDF and flush it to the write file.
  *
  * Reads the current XML root from $xml and emits triples for the drug
  * itself, its free-text sections, classification, salts, mixtures,
  * packagers, prices, dosages, experimental/calculated properties, patents,
  * partners (targets, enzymes, transporters, carriers), drug-drug
  * interactions, external identifiers and external links.
  *
  * When $this->id_list is set, only drugs whose id is a key of that list are
  * processed, and the id is removed from the list once seen.
  *
  * @param object $xml Parser exposing GetXMLRoot(), which yields the drug element.
  * @return void
  */
 function parseDrugEntry(&$xml)
 {
     // packagers already described during this call, keyed by resource id
     $defined = array();
     // running suffix that keeps property resource ids unique within the drug
     $counter = 1;
     $x = $xml->GetXMLRoot();
     $dbid = (string) $x->{"drugbank-id"};
     $did = "drugbank:" . $dbid;
     $name = (string) $x->name;
     $type = ucfirst((string) str_replace(" ", "-", $x->attributes()->type));
     $type_label = ucfirst($x->attributes()->type);
     $description = null;
     if (isset($this->id_list)) {
         if (!isset($this->id_list[$dbid])) {
             return;
         }
         unset($this->id_list[$dbid]);
     }
     echo "Processing {$dbid}" . PHP_EOL;
     if (isset($x->description) && $x->description != '') {
         $description = trim((string) $x->description);
     }
     parent::addRDF(
         parent::describeIndividual($did, $name, parent::getVoc() . "Drug", $name, $description)
         . parent::describeClass(parent::getVoc() . "Drug", "Drug")
         . parent::triplify($did, "owl:sameAs", "http://identifiers.org/drugbank/" . $dbid)
         . parent::triplify($did, "rdfs:seeAlso", "http://www.drugbank.ca/drugs/" . $dbid)
         . parent::triplify($did, "rdf:type", parent::getVoc() . $type)
         . parent::describeClass(parent::getVoc() . $type, $type_label)
     );
     foreach ($x->{'drugbank-id'} as $id) {
         parent::addRDF(parent::triplifyString($did, parent::getVoc() . "drugbank-id", $id));
     }
     // an empty <cas-number/> would otherwise yield a "cas:" xref without an identifier
     if (isset($x->{'cas-number'}) && $x->{'cas-number'} != '') {
         parent::addRDF(parent::triplify($did, parent::getVoc() . "x-cas", "cas:" . $x->{'cas-number'}));
     }
     // free-text sections, each emitted as its own resource
     $literals = array("indication", "pharmacodynamics", "mechanism-of-action", "toxicity", "biotransformation", "absorption", "half-life", "protein-binding", "route-of-elimination", "volume-of-distribution", "clearance");
     foreach ($literals as $l) {
         if (isset($x->{$l}) and $x->{$l} != '') {
             $lid = parent::getRes() . md5($l . $x->{$l});
             parent::addRDF(
                 parent::describeIndividual($lid, "{$l} for {$did}", parent::getVoc() . ucfirst($l), "{$l} for {$did}", $x->{$l})
                 . parent::describeClass(parent::getVoc() . ucfirst($l), ucfirst(str_replace("-", " ", $l)))
                 . parent::triplify($did, parent::getVoc() . $l, $lid)
             );
         }
     }
     // TODO:: Replace the next two lines
     $this->AddList($x, $did, "groups", "group", parent::getVoc() . "group");
     $this->AddList($x, $did, "categories", "category", parent::getVoc() . "category");
     if (isset($x->classification)) {
         foreach ($x->classification->children() as $v) {
             $cid = parent::getRes() . md5($v);
             parent::addRDF(
                 parent::describeIndividual($cid, $v, parent::getVoc() . "Drug-Classification-Category")
                 . parent::describeClass(parent::getVoc() . "Drug-Classification-Category", "Drug Classification Category")
                 . parent::triplify($did, parent::getVoc() . "drug-classification-category", $cid)
             );
         }
     }
     $this->addLinkedResource($x, $did, 'atc-codes', 'atc-code', 'atc');
     $this->addLinkedResource($x, $did, 'ahfs-codes', 'ahfs-code', 'ahfs');
     // taxonomy
     $this->AddText($x, $did, "taxonomy", "kingdom", parent::getVoc() . "kingdom");
     // substructures
     $this->AddText($x, $did, "taxonomy", "substructures", parent::getVoc() . "substructure", "substructure");
     // synonyms
     $this->AddCategory($x, $did, "synonyms", "synonym", parent::getVoc() . "synonym");
     // brand names
     $this->AddCategory($x, $did, "international-brands", "international-brand", parent::getVoc() . "brand");
     // salt
     if (isset($x->salts->salt)) {
         foreach ($x->salts->salt as $s) {
             $sid = parent::getPrefix() . ':' . $s->{'drugbank-id'};
             $salt_rdf = parent::describeIndividual($sid, $s->name, parent::getVoc() . "Salt")
                 . parent::describeClass(parent::getVoc() . "Salt", "Salt")
                 . parent::triplify($did, parent::getVoc() . "salt", $sid);
             // only link identifiers that are actually present
             if ($s->{'cas-number'} != '') {
                 $salt_rdf .= parent::triplify($sid, parent::getVoc() . "x-cas", "cas:" . $s->{'cas-number'});
             }
             if ($s->{'inchikey'} != '') {
                 $salt_rdf .= parent::triplify($sid, parent::getVoc() . "x-inchikey", "inchikey:" . $s->{'inchikey'});
             }
             parent::addRDF($salt_rdf);
         }
     }
     // mixtures
     // <mixtures><mixture><name>Cauterex</name><ingredients>dornase alfa + fibrinolysin + gentamicin sulfate</ingredients></mixture>
     if (isset($x->mixtures)) {
         foreach ($x->mixtures->mixture as $mixture) {
             $mid = parent::getRes() . str_replace(" ", "-", $mixture->name[0]);
             parent::addRDF(
                 parent::triplify($did, parent::getVoc() . "mixture", $mid)
                 . parent::describeIndividual($mid, $mixture->name[0], parent::getVoc() . "Mixture")
                 . parent::describeClass(parent::getVoc() . "Mixture", "mixture")
                 . parent::triplifyString($mid, $this->getVoc() . "ingredients", "" . $mixture->ingredients[0])
             );
             // the ingredient list is a single " + " separated string
             $ingredients = explode(" + ", $mixture->ingredients[0]);
             foreach ($ingredients as $ingredient) {
                 $ingredient = trim($ingredient);
                 $iid = parent::getRes() . str_replace(" ", "-", $ingredient);
                 parent::addRDF(
                     parent::describeClass($iid, $ingredient, parent::getVoc() . "Ingredient")
                     . parent::describeClass(parent::getVoc() . "Ingredient", "Ingredient")
                     . parent::triplify($mid, parent::getVoc() . "ingredient", $iid)
                 );
             }
         }
     }
     // packagers
     // <packagers><packager><name>Cardinal Health</name><url>http://www.cardinal.com</url></packager>
     if (isset($x->packagers)) {
         foreach ($x->packagers as $items) {
             if (isset($items->packager)) {
                 foreach ($items->packager as $item) {
                     $pid = parent::getRes() . md5($item->name);
                     parent::addRDF(parent::triplify($did, parent::getVoc() . "packager", $pid));
                     if (!isset($defined[$pid])) {
                         $defined[$pid] = '';
                         parent::addRDF(parent::describe($pid, "" . $item->name[0]));
                         // the second test excludes one known malformed URL value
                         if (strstr($item->url, "http://") && $item->url != "http://BASF Corp.") {
                             parent::addRDF($this->triplify($pid, "rdfs:seeAlso", "" . $item->url[0]));
                         }
                     }
                 }
             }
         }
     }
     // manufacturers
     $this->AddText($x, $did, "manufacturers", "manufacturer", parent::getVoc() . "manufacturer");
     // @TODO RESOURCE
     // prices
     if (isset($x->prices->price)) {
         foreach ($x->prices->price as $product) {
             $pid = parent::getRes() . md5($product->description);
             parent::addRDF(
                 parent::describeIndividual($pid, $product->description, parent::getVoc() . "Pharmaceutical", $product->description)
                 . parent::describeClass(parent::getVoc() . "Pharmaceutical", "pharmaceutical")
                 . parent::triplifyString($pid, parent::getVoc() . "price", "" . $product->cost, "xsd:float")
                 . parent::triplify($did, parent::getVoc() . "product", $pid)
             );
             $uid = parent::getVoc() . md5($product->unit);
             parent::addRDF(
                 parent::describeIndividual($uid, $product->unit, parent::getVoc() . "Unit", $product->unit)
                 . parent::describeClass(parent::getVoc() . "Unit", "unit")
                 . parent::triplify($pid, parent::getVoc() . "form", $uid)
             );
         }
     }
     // dosages <dosages><dosage><form>Powder, for solution</form><route>Intravenous</route><strength></strength></dosage>
     if (isset($x->dosages->dosage)) {
         foreach ($x->dosages->dosage as $dosage) {
             $id = parent::getRes() . md5($dosage->strength . $dosage->form . $dosage->route);
             $label = ($dosage->strength != '' ? $dosage->strength . " " : "") . $dosage->form . " form with " . $dosage->route . " route";
             parent::addRDF(
                 parent::describeIndividual($id, $label, parent::getVoc() . "Dosage")
                 . parent::describeClass(parent::getVoc() . "Dosage", "Dosage")
                 . parent::triplify($did, parent::getVoc() . "dosage", $id)
             );
             $rid = parent::getVoc() . md5($dosage->route);
             $this->typify($id, $rid, "Route", "" . $dosage->route);
             $fid = parent::getVoc() . md5($dosage->form);
             $this->typify($id, $fid, "Form", "" . $dosage->form);
             if ($dosage->strength != '') {
                 parent::addRDF(parent::triplifyString($id, parent::getVoc() . "strength", $dosage->strength));
             }
         }
     }
     // experimental-properties and calculated-properties
     $props = array("experimental-properties", "calculated-properties");
     foreach ($props as $prop) {
         if (isset($x->{$prop})) {
             foreach ($x->{$prop} as $properties) {
                 foreach ($properties as $property) {
                     $kind = (string) $property->kind;
                     $value = (string) $property->value;
                     $type_uri = parent::getVoc() . ucfirst(str_replace(" ", "-", $kind));
                     $id = parent::getRes() . $prop . "-" . $dbid . "-" . $counter++;
                     $label = $property->kind . ": {$value}" . ($property->source == '' ? '' : " from " . $property->source);
                     parent::addRDF(
                         parent::describeIndividual($id, $label, $type_uri)
                         . parent::describeClass($type_uri, $kind, parent::getVoc() . ucfirst($prop))
                         . parent::describeClass(parent::getVoc() . ucfirst($prop), str_replace("-", " ", $prop))
                         . parent::triplifyString($id, $this->getVoc() . "value", $value)
                         . parent::triplify($did, $this->getVoc() . $prop, $id)
                     );
                     // Source
                     if (isset($property->source)) {
                         foreach ($property->source as $source) {
                             $s = (string) $source;
                             if ($s == '') {
                                 continue;
                             }
                             $sid = parent::getRes() . md5($s);
                             parent::addRDF(
                                 parent::describeIndividual($sid, $s, parent::getVoc() . "Source")
                                 . parent::describeClass(parent::getVoc() . "Source", "Source")
                                 . parent::triplify($id, parent::getVoc() . "source", $sid)
                             );
                         }
                     }
                 }
             }
         }
     }
     // identifiers
     // <patents><patent><number>RE40183</number><country>United States</country><approved>1996-04-09</approved>        <expires>2016-04-09</expires>
     if (isset($x->patents->patent)) {
         foreach ($x->patents->patent as $patent) {
             $id = "uspto:" . $patent->number;
             parent::addRDF(
                 parent::triplify($did, $this->getVoc() . "patent", $id)
                 . parent::describeIndividual($id, $patent->country . " patent " . $patent->number, $this->getVoc() . "Patent")
                 . parent::describeClass(parent::getVoc() . "Patent", "patent")
                 . parent::triplifyString($id, $this->getVoc() . "approved", "" . $patent->approved)
                 . parent::triplifyString($id, $this->getVoc() . "expires", "" . $patent->expires)
             );
             $cid = parent::getRes() . md5($patent->country);
             $this->typify($id, $cid, "Country", "" . $patent->country);
         }
     }
     // partners
     $partners = array('target', 'enzyme', 'transporter', 'carrier');
     foreach ($partners as $partner) {
         $plural = $partner . 's';
         if (isset($x->{$plural})) {
             foreach ($x->{$plural} as $list) {
                 foreach ($list->{$partner} as $item) {
                     $this->parsePartnerRelation($did, $item, $partner);
                     parent::writeRDFBufferToWriteFile();
                 }
             }
         }
     }
     // drug-interactions
     if (isset($x->{"drug-interactions"})) {
         foreach ($x->{"drug-interactions"} as $ddis) {
             foreach ($ddis->{"drug-interaction"} as $ddi) {
                 // cast so the ordering test below compares two plain strings
                 $dbid2 = (string) $ddi->{'drugbank-id'};
                 if ($dbid < $dbid2) {
                     // don't repeat: each pair is emitted only from the lower id
                     $ddi_id = parent::getRes() . $dbid . "_" . $dbid2;
                     parent::addRDF(
                         parent::triplify("drugbank:" . $dbid, parent::getVoc() . "ddi-interactor-in", "" . $ddi_id)
                         . parent::triplify("drugbank:" . $dbid2, parent::getVoc() . "ddi-interactor-in", "" . $ddi_id)
                         . parent::describeIndividual($ddi_id, "DDI between {$name} and " . $ddi->name . " - " . $ddi->description, parent::getVoc() . "Drug-Drug-Interaction")
                         . parent::describeClass(parent::getVoc() . "Drug-Drug-Interaction", "drug-drug interaction")
                     );
                 }
             }
         }
     }
     // food-interactions
     $this->AddText($x, $did, "food-interactions", "food-interaction", parent::getVoc() . "food-interaction");
     // affected-organisms
     $this->AddCategory($x, $did, "affected-organisms", "affected-organism", parent::getVoc() . "affected-organism");
     //  <external-identifiers>
     if (isset($x->{"external-identifiers"})) {
         foreach ($x->{"external-identifiers"} as $objs) {
             foreach ($objs as $obj) {
                 $ns = $this->NSMap($obj->resource);
                 $id = $obj->identifier;
                 if ($ns == "genecards") {
                     $id = str_replace(array(" "), array("_"), $id);
                 }
                 parent::addRDF(parent::triplify($did, parent::getVoc() . "x-{$ns}", "{$ns}:{$id}"));
                 if ($ns == "pubchemcompound") {
                     parent::addRDF(parent::triplify("{$ns}:{$id}", "skos:exactMatch", "http://rdf.ncbi.nlm.nih.gov/pubchem/compound/{$id}"));
                 }
             }
         }
     }
     // <external-links>
     if (isset($x->{"external-links"})) {
         foreach ($x->{"external-links"}->{'external-link'} as $el) {
             if (strpos($el->url, 'http') !== false) {
                 parent::addRDF(parent::triplify($did, "rdfs:seeAlso", "" . $el->url));
             }
         }
     }
     parent::writeRDFBufferToWriteFile();
 }
Beispiel #14
0
 /**
  * Convert the Orphanet disorder-gene association XML into RDF.
  *
  * For every Disorder under each parsed DisorderList, each
  * DisorderGeneAssociation yields: the gene (label, symbol, synonyms,
  * external references), and an association resource linking the disorder
  * and the gene, typed by the association type and carrying its status.
  * The RDF buffer is flushed to the write file after each disorder.
  *
  * @param mixed $file Passed unchanged to the CXML constructor.
  * @return void
  */
 function genes($file)
 {
     $xml = new CXML($file);
     while ($xml->parse("DisorderList") == TRUE) {
         $x = $xml->GetXMLRoot();
         foreach ($x->Disorder as $d) {
             $orphanet_id = parent::getNamespace() . (string) $d->OrphaNumber;
             $disorder_name = (string) $d->Name;
             foreach ($d->DisorderGeneAssociationList->DisorderGeneAssociation as $dga) {
                 // gene
                 $gene = $dga->Gene;
                 $gene_id = parent::getNamespace() . (string) $gene->OrphaNumber;
                 $gene_label = (string) $gene->Name;
                 $gene_symbol = (string) $gene->Symbol;
                 parent::addRDF(
                     parent::describeIndividual($gene_id, $gene_label, parent::getVoc() . "Gene")
                     . parent::describeClass(parent::getVoc() . "Gene", "orphanet gene")
                     . parent::triplifyString($gene_id, parent::getVoc() . "symbol", $gene_symbol)
                 );
                 // walk every <Synonym> child: casting $list->Synonym directly would
                 // read only the first one, and an empty list would yield ""
                 foreach ($gene->SynonymList as $synonym_list) {
                     foreach ($synonym_list->Synonym as $s) {
                         $synonym = (string) $s;
                         parent::addRDF(parent::triplifyString($gene_id, parent::getVoc() . "synonym", $synonym));
                     }
                 }
                 // likewise emit one xref per <ExternalReference>, not just the first
                 foreach ($gene->ExternalReferenceList as $reference_list) {
                     foreach ($reference_list->ExternalReference as $er) {
                         $db = (string) $er->Source;
                         $db = parent::getRegistry()->getPreferredPrefix($db);
                         $id = (string) $er->Reference;
                         $xref = "{$db}:{$id}";
                         parent::addRDF(parent::triplify($gene_id, parent::getVoc() . "x-{$db}", $xref));
                     }
                 }
                 // the association id hashes the whole association element
                 $dga_id = parent::getRes() . (string) $d->OrphaNumber . "_" . md5($dga->asXML());
                 $ga = $dga->DisorderGeneAssociationType;
                 $ga_id = parent::getNamespace() . (string) $ga->attributes()->id;
                 $ga_label = (string) $ga->Name;
                 $status = $dga->DisorderGeneAssociationStatus;
                 $s_id = parent::getNamespace() . (string) $status->attributes()->id;
                 $s_label = (string) $status->Name;
                 parent::addRDF(
                     parent::describeIndividual($dga_id, "{$ga_label} {$gene_label} in {$disorder_name} ({$s_label})", $ga_id)
                     . parent::describeClass($ga_id, $ga_label, parent::getVoc() . "Disorder-Gene-Association")
                     . parent::triplify($dga_id, parent::getVoc() . "status", $s_id)
                     . parent::describeClass($s_id, $s_label, parent::getVoc() . "Disorder-Gene-Association-Status")
                     . parent::triplify($dga_id, parent::getVoc() . "disorder", $orphanet_id)
                     . parent::describeIndividual($orphanet_id, $disorder_name, parent::getVoc() . "Disorder")
                     . parent::triplify($dga_id, parent::getVoc() . "gene", $gene_id)
                 );
             }
             parent::writeRDFBufferToWriteFile();
         }
     }
     unset($xml);
 }
Beispiel #15
0
 /**
  * Triplify the tab-separated gene info table held by the current read file.
  *
  * The first line is read and discarded as the column header. Every following
  * line with exactly 15 tab-separated columns is turned into RDF describing one
  * gene; lines with a different column count are ignored. When $this->taxids
  * is set, rows whose taxonomy id is not a key of that array are skipped.
  * Rows whose symbol is "NEWENTRY" produce no triples. A column value of "-"
  * is treated as "no value" and the matching triple is not emitted.
  *
  * The RDF buffer is written to the write file after each processed line.
  *
  * @return void
  */
 private function geneinfo()
 {
     $i = 1;
     // the first line is the column header; read it only to move past it
     $this->GetReadFile()->Read(200000);
     while ($aLine = $this->GetReadFile()->Read(200000)) {
         // call parent::clear() once every 1000 lines read
         if ($i++ % 1000 == 0) {
             parent::clear();
         }
         $splitLine = explode("\t", $aLine);
         if (count($splitLine) == 15) {
             $taxid = "taxon:" . trim($splitLine[0]);
             // optional taxonomy filter
             if (isset($this->taxids) and !isset($this->taxids[trim($splitLine[0])])) {
                 continue;
             }
             $geneid = "ncbigene:" . trim($splitLine[1]);
             $symbol = addslashes(stripslashes(trim($splitLine[2])));
             $symbolid = "symbol:{$symbol}";
             $locusTag = trim($splitLine[3]);
             $symbols_arr = explode("|", $splitLine[4]);
             $dbxrefs_arr = explode("|", $splitLine[5]);
             $chromosome = trim($splitLine[6]);
             $map_location = trim($splitLine[7]);
             $description = addslashes(stripslashes(trim($splitLine[8])));
             $type_of_gene = trim($splitLine[9]);
             $symbol_authority = addslashes(stripslashes(trim($splitLine[10])));
             $symbol_auth_full_name = addslashes(stripslashes(trim($splitLine[11])));
             $nomenclature_status = addslashes(stripslashes(trim($splitLine[12])));
             $other_designations = addslashes(stripslashes(trim($splitLine[13])));
             // keep the raw text: the "-" placeholder has to be tested on the
             // string, not on the array that date_parse() returns
             $mod_date_raw = trim($splitLine[14]);
             //check for a valid symbol
             if ($symbol != "NEWENTRY") {
                 $this->AddRDF(parent::describeIndividual($geneid, "{$description} ({$symbolid}, {$taxid})", $this->getVoc() . "Gene") . parent::triplify($geneid, $this->getVoc() . "x-taxonomy", $taxid) . parent::triplifyString($geneid, $this->getVoc() . "symbol", $symbol) . parent::triplifyString($geneid, $this->getVoc() . "locus", addslashes(stripslashes($locusTag))) . parent::describeClass($this->getVoc() . "Gene", "NCBI Gene gene"));
                 if ($type_of_gene != '-') {
                     $this->AddRDF(parent::triplify($geneid, "rdf:type", $this->getVoc() . ucfirst($type_of_gene) . "-Gene") . parent::describeClass($this->getVoc() . ucfirst($type_of_gene) . "-Gene", ucfirst($type_of_gene) . " Gene"));
                 }
                 //symbol synonyms
                 foreach ($symbols_arr as $s) {
                     if ($s != "-") {
                         $this->AddRDF(parent::triplifyString($geneid, $this->getVoc() . "symbol-synonym", addslashes(stripslashes($s))));
                     }
                 }
                 //dbxrefs
                 foreach ($dbxrefs_arr as $dbx) {
                     if ($dbx != "-") {
                         $this->AddRDF(parent::triplifyString($geneid, $this->getVoc() . "dbxref", $dbx));
                     }
                 }
                 //chromosome
                 if ($chromosome != "-") {
                     $this->AddRDF(parent::triplifyString($geneid, $this->getVoc() . "chromosome", $chromosome));
                 }
                 //map location
                 if ($map_location != "-") {
                     $this->AddRDF(parent::triplifyString($geneid, $this->getVoc() . "map-location", $map_location));
                 }
                 //description
                 if ($description != "-") {
                     $this->AddRDF(parent::triplifyString($geneid, "dc:description", $description));
                 }
                 //nomenclature authority
                 if ($symbol_authority != "-") {
                     $this->AddRDF(parent::triplifyString($geneid, $this->getVoc() . "nomenclature-authority", $symbol_authority));
                     if ($symbol_auth_full_name != "-") {
                         $this->AddRDF(parent::triplifyString($geneid, $this->getVoc() . "nomenclature-authority-fullname", $symbol_auth_full_name));
                     }
                 }
                 //nomenclature status
                 if ($nomenclature_status != "-") {
                     $this->AddRDF(parent::triplifyString($geneid, $this->getVoc() . "nomenclature-status", $nomenclature_status));
                 }
                 //other designations
                 if ($other_designations != "-") {
                     foreach (explode("|", $other_designations) as $d) {
                         $this->AddRDF(parent::triplifyString($geneid, $this->getVoc() . "other-designation", $d));
                     }
                 }
                 //modification date
                 if ($mod_date_raw != "-") {
                     $mod_date = date_parse($mod_date_raw);
                     // date_parse() reports unparsed components as false;
                     // only emit a date when all three parts were recognised
                     if ($mod_date["year"] !== false && $mod_date["month"] !== false && $mod_date["day"] !== false) {
                         // zero-padded YYYY-MM-DD
                         $iso_date = sprintf("%04d-%02d-%02d", $mod_date["year"], $mod_date["month"], $mod_date["day"]);
                         $this->AddRDF(parent::triplifyString($geneid, $this->getVoc() . "modification-date", $iso_date));
                     }
                 }
             }
         }
         parent::writeRDFBufferToWriteFile();
     }
     // while
 }
Beispiel #16
0
 /**
  * Triplify a tab-separated GO annotation file read from the current read file.
  *
  * Lines starting with "!" are skipped. Every other line must have exactly
  * 17 tab-separated columns; otherwise an E_USER_ERROR is raised and the
  * method returns false. For each row the annotated entity is described and
  * linked to its GO term, and a separate annotation resource (numbered per
  * file) records the term, evidence code, category, assigning source, entry
  * date and PubMed references.
  *
  * The RDF buffer is written to the write file after each processed row.
  *
  * @param string $file label used as a prefix when building annotation resource ids
  * @return false|null false when a row has an unexpected column count, otherwise nothing
  */
 function process($file)
 {
     // relation to use instead of the plain aspect when the annotation is negated
     $negated_relations = array(
         'process' => 'not-in-process',
         'function' => 'not-has-function',
         'component' => 'not-in-component',
     );
     $z = 1;
     while ($l = parent::getReadFile()->read(100000)) {
         if ($z % 100000 == 0) {
             parent::clear();
         }
         // header / comment line
         if ($l[0] == "!") {
             continue;
         }
         $fields = explode("\t", $l);
         if (count($fields) != 17) {
             trigger_error("Expected 17 columns, but found " . count($fields), E_USER_ERROR);
             return false;
         }
         //get the Go id
         $db = $fields[0];
         $id = $fields[1];
         $symbol = $fields[2];
         $qualifier = $fields[3];
         // drop the leading "GO:" (3 characters) from the term id
         $goid = substr($fields[4], 3);
         $refs = $this->getDbReferences($fields[5]);
         $eco = $this->getEvidenceCodeLabelArr($fields[6]);
         $aspect = $this->getAspect($fields[8]);
         $label = $fields[9];
         $synonyms = explode("|", $fields[10]);
         $taxid = $fields[12];
         $date = $this->parseDate($fields[13]);
         $assignedBy = $fields[14];
         //entity id
         $eid = $this->getdbURI($db, $id);
         if (!$eid) {
             // no URI could be built for this db/id pair: dump the row and skip it
             print_r($fields);
             continue;
         }
         parent::addRDF(parent::describeIndividual($eid, $label, parent::getVoc() . "GO-Annotation") . parent::describeClass(parent::getVoc() . "GO-Annotation", "GO Annotation") . parent::triplifyString($eid, parent::getVoc() . "symbol", $symbol));
         parent::addRDF(parent::triplify($eid, parent::getVoc() . "x-taxonomy", $taxid));
         foreach ($synonyms as $s) {
             if (!empty($s)) {
                 parent::addRDF(parent::triplifyString($eid, parent::getVoc() . "synonym", $s));
             }
         }
         // The qualifier column may hold several pipe-separated values
         // (e.g. "NOT|contributes_to"), so look for NOT among them instead of
         // requiring the whole column to equal "NOT".
         $rel = $aspect;
         $is_negated = in_array('NOT', explode('|', $qualifier), true);
         if ($is_negated && isset($negated_relations[$aspect])) {
             $rel = $negated_relations[$aspect];
         }
         parent::addRDF(parent::describeObjectProperty(parent::getVoc() . $rel, str_replace("-", " ", $rel)) . parent::triplify($eid, parent::getVoc() . $rel, "go:" . $goid));
         $type = key($eco);
         $aid = parent::getRes() . $file . "_" . $z++;
         parent::addRDF(parent::describeObjectProperty(parent::getVoc() . "go-annotation", "GO annotation") . parent::triplify($eid, parent::getVoc() . "go-annotation", $aid));
         $cat = parent::getRes() . md5($aspect);
         parent::addRDF(parent::describeIndividual($aid, "{$id}-go:{$goid} association", parent::getVoc() . "GO-Annotation") . parent::triplify($aid, parent::getVoc() . "target", $eid) . parent::triplify($aid, parent::getVoc() . "go-term", "go:" . $goid) . parent::triplify($aid, parent::getVoc() . "evidence", "eco:" . $eco[$type][1]) . parent::triplify($aid, parent::getVoc() . "go-category", $cat) . parent::describeClass($cat, $aspect) . parent::triplifyString($aid, parent::getVoc() . "assigned-by", $assignedBy));
         if ($date != '') {
             parent::addRDF(parent::triplifyString($aid, parent::getVoc() . "entry-date", $date . "T00:00:00Z", "xsd:dateTime"));
         }
         foreach ($refs as $ref) {
             $b = explode(":", $ref);
             // only PubMed references are linked; require an id after the colon
             if ($b[0] == 'PMID' && isset($b[1])) {
                 parent::addRDF(parent::triplify($aid, parent::getVoc() . "article", "pubmed:" . $b[1]));
             }
         }
         //write RDF to file
         parent::writeRDFBufferToWriteFile();
     }
 }
Beispiel #17
0
 /**
  * Triplify the functional descriptions table from the current read file.
  *
  * Lines starting with "#" and lines containing the text "gene_id" are
  * skipped. Each remaining line is expected to carry 8 tab-separated columns:
  * gene_id, public_name, molecular_name, concise_description,
  * provisional_description, detailed_description, automated_description and
  * gene_class_description. Rows with another column count raise an
  * E_USER_ERROR and are skipped.
  *
  * The RDF buffer is written to the write file after each accepted row.
  */
 function functional_descriptions()
 {
     while (true) {
         $record = $this->getReadFile()->read(2000000);
         if (!$record) {
             break;
         }
         // comment lines and the column header row carry no data
         if ($record[0] == "#" || strstr($record, "gene_id")) {
             continue;
         }
         $columns = explode("\t", $record);
         $column_count = count($columns);
         if ($column_count != 8) {
             trigger_error("Found one row that only has " . $column_count . " columns, expecting 8", E_USER_ERROR);
             continue;
         }
         list($gene, $public_name, $molecular_name, $concise, $provisional, $detailed, $automated, $gene_class) = $columns;

         $id = parent::getNamespace() . $gene;
         // the molecular name is appended in parentheses only when present
         $label = $public_name;
         if ($molecular_name) {
             $label .= " (" . $molecular_name . ")";
         }

         $rdf = parent::describeIndividual($id, $label, parent::getVoc() . "Gene");
         $rdf .= parent::describeClass(parent::getVoc() . "Gene", "Wormbase Gene");
         $rdf .= parent::triplifyString($id, parent::getVoc() . "concise-description", $concise);
         $rdf .= parent::triplifyString($id, parent::getVoc() . "provisional-description", $provisional);
         $rdf .= parent::triplifyString($id, parent::getVoc() . "detailed-description", $detailed);
         $rdf .= parent::triplifyString($id, parent::getVoc() . "automated-description", $automated);
         // the last column still carries the line ending
         $rdf .= parent::triplifyString($id, parent::getVoc() . "gene-class-description", trim($gene_class));
         parent::addRDF($rdf);

         parent::writeRDFBufferToWriteFile();
     }
 }
Beispiel #18
0
 /**
  * Download (when needed) and convert the selected MeSH files, then write the
  * dataset description file.
  *
  * The 'files' parameter is either "all" or a comma-separated list of keys of
  * the package map; unknown keys are ignored. For every selected key the
  * file pattern has "YEAR" replaced by the 'year' parameter, the file is
  * fetched if it is missing locally or 'download' is "true", and the method
  * named after the key is invoked to convert it. Source and output
  * descriptions of every converted file are collected and written to the
  * release file at the end.
  */
 function Run()
 {
     // work out which entries of the package map were asked for
     $requested = trim(parent::getParameterValue('files'));
     if ($requested == 'all') {
         $files = $this->getPackageMap();
     } else {
         $keys = explode(",", $requested);
         $package_map = $this->getPackageMap();
         $files = array();
         foreach ($keys as $key) {
             if (!array_key_exists($key, $package_map)) {
                 continue;
             }
             $files[$key] = $package_map[$key];
         }
     }

     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $description = '';
     $year = parent::getParameterValue('year');

     foreach ($files as $name => $pattern) {
         $file = str_replace("YEAR", $year, $pattern);
         $lfile = $ldir . $file;
         $rfile = parent::getParameterValue("download_url") . $file;

         // fetch the file when it is absent locally or a download is forced
         $must_download = !file_exists($lfile) || parent::getParameterValue('download') == "true";
         if ($must_download) {
             echo "Downloading {$file} ... ";
             if (utils::downloadSingle($rfile, $lfile) === FALSE) {
                 trigger_error("Unable to get {$file}", E_USER_ERROR);
                 continue;
             }
             echo "done!" . PHP_EOL;
         }

         // output file name and compression follow the requested output format
         $format = parent::getParameterValue('output_format');
         $ofile = "mesh_" . $name . "." . $format;
         $gz = strstr($format, "gz") !== false;

         echo "processing {$name} ...";
         parent::setReadFile($lfile, FALSE);
         parent::setWriteFile($odir . $ofile, $gz);
         // the converter method carries the same name as the package key
         $this->{$name}();
         parent::writeRDFBufferToWriteFile();
         parent::getWriteFile()->close();
         echo "done!" . PHP_EOL;

         // describe the downloaded source file
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("MeSH")
             ->setRetrievedDate(parent::getDate(filemtime($lfile)))
             ->setFormat("text/x-mesh-record")
             ->setPublisher("http://www.nlm.nih.gov")
             ->setHomepage("http://www.nlm.nih.gov/mesh/")
             ->setRights("use")
             ->setLicense("http://www.nlm.nih.gov/databases/download.html")
             ->setDataset("http://identifiers.org/mesh/");

         // describe the generated RDF file
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = parent::getDate(filemtime($odir . $ofile));
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/mesh/mesh.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         $output_file->setFormat(strstr($format, "nt") ? "application/n-triples" : "application/n-quads");

         $description .= $source_file->toRDF() . $output_file->toRDF();
     }

     // write the collected dataset description
     parent::setWriteFile($odir . $this->getBio2RDFReleaseFile($this->getNamespace()));
     parent::getWriteFile()->write($description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Beispiel #19
0
 /**
  * Convert a parsed InterPro XML document into RDF.
  *
  * First records every <dbinfo> entry of the release on the dataset (and sets
  * the dataset version from the INTERPRO entry), then emits one description
  * per <interpro> entry: label/type, PubMed citations, abstract, example
  * entries, parent/child/contains/found-in relations, secondary accessions,
  * cross references and taxon distribution.
  *
  * When the "id_list" parameter is non-empty it is read as a comma-separated
  * list of entry ids, and entries not in that list are skipped.
  *
  * The RDF buffer is written to the write file at the start of every entry
  * iteration, so the triples of the last entry are still in the buffer when
  * this method returns.
  *
  * @param object $xml parsed document; presumably a SimpleXMLElement, since
  *                    attributes() and asXML() are called on its nodes
  */
 function Parse($xml)
 {
     // state the dataset info
     foreach ($xml->release->dbinfo as $o) {
         $db = $o->attributes()->dbname . " v" . $o->attributes()->version . " (" . $o->attributes()->entry_count . " entries) [" . $o->attributes()->file_date . "]";
         parent::addRDF(parent::triplifyString(parent::getDatasetURI(), parent::getVoc() . "contains", $db));
         if ((string) $o->attributes()->dbname === "INTERPRO") {
             parent::setDatasetVersion((string) $o->attributes()->version);
         }
     }
     // get a potential id list
     $id_list = null;
     if (parent::getParameterValue("id_list") != '') {
         $id_list = explode(",", parent::getParameterValue("id_list"));
     }
     // element holding <rel_ref> children => predicate used for the relation
     $relation_lists = array(
         'parent_list' => 'parent',
         // the child relations live in <child_list>, mirroring <parent_list>
         'child_list' => 'child',
         'contains' => 'contains',
         'found_in' => 'found-in',
     );
     // elements whose <db_xref> children are emitted as x-<db> cross references
     $xref_lists = array('member_list', 'external_doc_list', 'structure_db_links');

     // now iterate over the entries
     foreach ($xml->interpro as $o) {
         parent::writeRDFBufferToWriteFile();
         // compare as a plain string rather than as an XML node
         $interpro_id = (string) $o->attributes()->id;
         if ($id_list !== null && !in_array($interpro_id, $id_list, true)) {
             continue;
         }
         echo "Processing {$interpro_id}" . PHP_EOL;
         $name = (string) $o->name;
         $short_name = (string) $o->attributes()->short_name;
         $type = (string) $o->attributes()->type;
         // subject URI of this entry; must not be reused as a loop variable below
         $s = parent::getNamespace() . $interpro_id;
         parent::addRDF(parent::describeIndividual($s, "{$name} ({$short_name}) {$type}", parent::getVoc() . $type));

         // get the pubs: remember each citation tag and the PubMed link replacing it
         $cite_tags = array();
         $pubmed_links = array();
         if (isset($o->pub_list->publication)) {
             foreach ($o->pub_list->publication as $p) {
                 $pid = (string) $p->attributes()->id;
                 if (isset($p->db_xref) && (string) $p->db_xref->attributes()->db === "PUBMED") {
                     $pmid = (string) $p->db_xref->attributes()->dbkey;
                     $cite_tags[] = '<cite idref="' . $pid . '"/>';
                     $pubmed_links[] = '<a href="http://www.ncbi.nlm.nih.gov/pubmed/' . $pmid . '">pubmed:' . $pmid . '</a>';
                     parent::addRDF(parent::triplify($s, parent::getVoc() . "x-pubmed", "pubmed:{$pmid}"));
                 }
             }
         }
         // the abstract keeps its markup; citation tags become PubMed links
         $abstract = (string) $o->abstract->p->asXML();
         if ($cite_tags) {
             $abstract = str_replace($cite_tags, $pubmed_links, $abstract);
         }
         parent::addRDF(parent::triplifyString($s, "dc:description", $abstract));

         if (isset($o->example_list)) {
             foreach ($o->example_list->example as $example) {
                 $db = (string) $example->db_xref->attributes()->db;
                 $id = (string) $example->db_xref->attributes()->dbkey;
                 parent::addRDF(parent::triplify($s, parent::getVoc() . "example-entry", "{$db}:{$id}"));
             }
         }

         // relations to other InterPro entries
         foreach ($relation_lists as $element => $predicate) {
             if (!isset($o->{$element}->rel_ref)) {
                 continue;
             }
             foreach ($o->{$element}->rel_ref as $rel_ref) {
                 $id = (string) $rel_ref->attributes()->ipr_ref;
                 parent::addRDF(parent::triplify($s, parent::getVoc() . $predicate, "interpro:{$id}"));
             }
         }

         // secondary accessions: iterate the list that was tested, and keep
         // $s as the subject instead of overwriting it with the list item
         if (isset($o->sec_list->sec_ac)) {
             foreach ($o->sec_list->sec_ac as $sec_ac) {
                 $id = (string) $sec_ac->attributes()->acc;
                 parent::addRDF(parent::triplify($s, parent::getVoc() . "secondary-accession", "interpro:{$id}"));
             }
         }

         // xrefs
         foreach ($xref_lists as $element) {
             if (!isset($o->{$element}->db_xref)) {
                 continue;
             }
             foreach ($o->{$element}->db_xref as $dbxref) {
                 $db = (string) $dbxref->attributes()->db;
                 $id = (string) $dbxref->attributes()->dbkey;
                 parent::addRDF(parent::triplify($s, parent::getVoc() . "x-" . strtolower($db), "{$db}:{$id}"));
             }
         }

         // taxon distribution
         if (isset($o->taxonomy_distribution->taxon_data)) {
             foreach ($o->taxonomy_distribution->taxon_data as $t) {
                 $organism = (string) $t->attributes()->name;
                 $number = (string) $t->attributes()->proteins_count;
                 parent::addRDF(parent::triplifyString($s, parent::getVoc() . "taxon-distribution", "{$organism} ({$number})"));
             }
         }
     }
 }
Beispiel #20
0
 /**
  * Parse the current read file as RDF/XML and triplify every statement in it.
  *
  * The file is read through the compress.zlib:// stream wrapper, parsed with
  * the ARC2 RDF/XML parser against the base http://bio2rdf.org/bioportal#,
  * and each resulting triple is handed to TriplifyMap together with the
  * lower-cased abbreviation. The RDF buffer is written out after every
  * triple, and parent::clear() is called once at the end.
  *
  * @param string $abbv abbreviation passed (lower-cased) to TriplifyMap
  */
 private function OWL2RDF($abbv)
 {
     $path = parent::getReadFile()->getFilename();
     $contents = file_get_contents("compress.zlib://" . $path);

     $rdfxml = ARC2::getRDFXMLParser('file://' . $path);
     $rdfxml->parse("http://bio2rdf.org/bioportal#", $contents);

     $prefix = strtolower($abbv);
     foreach ($rdfxml->getTriples() as $triple) {
         $this->TriplifyMap($triple, $prefix);
         parent::writeRDFBufferToWriteFile();
     }
     parent::clear();
 }