Example #1
0
 /**
  * Optionally download, then convert, the selected IPI packages and write
  * the dataset description (release) file.
  *
  * Parameters read: 'indir', 'outdir', 'files', 'download', 'download_url',
  * 'output_format' and 'bio2rdf_release'. 'files' is either 'all' or a
  * comma-separated list of keys of the package map; each key is also the
  * name of the method that is invoked to process the files of that package.
  *
  * @return void
  */
 public function Run()
 {
     $dataset_description = '';
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $download_url = parent::getParameterValue('download_url');
     $output_format = parent::getParameterValue('output_format');
     // 'H' gives a zero-padded hour; the literal 'Z' is only truthful with gmdate()
     $utc_format = 'Y-m-d\TH:i:s\Z';

     // first get the files that are to be processed
     $selectedPackage = trim(parent::getParameterValue('files'));
     $pm = $this->getPackageMap();
     if ($selectedPackage == 'all') {
         $files = $pm;
     } else {
         $files = array();
         foreach (explode(",", $selectedPackage) as $a) {
             // tolerate "a, b" style lists
             $a = trim($a);
             if (array_key_exists($a, $pm)) {
                 $files[$a] = $pm[$a];
             }
         }
     }

     // download
     if ($this->getParameterValue('download')) {
         foreach ($files as $package_files) {
             foreach ($package_files as $aFn) {
                 $remote = $download_url . $aFn;
                 echo "downloading file {$aFn} :" . $remote . "..." . PHP_EOL;
                 $contents = file_get_contents($remote);
                 if ($contents === false) {
                     // do not clobber an existing local copy with an empty file
                     trigger_error("Unable to download {$remote}", E_USER_WARNING);
                     continue;
                 }
                 file_put_contents($ldir . $aFn, $contents);
             }
         }
     }

     // iterate over the files
     $paths = $this->getFilePaths($ldir, 'gz');
     $gz = strstr($output_format, "gz") ? true : false;
     foreach ($files as $k => $val) {
         foreach ($val as $fn) {
             if (!in_array($fn, $paths)) {
                 continue;
             }
             $ofile = $odir . basename($fn, ".gz") . "." . $output_format;
             parent::setWriteFile($ofile, $gz);
             parent::setReadFile($ldir . $fn, true);

             $source_file = (new DataResource($this))
                 ->setURI($download_url . basename($fn))
                 ->setTitle('International Protein Index filename: ' . basename($fn))
                 ->setRetrievedDate(gmdate($utc_format, filemtime($ldir . $fn)))
                 ->setFormat('text/ipi-format')
                 ->setFormat('application/zip')
                 ->setPublisher('https://www.ebi.ac.uk')
                 ->setHomepage('https://www.ebi.ac.uk/IPI')
                 ->setRights('use')
                 ->setRights('attribution')
                 ->setLicense('https://www.ebi.ac.uk')
                 ->setDataset(parent::getDatasetURI());

             $prefix = parent::getPrefix();
             $bVersion = parent::getParameterValue('bio2rdf_release');
             $date = gmdate($utc_format);
             $output_file = (new DataResource($this))
                 ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")
                 ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
                 ->setSource($source_file->getURI())
                 ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ipi/ipi.php")
                 ->setCreateDate($date)
                 ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
                 ->setPublisher("http://bio2rdf.org")
                 ->setRights("use-share-modify")
                 ->setRights("restricted-by-source-license")
                 ->setLicense("http://creativecommons.org/licenses/by/3.0/")
                 ->setDataset(parent::getDatasetURI());
             $dataset_description .= $output_file->toRDF() . $source_file->toRDF();

             echo "processing {$fn} ...";
             // the package key doubles as the name of the processing method
             $this->{$k}();
             echo "done!" . PHP_EOL;

             // the release file is rewritten after every processed file with
             // the description accumulated so far
             $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
             $this->getWriteFile()->write($dataset_description);
             $this->getWriteFile()->close();
         }
     }
 }
Example #2
0
 /**
  * Download (when needed) and convert each selected MGI report, then write
  * the dataset description (release) file.
  *
  * Parameters read: 'indir', 'outdir', 'files', 'download', 'download_url',
  * 'output_format' and 'bio2rdf_release'. 'files' is either 'all' (every
  * entry of the parameter list except its first element) or a
  * comma-separated list. Each item name is also the name of the method
  * invoked to process "<item>.rpt".
  *
  * @return void
  */
 function Run()
 {
     $idir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $files = parent::getParameterValue('files');
     if ($files == 'all') {
         $list = explode('|', parent::getParameterList('files'));
         array_shift($list);
     } else {
         $list = explode(',', $files);
     }

     $output_format = parent::getParameterValue('output_format');
     $gz = strstr($output_format, "gz") ? true : false;
     $dataset_description = '';

     foreach ($list as $item) {
         $lfile = $idir . $item . '.rpt';
         $rfile = parent::getParameterValue('download_url') . $item . '.rpt';
         if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') {
             echo "downloading {$item}...";
             $ret = Utils::DownloadSingle($rfile, $lfile);
             if ($ret != true) {
                 continue;
             }
         }

         parent::setReadFile($lfile, true);
         echo "Processing {$item}...";
         // keep the bare file name separate from the local path: only the
         // name belongs in the public download URI built below
         $oname = $item . '.' . $output_format;
         $ofile = $odir . $oname;
         parent::setWriteFile($ofile, $gz);
         $this->{$item}();
         parent::getWriteFile()->close();
         parent::getReadFile()->close();
         echo "Done" . PHP_EOL;
         parent::clear();

         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("MGI {$item}")
             ->setRetrievedDate(date("Y-m-d\\TH:i:s", filemtime($lfile)))
             ->setFormat("text")
             ->setPublisher("http://www.informatics.jax.org")
             ->setHomepage("http://www.informatics.jax.org")
             ->setRights("use")
             ->setLicense("http://www.informatics.jax.org/mgihome/other/copyright.shtml")
             ->setDataset("http://identifiers.org/mgi/");

         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = date("Y-m-d\\TH:i:s");
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$oname}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$item} in {$prefix}")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/mgi/mgi.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr($output_format, "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
     }

     // generate the dataset release file
     $this->setWriteFile($odir . parent::getBio2RDFReleaseFile());
     $this->getWriteFile()->write($dataset_description);
     $this->getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Example #3
0
 /**
  * Fetch (when needed) each selected Orphanet file and append its source and
  * output descriptions to the release file.
  *
  * Parameters read: 'indir', 'outdir', 'files', 'download', 'download_url',
  * 'output_format' and 'bio2rdf_release'. 'files' is either 'all' (every
  * entry of the parameter list except its first element) or a
  * comma-separated list of keys of $this->filemap.
  *
  * NOTE(review): the actual conversion step is commented out below, so this
  * method currently only produces the dataset description - confirm whether
  * that is intentional.
  *
  * @return void
  */
 function run()
 {
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $dd = '';
     $files = parent::getParameterValue('files');
     if ($files == 'all') {
         $files = explode('|', parent::getParameterList('files'));
         array_shift($files);
     } else {
         $files = explode(',', $files);
     }

     $suffix = parent::getParameterValue('output_format');
     $gz = strstr($suffix, ".gz") !== false;

     foreach ($files as $file) {
         echo "processing {$file} ...";
         if (!isset($this->filemap[$file])) {
             // an unknown key would otherwise resolve to the bare directory / base URL
             echo "no file mapping for {$file} ... skipping" . PHP_EOL;
             continue;
         }
         $lfile = $ldir . $this->filemap[$file];
         $rfile = parent::getParameterValue('download_url') . $this->filemap[$file];
         if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') {
             $ret = utils::downloadSingle($rfile, $lfile);
             if ($ret === false) {
                 echo "unable to download {$file} ... skipping" . PHP_EOL;
                 continue;
             }
         }
         parent::setReadFile($lfile, true);
         $ofile = "orphanet-" . $file . '.' . $suffix;
         /*			parent::setWriteFile($odir.$ofile, $gz);
         			$this->$file($lfile);
         			parent::getWriteFile()->close();
         */
         parent::getReadFile()->close();
         parent::clear();
         echo "done!" . PHP_EOL;

         // dataset description
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("Orphanet: {$file}")
             ->setRetrievedDate(parent::getDate(filemtime($lfile)))
             ->setFormat("application/xml")
             ->setPublisher("http://www.orpha.net")
             ->setHomepage("http://www.orpha.net/")
             ->setRights("use")
             ->setRights("sharing-modified-version-needs-permission")
             ->setLicense("http://creativecommons.org/licenses/by-nd/3.0/")
             ->setDataset("http://identifiers.org/orphanet/");

         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         // the output file is not (re)written here, so it may be absent;
         // fall back to the current time instead of calling filemtime() on a
         // missing path
         $opath = $odir . $ofile;
         $date = parent::getDate(file_exists($opath) ? filemtime($opath) : time());
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/orphanet/orphanet.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr($suffix, "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dd .= $source_file->toRDF() . $output_file->toRDF();
     }

     parent::writeToReleaseFile($dd);
 }
Example #4
0
 /**
  * Download (when needed) and convert homologene.data, then write the
  * dataset description (release) file.
  *
  * Parameters read: 'indir', 'outdir', 'download', 'download_url',
  * 'output_format' and 'bio2rdf_release'. When the local file is missing
  * the 'download' parameter is forced on.
  *
  * @return void
  */
 function Run()
 {
     $file = "homologene.data";
     $ldir = $this->GetParameterValue('indir');
     $odir = $this->GetParameterValue('outdir');
     $rdir = $this->GetParameterValue('download_url');
     $lfile = $ldir . $file;
     $rfile = $rdir . $file;
     // 'H' gives a zero-padded hour; the literal 'Z' is only truthful with gmdate()
     $utc_format = 'Y-m-d\TH:i:s\Z';

     if (!file_exists($lfile)) {
         trigger_error($file . " not found. Will attempt to download.", E_USER_NOTICE);
         parent::setParameterValue('download', true);
     }

     // download
     if ($this->GetParameterValue('download') == true) {
         echo "downloading {$file} ... ";
         if (utils::downloadSingle($rfile, $lfile) === false && !file_exists($lfile)) {
             // nothing to process: neither a fresh download nor a local copy
             trigger_error("Unable to download {$rfile}", E_USER_WARNING);
             return;
         }
     }

     $output_format = parent::getParameterValue('output_format');
     $ofile = 'homologene.' . $output_format;
     $gz = strstr($output_format, "gz") ? true : false;
     parent::setReadFile($lfile);
     parent::setWriteFile($odir . $ofile, $gz);
     echo "processing {$file}... ";
     $this->process();
     echo "done!" . PHP_EOL;
     parent::getWriteFile()->close();

     // generate the dataset release file
     $source_file = (new DataResource($this))
         ->setURI($rfile)
         ->setTitle("NCBI Homologene")
         ->setRetrievedDate(gmdate($utc_format, filemtime($lfile)))
         ->setFormat("text/tab-separated-value")
         ->setPublisher("http://www.ncbi.nlm.nih.gov")
         ->setHomepage("http://www.ncbi.nlm.nih.gov/homologene")
         ->setRights("use-share-modify")
         ->setLicense("http://www.ncbi.nlm.nih.gov/About/disclaimer.html")
         ->setDataset("http://identifiers.org/homologene/");

     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = gmdate($utc_format);
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/homologene/homologene.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr($output_format, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }

     $dataset_description = $source_file->toRDF() . $output_file->toRDF();
     echo "Generating dataset description... ";
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Example #5
0
 /**
  * Optionally download the UniSTS files, convert every 'gz' file found in
  * the input directory and write the dataset description (release) file.
  *
  * Parameters read: 'indir', 'outdir', 'download', 'download_url',
  * 'output_format' and 'bio2rdf_release'.
  *
  * @return void
  */
 function run()
 {
     $dataset_description = '';
     $ldir = parent::GetParameterValue('indir');
     $odir = parent::GetParameterValue('outdir');
     $download_url = parent::getParameterValue('download_url');
     $output_format = parent::getParameterValue('output_format');
     // 'H' gives a zero-padded hour; the literal 'Z' is only truthful with gmdate()
     $utc_format = 'Y-m-d\TH:i:s\Z';

     // download
     if ($this->GetParameterValue('download') == true) {
         $list = $this->getFtpFileList('ftp.ncbi.nih.gov');
         $total = count($list);
         $counter = 1;
         foreach ($list as $f) {
             $remote = $download_url . $f;
             echo "downloading file {$counter} out of {$total} :" . $remote . "... " . PHP_EOL;
             $contents = file_get_contents($remote);
             if ($contents === false) {
                 // do not clobber an existing local copy with an empty file
                 trigger_error("Unable to download {$remote}", E_USER_WARNING);
             } else {
                 file_put_contents($ldir . $f, $contents);
             }
             $counter++;
         }
     }

     // iterate over the files
     $paths = $this->getFilePaths($ldir, 'gz');
     $gz = strstr($output_format, "gz") ? true : false;
     foreach ($paths as $aPath) {
         $ofile = $odir . basename($aPath, ".gz") . "." . $output_format;
         parent::setWriteFile($ofile, $gz);
         parent::setReadFile($ldir . $aPath, true);

         $source_file = (new DataResource($this))
             ->setURI($download_url . basename($aPath))
             ->setTitle('NCBI UniSTS filename: ' . basename($aPath))
             ->setRetrievedDate(gmdate($utc_format, filemtime($ldir . $aPath)))
             ->setFormat('xml/unists-format')
             ->setFormat('application/zip')
             ->setPublisher('https://www.ncbi.nlm.nih.gov')
             ->setHomepage('https://www.ncbi.nlm.nih.gov/unists')
             ->setRights('use')
             ->setRights('attribution')
             ->setLicense('https://www.nlm.nih.gov/copyright.html')
             ->setDataset(parent::getDatasetURI());

         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = gmdate($utc_format);
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/unists/unists.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         $dataset_description .= $output_file->toRDF() . $source_file->toRDF();

         echo "processing {$aPath} ...";
         $this->process();
         echo "done!" . PHP_EOL;

         // the release file is rewritten after every processed file with the
         // description accumulated so far
         $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
         $this->getWriteFile()->write($dataset_description);
         $this->getWriteFile()->close();
     }
 }
Example #6
0
 /**
  * Build pdb2rdf with Maven, optionally download the input files, extract
  * the command line tool and run it over the input directory.
  *
  * Parameters read: 'indir', 'outdir' and 'download'. Each failing step
  * raises an error via trigger_error() and stops the run; an incomplete
  * download only raises a warning.
  *
  * @return void
  */
 function Run()
 {
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');

     // check dependencies
     if (!$this->checkDependencies()) {
         trigger_error("Dependencies not met!", E_USER_ERROR);
         exit;
     }

     // build pdb2rdf; the path is quoted so that a checkout directory
     // containing spaces or shell metacharacters does not break the command
     echo "building pdb2rdf..." . PHP_EOL;
     $cmd = "mvn clean install -DskipTests -f " . escapeshellarg(__DIR__ . "/pom.xml");
     $build_out = shell_exec($cmd);
     if (!$this->verifyMavenBuildOutput($build_out)) {
         trigger_error("Could not build pdb2rdf. Please try manually!", E_USER_ERROR);
         return;
     }

     // now check if download is desired
     if ($this->getParameterValue('download') && !$this->downloadFiles($ldir)) {
         trigger_error("Not all files downloaded!", E_USER_WARNING);
     }

     // extract pdb2rdf-cli from the target directory
     if (!$this->extractCli()) {
         trigger_error("Could not extract pdb2rdf!", E_USER_ERROR);
         // without the extracted tool there is nothing to run, even when a
         // custom error handler lets execution continue
         return;
     }

     // now get ready to run pdb2rdf.sh
     if (!$this->runPdb2Rdf($ldir, $odir)) {
         trigger_error("Could not run Pdb2RDF correctly!", E_USER_ERROR);
         exit;
     }
     echo "done!\n";
 }
Example #7
0
 /**
  * Download (when needed) miriam.xml, parse it and write the RDF output.
  *
  * Parameters read: 'indir', 'outdir', 'download', 'download_url' and
  * 'output_format'.
  *
  * @return bool true once the output has been written; false when the
  *              source file could not be downloaded and no local copy exists
  */
 function Run()
 {
     echo "processing miriam database";

     // directory shortcuts
     $ldir = $this->getParameterValue('indir');
     $odir = $this->getParameterValue('outdir');

     // download and set the read file
     $rfile = $this->getParameterValue("download_url");
     $lfile = $ldir . 'miriam.xml';
     if (!file_exists($lfile) || $this->getParameterValue("download") == "true") {
         if (utils::downloadSingle($rfile, $lfile) === false && !file_exists($lfile)) {
             // nothing to parse: neither a fresh download nor a local copy
             trigger_error("Unable to download {$rfile}", E_USER_WARNING);
             return false;
         }
     }
     parent::setReadFile($lfile);

     // set the write file
     $output_format = parent::getParameterValue('output_format');
     $outfile = "miriam." . $output_format;
     $gz = strstr($output_format, ".gz") !== false;
     parent::setWriteFile($odir . $outfile, $gz);

     $this->parse();
     parent::WriteRDFBufferToWriteFile();
     $this->getWriteFile()->Close();
     return true;
 }
Example #8
0
 /**
  * Fetch the list of SABIO-RK reaction ids, download and convert the BioPAX
  * record of each selected reaction, then write the dataset description
  * (release) file.
  *
  * Parameters read: 'indir', 'outdir', 'files', 'download', 'dataset_graph',
  * 'output_format' and 'bio2rdf_release'. 'files' is 'all', a
  * comma-separated list of reaction ids, a "from-to" range, or a single id.
  *
  * @return void
  */
 function Run()
 {
     $idir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $files = parent::getParameterValue('files');
     // 'H' gives a zero-padded hour; the literal 'Z' is only truthful with gmdate()
     $utc_format = 'Y-m-d\TH:i:s\Z';

     // set the work; $myfiles stays null when every reaction is wanted
     $myfiles = null;
     if ($files != 'all') {
         // check if comma-separated, or hyphen-range
         $list = explode(",", $files);
         if (count($list) == 1) {
             // try hyphen separated
             $range = explode("-", $files);
             if (count($range) == 2) {
                 for ($n = $range[0]; $n <= $range[1]; $n++) {
                     $myfiles[] = $n;
                 }
             } else {
                 // must be a single entry
                 $myfiles[] = $files;
             }
         } else {
             $myfiles = $list;
         }
     }

     $rest_uri = 'http://sabiork.h-its.org/sabioRestWebServices/';
     $getReactionIds_url = $rest_uri . "suggestions/SABIOReactionIDs";
     $reaction_list_file = $idir . "reactions.xml";
     if (!file_exists($reaction_list_file) || parent::getParameterValue('download') == 'true') {
         $xml = file_get_contents($getReactionIds_url);
         // test the download result, not the (always non-false) file name
         if (FALSE === $xml) {
             trigger_error("Unable to download {$getReactionIds_url}", E_USER_WARNING);
             exit;
         }
         $f = new FileFactory($reaction_list_file);
         $f->Write($xml);
         $f->Close();
     }

     $xml = simplexml_load_file($reaction_list_file);
     if ($xml === false) {
         trigger_error("Unable to parse {$reaction_list_file}", E_USER_WARNING);
         exit;
     }
     $total = count($xml->SABIOReactionID);
     if (isset($myfiles)) {
         $total = count($myfiles);
     }

     parent::setCheckpoint('dataset');
     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }

     $suffix = parent::getParameterValue('output_format');
     $ofile = "sabiork." . $suffix;
     $gz = strstr($suffix, "gz") ? true : false;
     parent::setWriteFile($odir . $ofile, $gz);

     $i = 0;
     foreach ($xml->SABIOReactionID as $rid) {
         parent::setCheckpoint('file');
         // work with the plain id rather than the SimpleXMLElement node
         $rid = (string) $rid;
         if (isset($myfiles) && !in_array($rid, $myfiles)) {
             continue;
         }
         $i++;
         echo "{$i} / {$total} : reaction {$rid}" . PHP_EOL;

         $reaction_file = $idir . "reaction_" . $rid . ".owl.gz";
         if (!file_exists($reaction_file) || $this->GetParameterValue('download') == 'true') {
             $url = $rest_uri . 'searchKineticLaws/biopax?q=SabioReactionID:' . $rid;
             $data = file_get_contents($url);
             if ($data === FALSE) {
                 continue;
             }
             $f = new FileFactory($reaction_file, true);
             $f->Write($data);
             $f->Close();
         }

         $buf = file_get_contents("compress.zlib://" . $reaction_file);
         if ($buf === false) {
             trigger_error("Unable to read {$reaction_file}", E_USER_WARNING);
             continue;
         }

         // send for parsing
         $p = new BioPAX2Bio2RDF($this);
         $p->SetBuffer($buf)
             ->SetBioPAXVersion(3)
             ->SetBaseNamespace("http://sabio.h-its.org/biopax#")
             ->SetBio2RDFNamespace("http://bio2rdf.org/sabiork:")
             ->SetDatasetURI($this->GetDatasetURI());
         $rdf = $p->Parse();
         parent::getWriteFile()->Write($rdf);
     }
     parent::getWriteFile()->Close();

     // generate dataset description
     echo "Generating dataset description... ";
     $source_file = (new DataResource($this))
         ->setURI("http://sabiork.h-its.org/sabioRestWebServices/searchKineticLaws/biopax")
         ->setTitle("SABIO-RK Biochemical Reaction Kinetics Database")
         ->setRetrievedDate(gmdate($utc_format, filemtime($odir . $ofile)))
         ->setFormat("text/xml")
         ->setPublisher("http://sabio.villa-bosch.de/")
         ->setHomepage("http://sabio.villa-bosch.de/")
         ->setRights("use-share-modify")
         ->setRights("no-commercial")
         ->setLicense("http://sabio.villa-bosch.de/layouts/content/termscondition.gsp")
         ->setDataset("http://identifiers.org/sabiork.reaction/");

     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = gmdate($utc_format);
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sabiork/sabiork.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr($suffix, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description = $source_file->toRDF() . $output_file->toRDF();

     // write dataset description to file, restoring the graph URI saved above
     parent::setGraphURI($graph_uri);
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Example #9
0
 /**
  * Download (when needed) the NDC zip archive, convert each selected entry
  * and write the dataset description (release) file.
  *
  * Parameters read: 'indir', 'outdir', 'files', 'download', 'download_url',
  * 'output_format' and 'bio2rdf_release'. 'files' is either 'all' (every
  * entry of the parameter list except its first element) or a list of names
  * separated by '|' or ','. Each name is also the name of the method that
  * receives the stream of "<name>.txt" from the archive.
  *
  * @return false|void false when an archive entry cannot be opened
  */
 function Run()
 {
     $ldir = $this->GetParameterValue('indir');
     $odir = $this->GetParameterValue('outdir');
     $rfile = $this->GetParameterValue('download_url');
     $lfile = substr($rfile, strrpos($rfile, "/") + 1);
     // 'H' gives a zero-padded hour; the literal 'Z' is only truthful with gmdate()
     $utc_format = 'Y-m-d\TH:i:s\Z';

     // check if exists
     if (!file_exists($ldir . $lfile) or parent::getParameterValue('download') == 'true') {
         echo "dowloading {$rfile} ...";
         trigger_error("Will attempt to download ", E_USER_NOTICE);
         Utils::DownloadSingle($rfile, $ldir . $lfile);
         echo "done" . PHP_EOL;
     }

     // make sure we have the zip archive; open() reports most failures as an
     // integer error code, so anything other than true is a failure
     $zin = new ZipArchive();
     if ($zin->open($ldir . $lfile) !== true) {
         trigger_error("Unable to open {$ldir}{$lfile}");
         exit;
     }

     // get the work
     if ($this->GetParameterValue('files') == 'all') {
         $files = explode("|", $this->GetParameterList('files'));
         array_shift($files);
     } else {
         // accept ',' as well as '|' between names
         $files = preg_split('/[|,]/', $this->GetParameterValue('files'));
     }

     $output_format = parent::getParameterValue('output_format');
     $gz = strstr($output_format, "gz") ? true : false;
     $outfile = "ndc." . $output_format;
     parent::setWriteFile($odir . $outfile, $gz);

     // now go through each item in the zip file and process
     foreach ($files as $file) {
         echo "Processing {$file}... ";
         $fpin = $zin->getStream($file . ".txt");
         if (!$fpin) {
             trigger_error("Unable to get pointer to {$file} in {$ldir}{$lfile}", E_USER_ERROR);
             $zin->close();
             return FALSE;
         }
         $this->{$file}($fpin);
         parent::writeRDFBufferToWriteFile();
         echo "done!" . PHP_EOL;
     }
     $zin->close();
     parent::getWriteFile()->close();

     // start generating dataset description file
     echo "Generating dataset description for {$outfile}... ";
     $source_file = (new DataResource($this))
         ->setURI($rfile)
         ->setTitle("FDA National Drug Code Directory")
         ->setRetrievedDate(gmdate($utc_format, filemtime($ldir . $lfile)))
         ->setFormat("text/tab-separated-value")
         ->setFormat("application/zip")
         ->setPublisher("http://www.fda.gov")
         ->setHomepage("http://www.fda.gov/Drugs/InformationOnDrugs/ucm142438.htm")
         ->setRights("use-share")
         ->setLicense(null)
         ->setDataset("http://identifiers.org/ndc/");

     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = gmdate($utc_format);
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outfile}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ndc/ndc.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr($output_format, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description = $source_file->toRDF() . $output_file->toRDF();

     // write dataset description to file
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Example #10
0
 function run()
 {
     // get the file list
     if ($this->GetParameterValue('files') == 'all') {
         $files = explode("|", $this->GetParameterList('files'));
         array_shift($files);
     } else {
         $files = explode(",", $this->GetParameterValue('files'));
     }
     if ($this->getParameterValue('additional') != 'none') {
         $f = explode(",", $this->getParameterValue('additional'));
         $files = array_merge($files, $f);
     }
     $ldir = $this->GetParameterValue('indir');
     $odir = $this->GetParameterValue('outdir');
     $rdir = $this->GetParameterValue('download_url');
     $dataset_description = '';
     foreach ($files as $file) {
         $suffix = ".zip";
         $lfile = $ldir . $file . $suffix;
         $rfile = $rdir . $file . $suffix;
         if ($file == "offsides" and !file_exists($lfile)) {
             echo "downloading twosides...";
             $rfile = "http://www.pharmgkb.org/redirect.jsp?p=ftp%3A%2F%2Fftpuserd%3AGKB4ftp%40ftp.pharmgkb.org%2Fdownload%2Ftatonetti%2F3003377s-offsides.zip";
             utils::DownloadSingle($rfile, $lfile);
             echo "done" . PHP_EOL;
         } elseif ($file == "twosides" and !file_exists($lfile)) {
             echo "downloading {$file} ...";
             $rfile = "http://www.pharmgkb.org/redirect.jsp?p=ftp%3A%2F%2Fftpuserd%3AGKB4ftp%40ftp.pharmgkb.org%2Fdownload%2Ftatonetti%2F3003377s-twosides.zip";
             utils::DownloadSingle($rfile, $lfile);
             echo "done" . PHP_EOL;
         } elseif ($file == 'annotations' or $file == 'relationships') {
             if (!file_exists($lfile)) {
                 echo "Contact PharmGKB to get access to variants/clinical variants; save file as annotations.zip" . PHP_EOL;
                 continue;
             }
         } else {
             if (!file_exists($lfile) or parent::getParameterValue('download') == true) {
                 echo "Downloading {$lfile} ... ";
                 Utils::DownloadSingle('https://www.pharmgkb.org/download.do?objId=' . $file . '.zip&dlCls=common', $lfile);
                 echo "done" . PHP_EOL;
             }
         }
         // get a pointer to the file in the zip archive
         if (!file_exists($lfile)) {
             echo "no local copy of {$lfile} . skipping" . PHP_EOL;
             continue;
         }
         $zin = new ZipArchive();
         if ($zin->open($lfile) === FALSE) {
             trigger_error("Unable to open {$lfile}");
             exit;
         }
         $zipentries = array();
         if ($file == "annotations") {
             // exclude: 'clinical_ann.tsv','study_parameters.tsv'
             $zipentries = array('clinical_ann_metadata.tsv', 'var_drug_ann.tsv', 'var_pheno_ann.tsv', 'var_fa_ann.tsv');
         } else {
             if ($file == "pathways") {
                 for ($i = 0; $i < $zin->numFiles; $i++) {
                     $stat = $zin->statIndex($i);
                     $entry = $stat['name'];
                     $ext = pathinfo($entry, PATHINFO_EXTENSION);
                     if ($ext != "txt") {
                         $zipentries[] = $entry;
                     }
                 }
             } else {
                 if ($file == "relationships") {
                     $zipentries = array("relationships.tsv");
                 } else {
                     if ($file == 'offsides') {
                         $zipentries = array('3003377s-offsides.tsv');
                     } else {
                         if ($file == 'twosides') {
                             $zipentries = array('3003377s-twosides.tsv');
                         } else {
                             $zipentries = array($file . ".tsv");
                         }
                     }
                 }
             }
         }
         // set the write file, parse, write and close
         $suffix = parent::getParameterValue('output_format');
         $outfile = $file . '.' . $suffix;
         $gz = false;
         if (strstr(parent::getParameterValue('output_format'), "gz")) {
             $gz = true;
         }
         $this->SetWriteFile($odir . $outfile, $gz);
         foreach ($zipentries as $zipentry) {
             if (($fp = $zin->getStream($zipentry)) === FALSE) {
                 trigger_error("Unable to get {$file}.tsv in ziparchive {$lfile}");
                 return FALSE;
             }
             $this->SetReadFile($lfile);
             $this->GetReadFile()->SetFilePointer($fp);
             if ($file == "annotations") {
                 $fnx = substr($zipentry, 0, strpos($zipentry, ".tsv"));
                 echo "processing {$zipentry}..";
             } else {
                 if ($file == 'pathways') {
                     $fnx = 'pathways';
                     echo "processing {$fnx} ({$zipentry})... ";
                 } else {
                     $fnx = $file;
                     echo "processing {$fnx} ... ";
                 }
             }
             $this->{$fnx}();
             parent::writeRDFBufferToWriteFile();
             parent::clear();
             echo "done!" . PHP_EOL;
             // generate the dataset release file
             $source_file = (new DataResource($this))->setURI($rfile)->setTitle("Pharmacogenomics Knowledge Base ({$zipentry})")->setRetrievedDate(date("Y-m-d\\TG:i:s\\Z", filemtime($lfile)))->setFormat("text/tab-separated-value")->setFormat("application/zip")->setPublisher("http://www.pharmgkb.org/")->setHomepage("http://www.pharmgkb.org/")->setRights("use")->setRights("no-commercial")->setLicense("http://www.pharmgkb.org/page/policies")->setDataset("http://identifiers.org/pharmgkb/");
             $prefix = parent::getPrefix();
             $bVersion = parent::getParameterValue('bio2rdf_release');
             $date = date("Y-m-d\\TG:i:s\\Z");
             $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outfile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} {$file} (generated at {$date})")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/pharmgkb/pharmgkb.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI());
             if ($gz) {
                 $output_file->setFormat("application/gzip");
             }
             if (strstr(parent::getParameterValue('output_format'), "nt")) {
                 $output_file->setFormat("application/n-triples");
             } else {
                 $output_file->setFormat("application/n-quads");
             }
             $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
         }
         $this->GetWriteFile()->Close();
     }
     // foreach
     echo "Generating dataset description... ";
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Example #11
0
 /**
  * Convert the HGNC complete set to RDF and write the dataset description.
  *
  * Steps, in order:
  *  1. download the source file when 'download' is set, or when the local
  *     copy is missing (in which case 'download' is switched on);
  *  2. open the gzipped source for reading and the output file for writing,
  *     then delegate the conversion to $this->process();
  *  3. describe the source and output files and write that description to
  *     the Bio2RDF release file in the output directory.
  *
  * Reads the 'indir', 'outdir', 'download_url', 'download', 'output_format'
  * and 'bio2rdf_release' parameters.
  */
 function Run()
 {
     $file = "hgnc_complete_set.txt.gz";
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     $lfile = $ldir . $file;

     // no local copy and no download requested: force a download
     if (!file_exists($lfile) && parent::getParameterValue('download') == false) {
         trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
         parent::setParameterValue('download', true);
     }

     // download the hgnc file; 'download_url' is used as-is as the remote location
     if (parent::getParameterValue('download') == true) {
         echo "downloading {$file} ... ";
         Utils::DownloadSingle($rdir, $lfile);
     }

     $format = parent::getParameterValue('output_format');
     $ofile = $odir . "hgnc." . $format;
     // an output format mentioning "gz" selects compressed output
     $gz = strstr($format, "gz") !== false;

     parent::setWriteFile($ofile, $gz);
     parent::setReadFile($lfile, true);
     echo "processing {$file}... ";
     $this->process();
     echo "done!" . PHP_EOL;

     // close the RDF output exactly once; it is re-opened below for the release file
     parent::getWriteFile()->close();
     echo PHP_EOL;

     // generate the dataset release file
     echo "generating dataset release file... ";
     // H gives a zero-padded hour and gmdate() makes the trailing "Z" (UTC) truthful
     $timestamp_format = 'Y-m-d\TH:i:s\Z';

     $source_file = (new DataResource($this))
         ->setURI($rdir)
         ->setTitle('HUGO Gene Nomenclature Committee (HGNC)')
         ->setRetrievedDate(gmdate($timestamp_format, filemtime($lfile)))
         ->setFormat('text/tab-separated-value')
         // the source is a .gz file and is read as gzip above
         ->setFormat('application/gzip')
         ->setPublisher('http://www.genenames.org/')
         ->setHomepage('http://www.genenames.org/data/gdlw_columndef.html')
         ->setRights('use')
         ->setRights('attribution')
         ->setLicense('http://www.genenames.org/about/overview')
         ->setDataset(parent::getDatasetURI());

     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = gmdate($timestamp_format);

     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/hgnc/hgnc.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("restricted-by-source-license")
         // was "http://creativecommons/licenses/by/3.0/" (missing ".org")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());

     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr($format, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }

     $dataset_description = $source_file->toRDF() . $output_file->toRDF();
     $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
     $this->getWriteFile()->write($dataset_description);
     $this->getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Example #12
0
 /**
  * Convert the selected WormBase files to RDF and write the dataset description.
  *
  * For every requested file this downloads the source when it is missing
  * locally or when 'download' is set, calls the method of $this whose name
  * equals the file key, and appends a description of the source and output
  * files to the dataset description. The description is written to the
  * Bio2RDF release file once all files are done.
  *
  * Reads the 'files', 'release', 'indir', 'outdir', 'download_url',
  * 'download', 'dataset_graph', 'output_format' and 'bio2rdf_release'
  * parameters.
  */
 public function run()
 {
     if (parent::getParameterValue('files') == 'all') {
         $files = explode("|", parent::getParameterList('files'));
         // NOTE(review): presumably the first list entry is 'all' itself - confirm
         array_shift($files);
     } else {
         $files = explode(",", parent::getParameterValue('files'));
     }

     $release = parent::getParameterValue('release');
     // release used for the ontology association files
     $releaseb = "WS249";

     $annotation_base = "species/c_elegans/annotation/";
     $ontology_base = "releases/current-production-release/ONTOLOGY/";
     $remote_files = array(
         "geneIDs" => $annotation_base . "geneIDs/c_elegans.PRJNA13758." . $release . ".geneIDs.txt.gz",
         "functional_descriptions" => $annotation_base . "functional_descriptions/c_elegans.PRJNA13758." . $release . ".functional_descriptions.txt.gz",
         "gene_interactions" => $annotation_base . "gene_interactions/c_elegans.PRJNA13758." . $release . ".gene_interactions.txt.gz",
         "gene_associations" => $ontology_base . "gene_association." . $releaseb . ".wb",
         "phenotype_associations" => $ontology_base . "phenotype_association." . $releaseb . ".wb",
     );
     $local_files = array(
         "geneIDs" => "wormbase." . $release . ".genes.txt.gz",
         "functional_descriptions" => "wormbase." . $release . ".functional_descriptions.txt.gz",
         "gene_interactions" => "wormbase." . $release . ".gene_interactions.txt.gz",
         "gene_associations" => "wormbase." . $release . ".gene_association.wb",
         "phenotype_associations" => "wormbase." . $release . ".phenotype_associations.wb",
     );

     $idir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     $suffix = parent::getParameterValue('output_format');
     $gz = strstr($suffix, "gz") ? true : false;
     // H gives a zero-padded hour and gmdate() makes the trailing "Z" (UTC) truthful
     $timestamp_format = 'Y-m-d\TH:i:s\Z';
     $dataset_description = '';

     // remember the current graph URI; it is restored after the loop
     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }

     foreach ($files as $file) {
         // an unknown name has no file mapping and no parser method: skip it
         // instead of failing on an undefined index / undefined method
         if (!isset($local_files[$file], $remote_files[$file])) {
             trigger_error("Unknown file '{$file}'; skipping." . PHP_EOL, E_USER_WARNING);
             continue;
         }
         $lfile = $idir . $local_files[$file];
         $rfile = $rdir . $remote_files[$file];

         $missing = !file_exists($lfile);
         if ($missing or parent::getParameterValue('download') == true) {
             // only report "not found" when the file really is absent
             if ($missing) {
                 trigger_error($lfile . " not found. Will attempt to download." . PHP_EOL, E_USER_WARNING);
             }
             echo "Downloading {$rfile}... ";
             Utils::DownloadSingle($rfile, $lfile);
             echo "done!" . PHP_EOL;
         }

         // decide on gzip reading from the extension, not from "gz" occurring
         // anywhere in the path (e.g. in a directory name)
         $is_gzipped = pathinfo($lfile, PATHINFO_EXTENSION) === "gz";
         parent::setReadFile($lfile, $is_gzipped);

         $ofile = "wormbase." . $file . "." . $suffix;
         parent::setWriteFile($odir . $ofile, $gz);

         echo "Processing {$file}... ";
         // the parser method carries the same name as the file key
         $fnx = $file;
         $this->{$fnx}();
         echo "done!" . PHP_EOL;
         parent::getWriteFile()->close();

         // generate the dataset release file
         echo "Generating dataset description for {$ofile}... ";

         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("WormBase Release " . $release . " subset ({$file})")
             ->setRetrievedDate(gmdate($timestamp_format, filemtime($lfile)))
             ->setFormat("text/tab-separated-value")
             ->setFormat("application/gzip")
             ->setPublisher("http://wormbase.org/")
             ->setHomepage("http://wormbase.org/")
             ->setRights("use")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://www.wormbase.org/about/policies")
             ->setDataset("http://identifiers.org/wormbase/");

         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = gmdate($timestamp_format);

         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/wormbase/wormbase.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());

         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr($suffix, "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }

         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
         echo "done!" . PHP_EOL;
     }

     parent::setGraphURI($graph_uri);

     // write the accumulated description to the release file
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
 }
Example #13
0
 /**
  * Convert the selected iRefIndex MITAB archives to RDF and write the
  * dataset description.
  *
  * For every requested file this downloads the zip archive when it is
  * missing locally or when 'download' is set, streams the first archive
  * entry through $this->Parse(), and appends a description of the source
  * and output files to the dataset description.
  *
  * Reads the 'files', 'indir', 'outdir', 'download_url', 'download',
  * 'version', 'output_format', 'dataset_graph' and 'bio2rdf_release'
  * parameters.
  *
  * @return bool TRUE on success; FALSE when a download fails or the archive
  *              entry cannot be read. An archive that cannot be opened or a
  *              parsing error terminates the script.
  */
 function Run()
 {
     // get the file list
     if (parent::getParameterValue('files') == 'all') {
         $files = array('all');
     } else {
         $files = explode(",", parent::getParameterValue('files'));
     }
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     $format = parent::getParameterValue('output_format');
     // H gives a zero-padded hour and gmdate() makes the trailing "Z" (UTC) truthful
     $timestamp_format = 'Y-m-d\TH:i:s\Z';
     $dataset_description = '';

     foreach ($files as $file) {
         $download = parent::getParameterValue('download');
         $version = parent::getParameterValue("version");
         $zip_file = ucfirst($file) . ".mitab." . $version . ".txt.zip";
         $lfile = $ldir . $zip_file;
         $rfile = $rdir . $zip_file;
         $gz = strstr($format, ".gz") !== FALSE;
         $ofile = "irefindex-" . $file . "." . $format;

         if (!file_exists($lfile)) {
             trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
             $download = true;
         }
         if ($download == true) {
             echo "downloading {$rfile}" . PHP_EOL;
             if (FALSE === Utils::DownloadSingle($rfile, $lfile)) {
                 trigger_error("Error in Download");
                 return FALSE;
             }
         }

         $zin = new ZipArchive();
         // open() reports failure with an integer error code, so only a strict
         // TRUE means success
         if ($zin->open($lfile) !== TRUE) {
             trigger_error("Unable to open {$lfile}");
             exit;
         }
         if ($zin->numFiles == 0) {
             trigger_error("No files found in ziparchive {$lfile}");
             $zin->close();
             return FALSE;
         }
         if ($zin->numFiles > 1) {
             trigger_error("Found more than one file ... using first file");
         }

         // only the first archive entry is processed
         $f = $zin->statIndex(0);
         $base_file = $f['name'];
         if (($fp = $zin->getStream($base_file)) === FALSE) {
             trigger_error("Unable to get {$base_file} in ziparchive {$lfile}");
             $zin->close();
             return FALSE;
         }

         // read from the stream of the archive entry rather than from the zip itself
         parent::setReadFile($lfile);
         parent::getReadFile()->setFilePointer($fp);

         echo "Processing " . $file . " ...";
         // compress only when the output format asks for it, so the file
         // matches its name and the format recorded in the description
         parent::setWriteFile($odir . $ofile, $gz);
         if ($this->Parse() === FALSE) {
             trigger_error("Parsing Error");
             exit;
         }
         parent::writeRDFBufferToWriteFile();
         parent::getWriteFile()->close();
         $zin->close();
         echo "Done!" . PHP_EOL;

         // the graph URI is switched for the description only and restored below
         $graph_uri = parent::getGraphURI();
         if (parent::getParameterValue('dataset_graph') == true) {
             parent::setGraphURI(parent::getDatasetURI());
         }

         // dataset description
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("iRefIndex ({$zip_file})")
             ->setRetrievedDate(gmdate($timestamp_format, filemtime($lfile)))
             ->setFormat("text/tab-separated-value")
             ->setFormat("application/zip")
             ->setPublisher("http://irefindex.uio.no")
             ->setHomepage("http://irefindex.uio.no")
             ->setRights("use")
             ->setRights("by-attribution")
             ->setRights("no-commercial")
             ->setLicense("http://irefindex.uio.no/wiki/README_MITAB2.6_for_iRefIndex#License")
             ->setDataset("http://identifiers.org/irefindex/");

         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = gmdate($timestamp_format);

         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/irefindex/irefindex.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());

         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr($format, "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }

         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
         parent::setGraphURI($graph_uri);
     }

     // write the accumulated description to the release file
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     return TRUE;
 }
Example #14
0
 /**
  * process a results xml file from the download directory
  **/
 function process_file($infile)
 {
     $indir = parent::getParameterValue('indir');
     $xml = new CXML($infile);
     $this->setCheckPoint('file');
     while ($xml->Parse("clinical_study") == TRUE) {
         $this->setCheckPoint('record');
         $this->root = $root = $xml->GetXMLRoot();
         $this->nct_id = $nct_id = $this->getString("//id_info/nct_id");
         $this->study_id = $study_id = parent::getNamespace() . "{$nct_id}";
         ### declare
         $label = $this->getString("//brief_title");
         if (!$label) {
             $label = $this->getString("//official_title");
         }
         if (!$label) {
             $label = "Clinical trial #" . $nct_id;
         }
         parent::addRDF(parent::describeIndividual($study_id, $label, parent::getVoc() . "Clinical-Study") . parent::describeClass(parent::getVoc() . "Clinical-Study", "Clinical Study"));
         ##########################################################################################
         #required header
         ##########################################################################################
         parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "download-date", $this->getString('//required_header/download_date')) . parent::triplify($study_id, parent::getVoc() . "url", $this->getString('//required_header/url')));
         ##########################################################################################
         #identifiers
         ##########################################################################################
         parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "nct-id", $this->getString('//id_info/nct_id'), "xsd:string") . parent::triplifyString($study_id, parent::getVoc() . "org-study-id", $this->getString('//id_info/org_study_id'), "xsd:string"));
         $sids = $root->xpath('//id_info/secondary_id');
         if (isset($sids)) {
             foreach ($sids as $id) {
                 parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "secondary-id", (string) $id, "xsd:string"));
             }
         }
         $nctaliases = $root->xpath('//id_info/nct-alias');
         if (isset($nctaliases)) {
             foreach ($nctaliases as $id) {
                 parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "nct-alias", (string) $id, "xsd:string"));
             }
         }
         ##########################################################################################
         #titles
         ##########################################################################################
         parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "brief-title", $this->getString("//brief_title")) . parent::triplifyString($study_id, parent::getVoc() . "official-title", $this->getString("//official_title")));
         ###################################################################################
         #brief summary
         ###################################################################################
         $brief_summary = str_replace(array("\r", "\n", "\t"), array("&#xD;", "&#xA;", "&#x9;"), $this->getString('//brief_summary/textblock'));
         parent::addRDF(parent::triplifyString($study_id, $this->getVoc() . "brief-summary", $brief_summary));
         ####################################################################################
         # detailed description
         ####################################################################################
         $d = str_replace(array("\r", "\n", "\t"), array("&#xD;", "&#xA;", "&#x9;"), $this->getString('//detailed_description/textblock'));
         parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "detailed-description", $d));
         #########################################################################################
         #acronym
         #########################################################################################
         parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "acronym", $this->getString("//acronym")));
         ########################################################################################
         #sponsors
         ########################################################################################
         try {
             $sponsors = array("lead_sponsor", "collaborator");
             foreach ($sponsors as $sponsor) {
                 $a = @array_shift($root->xpath('//sponsors/' . $sponsor));
                 if ($a == null) {
                     break;
                 }
                 $agency = $this->getString("//agency", $a);
                 $agency_id = parent::getRes() . md5($agency);
                 $agency_class = $this->getString("//agency_class", $a);
                 $agency_class_id = parent::getRes() . md5($agency_class);
                 parent::addRDF(parent::describeIndividual($agency_id, $agency, parent::getVoc() . "Organization") . parent::describeClass(parent::getVoc() . "Organization", "Organization") . parent::triplify($study_id, parent::getVoc() . str_replace("_", "-", $sponsor), $agency_id) . parent::describeIndividual($agency_class_id, $agency_class, parent::getVoc() . "Organization") . parent::describeClass(parent::getVoc() . "Organization", "Organization") . parent::triplify($agency_id, parent::getVoc() . "organization", $agency_class_id));
             }
         } catch (Exception $e) {
             echo "There was an error in the lead sponsor element: {$e}\n";
         }
         #################################################################################
         # source
         #################################################################################
         $source = $this->getString('//source');
         if ($source) {
             $source_id = parent::getRes() . md5($source);
             parent::addRDF(parent::describeIndividual($source_id, $source, parent::getVoc() . "Organization") . parent::triplify($study_id, parent::getVoc() . "source", $source_id));
         }
         ######################################################################################
         # oversight
         ######################################################################################
         try {
             $oversight = @array_shift($root->xpath('//oversight_info'));
             $oversight_id = parent::getRes() . md5($oversight->asXML());
             $authority = $this->getString('//authority', $oversight);
             $authority_id = parent::getRes() . md5($authority);
             parent::addRDF(parent::describeIndividual($oversight_id, $authority, parent::getVoc() . "Organization") . parent::triplify($study_id, $this->getVoc() . "oversight", $oversight_id) . parent::triplify($study_id, $this->getVoc() . "authority", $authority_id) . parent::triplifyString($oversight_id, parent::getVoc() . "has-dmc", $this->getString('//has_dmc', $oversight)));
         } catch (Exception $e) {
             echo "There was an error in the oversight info element: {$e}\n";
         }
         #################################################################################
         # overall status
         #################################################################################
         $overall_status = $this->getString('//overall_status');
         if ($overall_status) {
             $status_id = parent::getRes() . md5($overall_status);
             parent::addRDF(parent::describeIndividual($status_id, $overall_status, parent::getVoc() . "Status") . parent::describeClass(parent::getVoc() . "Status", "Status") . parent::triplify($study_id, parent::getVoc() . "overall-status", $status_id));
         }
         #########################################################################################
         #why stopped
         #########################################################################################
         parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "why-stopped", $this->getString("//why_stopped")));
         ##################################################################################
         # dates
         ##################################################################################
         $dates = array("start_date", "end_date", "completion_date", "primary_completion_date", "verification_date", "lastchanged_date", "firstreceived_date", "firstreceived_results_date");
         foreach ($dates as $date) {
             $d = $this->getString('//' . $date);
             if ($d) {
                 $datetime = $this->getDatetimeFromDate($d);
                 if (isset($datetime)) {
                     parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . str_replace("_", "-", $date), $datetime));
                 } else {
                     trigger_error("unable to parse date: {$d}", E_USER_ERROR);
                 }
             }
         }
         ####################################################################################
         # phase
         ####################################################################################
         $phase = $this->getString('//phase');
         if ($phase && $phase != "N/A") {
             $phase_id = $this->getRes() . md5($phase);
             parent::addRDF(parent::describeIndividual($phase_id, $phase, parent::getVoc() . "Phase", $phase) . parent::describeClass(parent::getVoc() . "Phase", $phase) . parent::triplify($study_id, parent::getVoc() . "phase", $phase_id));
         }
         ###################################################################################
         # study type
         ####################################################################################
         $study_type = $this->getString('//study_type');
         if ($study_type) {
             $study_type_id = $this->getRes() . md5($study_type);
             parent::addRDF(parent::describeClass($study_type_id, $study_type, parent::getVoc() . "Study-Type") . parent::describeClass(parent::getVoc() . "Study-Type", "Study Type") . parent::triplify($study_id, parent::getVoc() . "study-type", $study_type_id));
         }
         ###############################################################################
         # study design
         ###############################################################################
         $study_design = $this->getString('//study_design');
         if ($study_design) {
             $study_design_id = parent::getRes() . md5($study_id . $study_design);
             parent::addRDF(parent::describeIndividual($study_design_id, "{$study_id} study design", parent::getVoc() . "Study-Design") . parent::describeClass(parent::getVoc() . "Study-Design", "Study Design") . parent::triplify($study_id, parent::getVoc() . "study-design", $study_design_id));
             // Intervention Model: Parallel Assignment, Masking: Double-Blind, Primary Purpose: Treatment
             foreach (explode(", ", $study_design) as $i => $b) {
                 $c = explode(":  ", $b);
                 if (isset($c[1])) {
                     $sdp = $study_design_id . "-" . ($i + 1);
                     $key = parent::getRes() . md5($c[0]);
                     $value = parent::getRes() . md5($c[1]);
                     parent::addRDF(parent::describeIndividual($sdp, $b, parent::getVoc() . "Study-Design-Parameter") . parent::describeClass(parent::getVoc() . "Study-Design-Parameter", "Study Design Parameter") . parent::triplify($sdp, parent::getVoc() . "key", $key) . parent::describeClass($key, $c[0]) . parent::triplify($sdp, parent::getVoc() . "value", $value) . parent::describeClass($value, $c[1]) . parent::triplify($study_design_id, parent::getVoc() . "study-design-parameter", $sdp));
                 }
             }
         }
         ####################################################################################
         # target duration
         ####################################################################################
         parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "target-duration", $this->getString('//target_duration')));
         ################################################################################
         # outcomes
         ###############################################################################
         $outcomes = array("primary_outcome", "secondary_outcome", "other_outcome");
         foreach ($outcomes as $outcome) {
             $o = $root->xpath('//' . $outcome);
             if ($o) {
                 $os = $o;
                 if (!is_array($o)) {
                     $os = array($o);
                 }
                 foreach ($os as $o) {
                     try {
                         $po_id = parent::getRes() . md5($nct_id . $o->asXML());
                         $po_type = parent::getVoc() . str_replace("_", "-", $outcome);
                         $measure = $this->getString('//measure', $o);
                         $time_frame = $this->getString('//time_frame', $o);
                         $safety_issue = $this->getString('//saftey_issue', $o);
                         $description = $this->getString('//description', $o);
                         parent::addRDF(parent::describeIndividual($po_id, $measure . " " . $time_frame, ucfirst($po_type)) . parent::describeClass(ucfirst($po_type), str_replace("_", " ", ucfirst($outcome))) . parent::triplifyString($po_id, "dc:description", $description) . parent::triplifyString($po_id, parent::getVoc() . "measure", $measure) . parent::triplifyString($po_id, parent::getVoc() . "time-frame", $time_frame) . parent::triplifyString($po_id, parent::getVoc() . "safety-issue", $safety_issue) . parent::triplify($study_id, parent::getVoc() . $po_type, $po_id));
                     } catch (Exception $e) {
                         echo "There was an error parsing the primary outcome element: {$e} \n";
                     }
                 }
             }
         }
         ##############################################################################
         #number of arms
         ##############################################################################
         try {
             parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "number-of-arms", $this->getString('//number_of_arms')));
         } catch (Exception $e) {
             echo "There was an exception parsing the number of arms element: {$e}\n";
         }
         ##############################################################################
         #number of groups
         ##############################################################################
         try {
             parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "number-of-arms", $this->getString('//number_of_groups')));
         } catch (Exception $e) {
             echo "There was an exception parsing the number of groups: {$e}\n";
         }
         ##############################################################################
         #enrollment
         ##############################################################################
         try {
             $e = $root->xpath('//enrollment');
             if ($e) {
                 $type = strtolower((string) $e[0]->attributes()->type);
                 $value = $this->getString('//enrollment');
                 parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . ($type ? $type . "-" : "") . "enrollment", $value));
             }
         } catch (Exception $e) {
             echo "There was an exception parsing the enrollment element: {$e}\n";
         }
         ###############################################################################
         #condition
         ###############################################################################
         try {
             $conditions = $root->xpath('//condition');
             foreach ($conditions as $condition) {
                 $mesh_label_id = parent::getRes() . md5($condition);
                 parent::addRDF(parent::triplify($study_id, parent::getVoc() . "condition", $mesh_label_id) . parent::describeClass($mesh_label_id, $condition, parent::getVoc() . "Condition") . parent::describeClass(parent::getVoc() . "Condition", "Condition"));
             }
         } catch (Exception $e) {
             echo "There was an exception parsing condition element: {$e}\n";
         }
         ################################################################################
         # arm_group
         ################################################################################
         try {
             $arm_groups = $root->xpath('//arm_group');
             foreach ($arm_groups as $arm_group) {
                 $arm_group_id = $this->getString('./arm_group_label', $arm_group);
                 $arm_group_id = md5($arm_group_id);
                 $arm_group_uri = parent::getRes() . $this->nct_id . "/arm-group/" . $arm_group_id;
                 $arm_group_label = $this->nct_id . " arm group " . $arm_group_id;
                 $arm_group_type = ucfirst(str_replace(" ", "_", $this->getString('./arm_group_type', $arm_group)));
                 if (!$arm_group_type) {
                     $arm_group_type = "Clinical-Arm";
                 }
                 $description = $this->getString('./description', $arm_group);
                 parent::addRDF(parent::describeIndividual($arm_group_uri, $arm_group_label, parent::getVoc() . $arm_group_type) . parent::describeClass(parent::getVoc() . $arm_group_type, ucfirst(str_replace("_", " ", $arm_group_type))) . parent::triplifyString($arm_group_uri, parent::getVoc() . "description", $description) . parent::describeIndividual($arm_group_uri, $arm_group, parent::getVoc() . "Arm-Group") . parent::describeClass(parent::getVoc() . "Arm-Group", "Arm Group") . parent::triplify($study_id, parent::getVoc() . "arm-group", $arm_group_uri));
             }
         } catch (Exception $e) {
             echo "There was an exception in arm groups: {$e}\n";
         }
         ##############################################################################
         #intervention
         ##############################################################################
         try {
             $interventions = $root->xpath('//intervention');
             foreach ($interventions as $intervention) {
                 $intervention_id = parent::getRes() . md5($intervention->asXML());
                 $intervention_name = $this->getString('./intervention_name', $intervention);
                 $intervention_type = $this->getString('./intervention_type', $intervention);
                 $intervention_type_uri = parent::getVoc() . ucfirst(str_replace(" ", "_", $intervention_type));
                 $intervention_desc = $this->getString('./description', $intervention);
                 $intervention_on = $this->getString('./other_name', $intervention);
                 parent::addRDF(parent::describeIndividual($intervention_id, $intervention_name, $intervention_type_uri) . parent::describeClass($intervention_type_uri, $intervention_type) . parent::triplifyString($intervention_id, parent::getVoc() . "intervention-name", $intervention_name) . parent::triplifyString($intervention_id, parent::getVoc() . "intervention-desc", $intervention_desc) . parent::triplifyString($intervention_id, parent::getVoc() . "other-name", $intervention_on) . parent::triplify($study_id, parent::getvoc() . "intervention", $intervention_id));
                 $agl = $intervention->xpath("./arm_group_label");
                 foreach ($agl as $a) {
                     $arm_group_id = md5($a);
                     $ag = parent::getRes() . $this->nct_id . "/arm-group/" . $arm_group_id;
                     parent::addRDF(parent::describeIndividual($ag, $a, parent::getVoc() . "Arm-Group") . parent::describeClass(parent::getVoc() . "Arm-Group", "Arm Group") . parent::triplify($intervention_id, parent::getVoc() . "arm-group", $ag));
                 }
             }
         } catch (Exception $e) {
             echo "There was an error in interventions {$e}\n";
         }
         ###############################################################################
         #eligibility
         ################################################################################
         try {
             $eligibility = @array_shift($root->xpath('//eligibility'));
             if ($eligibility !== null) {
                 $eligibility_label = "eligibility for " . $study_id;
                 $eligibility_id = parent::getRes() . md5($eligibility->asXML());
                 parent::addRDF(parent::describeIndividual($eligibility_id, $eligibility_label, parent::getVoc() . "Eligibility") . parent::describeClass(parent::getVoc() . "Eligibility", "Eligibility") . parent::triplify($study_id, parent::getVoc() . "eligibility", $eligibility_id));
                 if ($criteria = @array_shift($eligibility->xpath('./criteria'))) {
                     $text = @array_shift($criteria->xpath('./textblock'));
                     parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . "text", $text));
                     $c = preg_split("/(Inclusion Criteria\\:|Exclusion Criteria\\:)/", $text);
                     //inclusion
                     if (isset($c[1])) {
                         $d = explode(" - ", $c[1]);
                         // the lists are separated by a hyphen
                         foreach ($d as $inclusion) {
                             $inc = trim($inclusion);
                             if ($inc != '') {
                                 $inc_id = parent::getRes() . md5($inc);
                                 parent::addRDF(parent::describeIndividual($inc_id, $inc, parent::getVoc() . "Inclusion-Criteria") . parent::describeClass(parent::getVoc() . "Inclusion-Criteria", "Inclusion Criteria") . parent::triplify($eligibility_id, parent::getVoc() . "inclusion-criteria", $inc_id));
                             }
                         }
                     }
                     //exclusion
                     if (isset($c[2])) {
                         $d = explode(" - ", $c[1]);
                         foreach ($d as $exclusion) {
                             $exc = trim($exclusion);
                             if ($exc != '') {
                                 $exc_id = parent::getRes() . md5($exc);
                                 parent::addRDF(parent::describeIndividual($exc_id, $exc, parent::getVoc() . "Exclusion-Criteria") . parent::describeClass(parent::getVoc() . "Exclusion-Criteria", "Exclusion Criteria") . parent::triplify($eligibility_id, parent::getVoc() . "exclusion-criteria", $exc_id));
                             }
                         }
                     }
                 }
                 parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . "gender", $this->getString('./gender', $eligibility)));
                 parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . "healthy-volunteers", $this->getString('./healthy_volunteers', $eligibility)));
                 $attributes = array('minimum_age', 'maximum_age');
                 foreach ($attributes as $a) {
                     $s = $this->getString('./' . $a, $eligibility);
                     if ($s != 'N/A') {
                         $age = trim(str_replace("Years", "", $s));
                         parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . str_replace("_", "-", $a), $age));
                     }
                 }
                 $attributes = array("study_pop" => "study-population", "sampling_method" => "sampling-method");
                 foreach ($attributes as $a => $r) {
                     $e = @array_shift($eligibility->xpath('./' . $a));
                     if ($s = $this->getString('./' . $a, $eligibility)) {
                         parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . $r, $this->getString('./textblock', $e)));
                     }
                 }
             }
         } catch (Exception $e) {
             echo "There was an error in eligibility: {$e}\n";
         }
         ######################################################################################
         #biospec
         #####################################################################################
         parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "biospec-retention", $this->getString('//biospec_retention')));
         try {
             $b = @array_shift($root->xpath('//biospec_descr'));
             if ($b) {
                 parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "biospec_descr", $this->getString('./textblock', $b)));
             }
         } catch (Exception $e) {
             echo "There was an error in biospec_descr: {$e}\n";
         }
         ###################################################################
         # contacts
         ###################################################################
         $contacts = array("overall_official", "overall_contact", "overall_contact_backup");
         try {
             foreach ($contacts as $c) {
                 $d = @array_shift($root->xpath('//' . $c));
                 if ($d) {
                     parent::addRDF(parent::triplify($study_id, parent::getVoc() . str_replace("_", "-", $c), $this->makeContact($d)));
                 }
             }
         } catch (Exception $e) {
             echo "There was an error parsing overall contact: {$e}" . "\n";
         }
         ##############################################################
         # location of facility doing the testing
         ##############################################################
         try {
             $location = @array_shift($root->xpath('//location'));
             if ($location) {
                 $location_uri = parent::getRes() . md5($location->asXML());
                 $name = $this->getString('//facility/name', $location);
                 $address = @array_shift($location->xpath('//facility/address'));
                 $contact = @array_shift($location->xpath('//contact'));
                 $backups = @array_shift($location->xpath('//contact_backup'));
                 $investigators = @array_shift($location->xpath('//investigator'));
                 parent::addRDF(parent::describeIndividual($location_uri, $name, parent::getVoc() . "Location") . parent::describeClass(parent::getVoc() . "Location", "Location") . parent::triplifyString($location_uri, parent::getVoc() . "status", $this->getString('//status', $location)) . parent::triplify($study_id, parent::getVoc() . "location", $location_uri) . parent::triplify($location_uri, parent::getVoc() . "address", $this->makeAddress($address)) . ($contact != null ? parent::triplify($location_uri, parent::getVoc() . "contact", $this->makeContact($contact)) : ""));
                 if ($backups) {
                     foreach ($backups as $backup) {
                         parent::addRDF(parent::triplify($location_uri, parent::getVoc() . "contact-backup", $this->makeContact($backup)));
                     }
                 }
                 if ($investigators) {
                     foreach ($investigators as $investigator) {
                         parent::addRDF(parent::triplify($location_uri, parent::getVoc() . "investigator", $this->makeContact($investigator)));
                     }
                 }
             }
         } catch (Exception $e) {
             echo "There was an error parsing location: {$e}" . "\n";
         }
         ######################################################################
         #countries
         ######################################################################
         try {
             $a = array("location_countries", "removed_countries");
             foreach ($a as $country) {
                 $lc = @array_shift($root->xpath('//' . $country));
                 if ($lc) {
                     $label = $this->getString('//country', $lc);
                     $cid = parent::getRes() . md5($label);
                     parent::addRDF(parent::describeIndividual($cid, $label, parent::getVoc() . "Country") . parent::describeClass(parent::getVoc() . "Country", "Country") . parent::triplify($study_id, parent::getVoc() . "country", $cid));
                 }
             }
         } catch (Exception $e) {
             echo "There was an error parsing country: {$e}" . "\n";
         }
         ######################################################################
         #reference
         ######################################################################
         try {
             $a = array("reference", "result_reference");
             foreach ($a as $ref_type) {
                 $references = $root->xpath('//' . $ref_type);
                 foreach ($references as $reference) {
                     $p = $this->getString('./PMID', $reference);
                     if ($p) {
                         $pmid = "pubmed:{$p}";
                         parent::addRDF(parent::describeIndividual($pmid, $p, parent::getVoc() . "Reference") . parent::describeClass(parent::getVoc() . "Reference", "Reference") . parent::triplifyString($pmid, parent::getVoc() . "citation", $this->getString('./citation', $reference)) . parent::triplify($study_id, parent::getVoc() . str_replace("_", "-", $ref_type), $pmid));
                     }
                 }
             }
         } catch (Exception $e) {
             echo "There was an error parsing references element: {$e}\n";
         }
         #######################################################################
         #link
         #######################################################################
         try {
             $links = $root->xpath('//link');
             foreach ($links as $i => $link) {
                 $url = $this->getString('./url', $link);
                 $url = preg_replace("/>.*\$/", "", $url);
                 $lid = parent::getRes() . md5($url);
                 parent::addRDF(parent::describeIndividual($lid, $this->getString('./description', $link), parent::getVoc() . "Link") . parent::describeClass(parent::getVoc() . "Link", "Link") . parent::triplify($lid, parent::getVoc() . "url", $url) . parent::triplify($study_id, parent::getVoc() . "link", $lid));
             }
         } catch (Exception $e) {
             echo "There was an error parsing link element: {$e}\n";
         }
         ############################################################################
         #responsible party
         ############################################################################
         try {
             $rp = @array_shift($root->xpath('//responsible_party'));
             if ($rp) {
                 $rp_id = parent::getRes() . md5($rp->asXML());
                 $label = $this->getString('./name_title', $rp);
                 if (!$label) {
                     $label = $this->getString('./organization', $rp);
                 } else {
                     $label .= ", " . $this->getString('./organization', $rp);
                 }
                 if (!$label) {
                     $label = $this->getString('./party_type', $rp);
                 }
                 $org_id = parent::getRes() . md5($this->getString('./organization', $rp));
                 parent::addRDF(parent::describeIndividual($rp_id, $label, parent::getVoc() . "Responsible-Party") . parent::describeClass(parent::getVoc() . "Responsible-Party", "Responsible Party") . parent::triplify($study_id, parent::getVoc() . "responsible-party", $rp_id) . parent::triplify($rp_id, parent::getVoc() . "organization", $org_id) . parent::describeIndividual($org_id, $this->getString('./organization', $rp), parent::getVoc() . "Organization") . parent::describeClass(parent::getVoc() . "Organization", "Organization") . parent::triplifyString($rp_id, parent::getVoc() . "name-title", $this->getString('./name_title', $rp)) . parent::triplifyString($rp_id, parent::getVoc() . "party-type", $this->getString('./party_type', $rp)) . parent::triplifyString($rp_id, parent::getVoc() . "investigator-affiliation", $this->getString('./investigator_affiliation', $rp)) . parent::triplifyString($rp_id, parent::getVoc() . "investigator-full-name", $this->getString('./investigator_full_name', $rp)) . parent::triplifyString($rp_id, parent::getVoc() . "investigator-title", $this->getString('./investigator_title', $rp)));
             }
         } catch (Exception $e) {
             echo "There was an error parsing the responsible_party element: {$e}\n";
         }
         ##############################################################################
         # keywords
         ##############################################################################
         try {
             $keywords = $root->xpath('//keyword');
             foreach ($keywords as $keyword) {
                 parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "keyword", (string) $keyword));
             }
         } catch (Exception $e) {
             echo "There was an error parsing the keywords element: {$e}";
         }
         # mesh terms
         # note: mesh terms are assigned using an imperfect algorithm
         try {
             $mesh_terms = $root->xpath('//condition_browse/mesh_term');
             foreach ($mesh_terms as $mesh_term) {
                 $term = (string) $mesh_term;
                 $mesh_id = parent::getRes() . md5($term);
                 parent::addRDF(parent::triplify($study_id, parent::getVoc() . "condition-mesh", $mesh_id));
                 parent::addRDF(parent::triplifyString($mesh_id, "rdfs:label", $term));
             }
         } catch (Exception $e) {
             echo "There was an error in mesh_terms: {$e}\n";
         }
         ################################################################################
         # regulated by fda?  is section 801? has expanded access?
         ################################################################################
         try {
             parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "is-fda-regulated", $this->getString('is_fda_regulated')) . parent::triplifyString($study_id, parent::getVoc() . "is-section-801", $this->getString('is_section_801')) . parent::triplifyString($study_id, parent::getVoc() . "has-expanded-access", $this->getString('has_expanded_access')));
         } catch (Exception $e) {
             echo "There was an error parsing the is_fda_regulated element: {$e}\n";
         }
         ###############################################################################
         # mesh terms for the condition browse and the intervention browse
         ###############################################################################
         try {
             $a = array("condition_browse", "intervention_browse");
             foreach ($a as $browse_type) {
                 $terms = $root->xpath("//{$browse_type}/mesh_term");
                 foreach ($terms as $term) {
                     $term_label = (string) $term;
                     $term_id = parent::getRes() . md5($term);
                     parent::addRDF(parent::describeIndividual($term_id, $term_label, parent::getVoc() . "Term") . parent::describeClass(parent::getVoc() . "Term", "Term") . parent::triplify($study_id, parent::getVoc() . str_replace("_", "-", $browse_type), $term_id));
                 }
             }
         } catch (Exception $e) {
             echo "There was an error parsing {$browse_type}/mesh_term element: {$e}\n";
         }
         ################################################################################
         # clinical results
         ################################################################################
         try {
             $cr = @array_shift($root->xpath('//clinical_results'));
             if ($cr) {
                 $cr_id = parent::getRes() . md5($study_id . $cr->asXML());
                 parent::addRDF(parent::describeIndividual($cr_id, "clinical results for {$study_id}", parent::getVoc() . "Clinical-Result") . parent::describeClass(parent::getVoc() . "Clinical-Result", "Clinical Result") . parent::triplifyString($cr_id, parent::getVoc() . "description", $this->getString('./desc', $cr)) . parent::triplifyString($cr_id, parent::getVoc() . "restrictive-agreement", $this->getString('./restrictive_agreement', $cr)) . parent::triplifyString($cr_id, parent::getVoc() . "limitations-and-caveats", $this->getString('./limitations_and_caveats', $cr)) . parent::triplify($study_id, parent::getVoc() . "clinical-result", $cr_id));
             }
         } catch (Exception $e) {
             echo "There was an error parsing clinical results: {$e}\n";
         }
         ################################################################################
         # Participant Flow
         ################################################################################
         try {
             $pc = 1;
             $mc = 1;
             $wc = 1;
             $pf = @array_shift($root->xpath('//clinical_results/participant_flow'));
             if ($pf) {
                 $pf_id = parent::getRes() . md5($pf->asXML());
                 parent::addRDF(parent::describeIndividual($pf_id, "participant flow for {$study_id}", parent::getVoc() . "Participant-Flow") . parent::describeClass(parent::getVoc() . "Participant-Flow", "Participant-Flow") . parent::triplify($study_id, parent::getVoc() . "participant-flow", $pf_id) . parent::triplifyString($pf_id, parent::getVoc() . "recruitment-details", $this->getString('./recruitment_details', $pf)) . parent::triplifyString($pf_id, parent::getVoc() . "pre-assignment-details", $this->getString('./pre_assignment_details', $pf)));
                 $groups = @array_shift($pf->xpath('./group_list'));
                 foreach ($groups as $group) {
                     parent::addRDF(parent::triplify($pf_id, parent::getVoc() . "group", $this->makeGroup($group)));
                 }
                 //period_list
                 $periods = @array_shift($pf->xpath('./period_list'));
                 foreach ($periods as $period) {
                     $period_id = parent::getRes() . $nct_id . "/period/" . $pc++;
                     $period_title = $this->getString('./title', $period);
                     parent::addRDF(parent::describeIndividual($period_id, $period_title . " for {$nct_id}", parent::getVoc() . "Period") . parent::describeClass(parent::getVoc() . "Period", "Period") . parent::triplify($pf_id, parent::getVoc() . "period", $period_id));
                     // milestones
                     $milestones = @array_shift($period->xpath('./milestone_list'));
                     if ($milestones) {
                         foreach ($milestones as $milestone) {
                             $milestone_id = parent::getRes() . $nct_id . "/milestone/" . $mc++;
                             $label = $this->getString('./title', $milestone);
                             parent::addRDF(parent::describeIndividual($milestone_id, $label, parent::getVoc() . "Milestone") . parent::describeClass(parent::getVoc() . "Milestone", "Milestone") . parent::triplify($period_id, parent::getVoc() . "milestone", $milestone_id));
                             // participants
                             $p = 1;
                             $ps_list = @array_shift($milestone->xpath('./participants_list'));
                             foreach ($ps_list as $ps) {
                                 $ps_id = $milestone_id . "/p/" . $p++;
                                 $group_id = parent::getRes() . $this->nct_id . "/group/" . $ps->attributes()->group_id;
                                 $count = (string) $ps->attributes()->count;
                                 parent::addRDF(parent::describeIndividual($ps_id, "participant counts in " . $ps->attributes()->group_id . " for milestone {$mc} of {$nct_id}", parent::getVoc() . "Participant-Count") . parent::describeClass(parent::getVoc() . "Participant-Count", "Participant Count") . parent::triplify($ps_id, parent::getVoc() . "group", $group_id) . parent::triplifyString($ps_id, parent::getVoc() . "count", $count) . parent::triplify($milestone_id, parent::getVoc() . "participant-counts", $ps_id));
                             }
                         }
                     }
                     // drop / withdraw reasons
                     $withdraws = @array_shift($period->xpath('./drop_withdraw_reason_list'));
                     if ($withdraws) {
                         foreach ($withdraws as $withdraw) {
                             $wid = parent::getRes() . $this->nct_id . "/withdraw/" . $wc++;
                             $label = $this->getString('./title', $withdraw);
                             parent::addRDF(parent::describeIndividual($wid, $label, parent::getVoc() . "Withdraw-Reason") . parent::describeClass(parent::getVoc() . "Withdraw-Reason", "Withdraw Reason"));
                             // participants
                             $ps_list = @array_shift($withdraw->xpath('./participants_list'));
                             foreach ($ps_list as $ps) {
                                 $group_id = parent::getRes() . $nct_id . "/group/" . $ps->attributes()->group_id;
                                 $count = (string) $ps->attributes()->count;
                                 parent::addRDF(parent::triplify($wid, parent::getVoc() . "group", $group_id) . parent::triplifyString($wid, parent::getVoc() . "count", $count));
                             }
                         }
                     }
                 }
             }
         } catch (Exception $e) {
             echo "There was an error parsing participant flow element: {$e}\n";
         }
         ################################################################################
         # baseline
         ################################################################################
         try {
             $baseline = @array_shift($root->xpath('//baseline'));
             if ($baseline) {
                 $b_id = $this->nct_id . "/baseline";
                 $b_uri = parent::getRes() . $b_id;
                 // group list
                 $groups = @array_shift($baseline->xpath('./group_list'));
                 foreach ($groups as $group) {
                     parent::addRDF(parent::describeIndividual($b_uri, "baseline for {$nct_id}", parent::getVoc() . "Baseline") . parent::describeClass(parent::getVoc() . "Baseline", "Baseline") . parent::triplify($b_uri, parent::getVoc() . "group", $this->makeGroup($group)) . parent::triplify($study_id, parent::getVoc() . "baseline", $b_uri));
                 }
                 // measure list
                 $measures = @array_shift($baseline->xpath('./measure_list'));
                 foreach ($measures as $measure) {
                     parent::addRDF(parent::triplify($b_uri, parent::getVoc() . "measure", $this->makeMeasure($measure)));
                 }
             }
         } catch (Exception $e) {
             echo "Error in parsing baseline" . PHP_EOL;
         }
         ################################################################################
         # outcomes
         ################################################################################
         try {
             $outcomes = @array_shift($root->xpath('//outcome_list'));
             if ($outcomes) {
                 foreach ($outcomes as $i => $outcome) {
                     $outcome_id = $this->nct_id . "/outcome/" . ($i + 1);
                     $outcome_uri = parent::getRes() . $outcome_id;
                     $outcome_label = $this->getString("./title", $outcome);
                     if (!$outcome_label) {
                         $outcome_label = "outcome for " . $this->nct_id;
                     }
                     parent::addRDF(parent::describeIndividual($outcome_uri, $outcome_label, parent::getVoc() . "Outcome", $this->getString("./description", $outcome)) . parent::describeClass(parent::getVoc() . "Outcome", "Outcome") . parent::triplify($study_id, parent::getVoc() . "outcome", $outcome_uri) . parent::triplifyString($outcome_uri, parent::getVoc() . "type", $this->getString("./type", $outcome)) . parent::triplifyString($outcome_uri, parent::getVoc() . "time-frame", $this->getString("./time_frame", $outcome)) . parent::triplifyString($outcome_uri, parent::getVoc() . "safety-issue", $this->getString("./safety_issue", $outcome)) . parent::triplifyString($outcome_uri, parent::getVoc() . "posting-date", $this->getString("./posting-date", $outcome)) . parent::triplifyString($outcome_uri, parent::getVoc() . "population", $this->getString("./population", $outcome)));
                     $groups = @array_shift($outcome->xpath('./group_list'));
                     if ($groups) {
                         foreach ($groups as $group) {
                             parent::addRDF(parent::triplify($outcome_uri, parent::getVoc() . "group", $this->makeGroup($group)));
                         }
                     }
                     // measure list
                     $measures = @array_shift($outcome->xpath('./measure_list'));
                     if ($measures) {
                         foreach ($measures as $measure) {
                             parent::addRDF(parent::triplify($outcome_uri, parent::getVoc() . "measure", $this->makeMeasure($measure)));
                         }
                     }
                     // analysis list
                     $analyses = @array_shift($outcome->xpath('./analysis_list'));
                     if ($analyses) {
                         foreach ($analyses as $analysis) {
                             parent::addRDF(parent::triplify($outcome_uri, parent::getVoc() . "analysis", $this->makeAnalysis($analysis)));
                         }
                     }
                 }
             }
         } catch (Exception $e) {
             echo "Error in parsing outcomes" . PHP_EOL;
         }
         ################################################################################
         # events
         ################################################################################
         try {
             $c_ev = $c_c = 1;
             $reported_events = @array_shift($root->xpath('//reported_events'));
             if ($reported_events) {
                 $rp_id = parent::getRes() . md5($reported_events->asXML());
                 $groups = @array_shift($reported_events->xpath('./group_list'));
                 parent::addRDF(parent::describeIndividual($rp_id, "Reported events for {$nct_id}", parent::getVoc() . "Reported-Events") . parent::describeClass(parent::getVoc() . "Reported-Events", "Reported Events") . parent::triplify($study_id, parent::getVoc() . "reported-events", $rp_id));
                 foreach ($groups as $group) {
                     parent::addRDF(parent::triplify($rp_id, parent::getVoc() . "group", $this->makeGroup($group)));
                 }
                 // events
                 $event_list = array("serious_events" => "Serious Event", "other_events" => "Other Event");
                 foreach ($event_list as $ev => $ev_label) {
                     $et = @array_shift($reported_events->xpath('./' . $ev));
                     if (!$et) {
                         continue;
                     }
                     $ev_uri = parent::getVoc() . str_replace(" ", "-", $ev_label);
                     $categories = @array_shift($et->xpath('./category_list'));
                     foreach ($categories as $category) {
                         $major_title = $this->getString('./title', $category);
                         $major_title_uri = parent::getRes() . md5($major_title);
                         $events = @array_shift($category->xpath('./event_list'));
                         foreach ($events as $event) {
                             $e_uri = parent::getRes() . $this->nct_id . "/{$ev}/" . $c_ev++;
                             $subtitle = (string) $this->getString('./sub_title', $event) . " for " . $this->nct_id;
                             $subtitle_uri = parent::getRes() . md5($subtitle);
                             parent::addRDF(parent::describeIndividual($e_uri, $subtitle, $ev_uri) . parent::describeClass($ev_uri, $ev_label) . parent::triplify($e_uri, parent::getVoc() . "sub-title", $subtitle_uri) . parent::describeIndividual($subtitle_uri, $subtitle, parent::getVoc() . "Event") . parent::describeClass(parent::getVoc() . "Event", "Event") . parent::triplify($e_uri, parent::getVoc() . "major-title", $major_title_uri) . parent::describeClass($major_title_uri, $major_title) . parent::triplify($rp_id, parent::getVoc() . str_replace("_", "-", $ev), $e_uri));
                             $counts = $event->xpath('./counts');
                             foreach ($counts as $c) {
                                 $group_id = $c->attributes()->group_id;
                                 $group_uri = parent::getRes() . $nct_id . "/group/" . $group_id;
                                 $c_uri = $e_uri . "/count/" . $c_c++;
                                 parent::addRDF(parent::describeIndividual($c_uri, $subtitle . " for " . $group_id . " in " . $this->nct_id, parent::getVoc() . "Event-Count") . parent::describeClass(parent::getVoc() . "Event-Count", "Event Count") . parent::triplify($c_uri, parent::getVoc() . "group", $group_uri) . parent::triplify($e_uri, parent::getVoc() . "count", $c_uri) . parent::triplifyString($c_uri, parent::getVoc() . "default-vocabulary", $this->getString('./default_vocab', $et)) . parent::triplifyString($c_uri, parent::getVoc() . "frequency-threshold", $this->getString('./frequency_threshold', $et)) . parent::triplifyString($c_uri, parent::getVoc() . "default-assessment", $this->getString('./default_assessment', $et)) . parent::triplifyString($c_uri, parent::getVoc() . "number-events", $c->attributes()->events) . parent::triplifyString($c_uri, parent::getVoc() . "subjects-affected", $c->attributes()->subjects_affected) . parent::triplifyString($c_uri, parent::getVoc() . "subjects-at-risk", $c->attributes()->subjects_at_risk));
                             }
                         }
                     }
                 }
             }
         } catch (Exception $e) {
             echo "Error in parsing reported events" . PHP_EOL;
         }
         parent::writeRDFBufferToWriteFile();
     }
     $this->setCheckPoint('record');
     $this->setCheckPoint('dataset');
 }
Example #15
0
 /**
  * Converts the tab-delimited iProClass file set as the current read file
  * into cross-reference statements.
  *
  * Each line is split on tabs; column 0 is the UniProt accession that
  * identifies the entry, and every other populated column listed in the
  * table below is expanded into one statement per identifier. Output is
  * written to "iproclass.N.<output_format>" in 'outdir', starting a new
  * file on the first line and again after every 1,000,000 lines.
  *
  * @return void
  */
 private function process()
 {
     // Column index => how the column is emitted, in output order:
     //   'voc' - predicate suffix appended to getVoc() for a triplify() statement (null: none)
     //   'ns'  - prefix placed in front of every identifier of that statement
     //   'url' - base URL of an rdfs:seeAlso statement (null: none)
     //   'sep' - delimiter between the identifiers inside the column
     //   'cut' - number of leading characters removed from every identifier
     // Column 14 (PIR-PSD) is left out on purpose: that database is no longer maintained.
     $defaults = array('voc' => null, 'ns' => '', 'url' => null, 'sep' => "; ", 'cut' => 0);
     $columns = array(
         1 => array('voc' => "x-uniprot", 'ns' => "uniprot:"),
         2 => array('voc' => "x-ncbigene", 'ns' => "geneid:"),
         3 => array('voc' => "x-refseq", 'ns' => "refseq:"),
         4 => array('voc' => "x-gi", 'ns' => "gi:"),
         5 => array('voc' => "x-pdb", 'ns' => "pdb:"),
         6 => array('voc' => "x-pfam", 'ns' => "pfam:"),
         // the first three characters of each GO identifier are dropped
         // (presumably a "GO:" prefix - confirm against the input file)
         7 => array('voc' => "x-go", 'ns' => "go:", 'cut' => 3),
         8 => array('voc' => "x-pirsf", 'ns' => "pirsf:"),
         9 => array('voc' => "x-ipi", 'ns' => "ipi:"),
         10 => array('url' => "http://uniprot.org/uniref/"),
         11 => array('url' => "http://uniprot.org/uniref/"),
         12 => array('url' => "http://uniprot.org/uniref/"),
         13 => array('voc' => "x-uniparc", 'ns' => "uniparc:", 'url' => "http://uniprot.org/uniparc/"),
         15 => array('voc' => "x-taxon", 'ns' => "taxon:"),
         16 => array('voc' => "x-omim", 'ns' => "omim:"),
         17 => array('voc' => "x-unigene", 'ns' => "unigene:"),
         18 => array('voc' => "x-ensembl", 'ns' => "ensembl:"),
         19 => array('voc' => "x-pubmed", 'ns' => "pubmed:"),
         20 => array('voc' => "x-genbank", 'ns' => "genbank:"),
         // the EMBL protein column is split on a bare ";"
         21 => array('voc' => "x-genbank", 'ns' => "genbank:", 'sep' => ";"),
     );
     foreach ($columns as $index => $spec) {
         $columns[$index] = $spec + $defaults;
     }

     $z = 0;
     $y = 1;
     while ($l = $this->getReadFile()->Read(200000)) {
         if ($z++ % 1000000 == 0) {
             echo $z . PHP_EOL;
             $odir = parent::getParameterValue('outdir');
             $ofile = 'iproclass.' . $y++ . "." . parent::getParameterValue('output_format');
             $gz = strstr(parent::getParameterValue('output_format'), "gz") ? true : false;
             if (parent::getWriteFile() != null) {
                 parent::getWriteFile()->close();
                 parent::clear();
             }
             // generate a new file
             parent::setWriteFile($odir . $ofile, $gz);
         }
         // strip the line terminator so it cannot end up inside the last populated column
         $fields = explode("\t", rtrim($l, "\r\n"));
         $uniprot_acc = isset($fields[0]) ? trim($fields[0]) : '';
         if ($uniprot_acc === '') {
             // nothing to attach the cross-references to
             continue;
         }
         $id_res = $this->getNamespace() . $uniprot_acc;
         parent::addRDF(parent::triplify($id_res, $this->getVoc() . "x-uniprot", "uniprot:" . $uniprot_acc));

         foreach ($columns as $index => $spec) {
             // short rows simply have no value for the trailing columns
             $raw = isset($fields[$index]) ? trim($fields[$index]) : '';
             if (empty($raw)) {
                 continue;
             }
             foreach (explode($spec['sep'], $raw) as $value) {
                 // identifiers may carry blanks when the delimiter is a bare ";"
                 $value = trim($value);
                 if ($spec['cut'] > 0) {
                     $value = (string) substr($value, $spec['cut']);
                 }
                 if ($value === '') {
                     continue;
                 }
                 $rdf = '';
                 if ($spec['voc'] !== null) {
                     $rdf .= parent::triplify($id_res, $this->getVoc() . $spec['voc'], $spec['ns'] . $value);
                 }
                 if ($spec['url'] !== null) {
                     $rdf .= parent::QQuadO_URL($id_res, "rdfs:seeAlso", $spec['url'] . $value);
                 }
                 parent::addRDF($rdf);
             }
         }
         //write rdf to file
         $this->WriteRDFBufferToWriteFile();
     }
     //while
 }
Example #16
0
 /**
  * Converts the selected BioModels entries (BioPAX level 3 exports) to RDF
  * and writes the accompanying dataset description.
  *
  * The 'files' parameter selects the work:
  *   - "all" / "curated": the identifier list is read from a JSON cache in
  *     'indir' or, when the cache is missing, fetched from the BioModels
  *     SOAP service and cached;
  *   - "A-B": a numeric range of model numbers;
  *   - otherwise: a comma separated list of model numbers.
  * Each model is downloaded when its local copy is missing (or 'download'
  * is set), converted with BioPAX2Bio2RDF and appended to the single
  * "biomodels.<output_format>" file in 'outdir'.
  *
  * @return void
  */
 function Run()
 {
     // directory shortcuts
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     // get the work specified
     $list = trim(parent::getParameterValue('files'));
     // start from a defined list so that no branch can leave $entries unset
     $entries = array();
     if ($list == 'all' || $list == 'curated') {
         // both modes differ only in the cache file and the web service operation used
         $curated_only = $list == 'curated';
         $file = $ldir . ($curated_only ? "curated_models.json" : "all_models.json");
         if (!file_exists($file)) {
             try {
                 $x = @new SoapClient("http://www.ebi.ac.uk/biomodels-main/services/BioModelsWebServices?wsdl");
             } catch (Exception $e) {
                 // without a client there is no way to enumerate the models
                 echo $e->getMessage() . PHP_EOL;
                 return;
             }
             $entries = $curated_only ? $x->getAllCuratedModelsId() : $x->getAllModelsId();
             file_put_contents($file, json_encode($entries));
         } else {
             $entries = json_decode(file_get_contents($file));
         }
         // an unreadable cache or an empty service reply must not reach count()/foreach as null
         if (!is_array($entries)) {
             $entries = (array) $entries;
         }
     } elseif (($pos = strpos($list, "-")) !== FALSE) {
         // a hyphenated range was provided
         $start_range = (int) substr($list, 0, $pos);
         $end_range = (int) substr($list, $pos + 1);
         for ($n = $start_range; $n <= $end_range; $n++) {
             $entries[] = "BIOMD" . str_pad((string) $n, 10, "0", STR_PAD_LEFT);
         }
     } else {
         // for comma separated list
         foreach (explode(",", $list) as $item) {
             $entries[] = "BIOMD" . str_pad(trim($item), 10, "0", STR_PAD_LEFT);
         }
     }
     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }
     // set the write file
     $suffix = parent::getParameterValue('output_format');
     $outfile = 'biomodels' . '.' . $suffix;
     $gz = false;
     if (strstr(parent::getParameterValue('output_format'), "gz")) {
         $gz = true;
     }
     $dataset_description = '';
     parent::setWriteFile($odir . $outfile, $gz);
     // iterate over the entries
     $i = 0;
     $total = count($entries);
     // stays null when no model could be processed
     $source_file = null;
     foreach ($entries as $id) {
         echo "processing " . ++$i . " of {$total} - biomodel# " . $id;
         $download_file = $ldir . $id . ".owl.gz";
         $url = parent::getParameterValue('download_url') . "publ/{$id}/{$id}-biopax3.owl";
         // download if the file doesn't exist or we are told to
         if (!file_exists($download_file) || $this->GetParameterValue('download') == 'true') {
             // download
             echo " - downloading";
             $ret = utils::downloadsingle($url, 'compress.zlib://' . $download_file, true);
             if ($ret === false) {
                 echo "\nTrying non-curated model";
                 $url = parent::getParametervalue('download_url') . "uncura_publ/{$id}/{$id}-biopax3.owl";
                 $ret = utils::downloadsingle($url, 'compress.zlib://' . $download_file, true);
                 if ($ret === false) {
                     continue;
                 }
             }
             echo " - downloaded";
         }
         // load entry, parse and write to file
         echo " - parsing... ";
         $buf = file_get_contents("compress.zlib://" . $download_file);
         if ($buf === false) {
             // an unreadable local copy must not be handed to the converter
             echo "unable to read {$download_file}" . PHP_EOL;
             continue;
         }
         $converter = new BioPAX2Bio2RDF($this);
         $converter->SetBuffer($buf)
             ->SetBioPAXVersion(3)
             ->SetBaseNamespace("http://identifiers.org/biomodels.db/{$id}/")
             ->SetBio2RDFNamespace("http://bio2rdf.org/biomodels:" . $id . "_")
             ->SetDatasetURI($this->GetDatasetURI());
         $rdf = $converter->Parse();
         parent::addRDF($rdf);
         parent::writeRDFBufferToWriteFile();
         //generate dataset description
         $source_file = (new DataResource($this))
             ->setURI($url)
             ->setTitle("EBI BioModels Database - BioModel # {$id}")
             ->setRetrievedDate(date("Y-m-d\\TG:i:s\\Z", filemtime($download_file)))
             ->setFormat("rdf/xml")
             ->setPublisher("http://www.ebi.ac.uk/")
             ->setHomepage("http://www.ebi.ac.uk/biomodels-main/")
             ->setRights("use-share-modify")
             ->setLicense("http://www.ebi.ac.uk/biomodels-main/termsofuse")
             ->setDataset("http://identifiers.org/biomodels.db/");
         $dataset_description .= $source_file->toRDF();
         echo "done!" . PHP_EOL;
     }
     //foreach
     parent::getWriteFile()->close();
     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = date("Y-m-d\\TG:i:s\\Z");
     // The setter chain is split only so that setSource() can be skipped when
     // no model was processed; the setters are still applied in the same order.
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})");
     if ($source_file !== null) {
         // as before, only the most recently processed model is recorded as the source
         $output_file = $output_file->setSource($source_file->getURI());
     }
     $output_file = $output_file
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/biomodels/biomodels.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr(parent::getParameterValue('output_format'), "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description .= $output_file->toRDF();
     //write dataset description to file
     parent::setGraphURI($graph_uri);
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Example #17
0
 /**
  * Converts one gzip-compressed XML input file.
  *
  * The output file is placed in 'outdir' and named after the input file
  * with its ".xml.gz" ending replaced by the configured output format.
  * The script terminates when the input cannot be opened.
  *
  * @param string $infile path of the ".xml.gz" file to convert
  */
 function process_file($infile)
 {
     $out_dir = parent::getParameterValue('outdir');
     $format = parent::getParameterValue('output_format');
     $target = $out_dir . basename($infile, ".xml.gz") . '.' . $format;
     // compressed output is requested by a ".gz" inside the output format
     $compress = strstr(parent::getParameterValue('output_format'), ".gz") !== FALSE;

     $handle = gzopen($infile, "r");
     if (!$handle) {
         die("Could not open file " . $infile . "!\n");
     }

     // register the input, then swap in the gzip handle opened above
     $this->setReadFile($infile);
     $this->getReadFile()->setFilePointer($handle);
     $this->setWriteFile($target, $compress);
     $this->setCheckPoint('file');

     $this->pubmed();

     // flush whatever is still buffered before closing the output
     $this->writeRDFBufferToWriteFile();
     $this->getWriteFile()->close();
 }
Example #18
0
 /**
  * Downloads (when needed) and converts the selected MeSH files, then
  * writes one dataset description covering every converted file.
  *
  * The 'files' parameter is either "all" or a comma separated list of
  * package keys; keys that are not in the package map are ignored. For
  * each package the method named after its key performs the conversion.
  */
 function Run()
 {
     // resolve the selection into a package key => file name pattern map
     $selection = trim(parent::getParameterValue('files'));
     if ($selection == 'all') {
         $files = $this->getPackageMap();
     } else {
         $requested = explode(",", $selection);
         $package_map = $this->getPackageMap();
         $files = array();
         foreach ($requested as $name) {
             if (!array_key_exists($name, $package_map)) {
                 continue;
             }
             $files[$name] = $package_map[$name];
         }
     }

     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $description = '';
     $year = parent::getParameterValue('year');

     foreach ($files as $k => $fpattern) {
         // the pattern carries a YEAR placeholder that is filled from the 'year' parameter
         $file = str_replace("YEAR", $year, $fpattern);
         $local_path = $ldir . $file;
         $remote_url = parent::getParameterValue("download_url") . $file;

         // fetch when there is no local copy or a download was requested
         $must_fetch = !file_exists($local_path) || parent::getParameterValue('download') == "true";
         if ($must_fetch) {
             echo "Downloading {$file} ... ";
             if (utils::downloadSingle($remote_url, $local_path) === FALSE) {
                 trigger_error("Unable to get {$file}", E_USER_ERROR);
                 continue;
             }
             echo "done!" . PHP_EOL;
         }

         // convert: the method named after the package key does the parsing
         $ofile = "mesh_" . $k . "." . parent::getParameterValue('output_format');
         $gz = (bool) strstr(parent::getParameterValue('output_format'), "gz");
         echo "processing {$k} ...";
         parent::setReadFile($local_path, FALSE);
         parent::setWriteFile($odir . $ofile, $gz);
         $this->{$k}();
         parent::writeRDFBufferToWriteFile();
         parent::getWriteFile()->close();
         echo "done!" . PHP_EOL;

         // describe the source file ...
         $source_file = (new DataResource($this))
             ->setURI($remote_url)
             ->setTitle("MeSH")
             ->setRetrievedDate(parent::getDate(filemtime($local_path)))
             ->setFormat("text/x-mesh-record")
             ->setPublisher("http://www.nlm.nih.gov")
             ->setHomepage("http://www.nlm.nih.gov/mesh/")
             ->setRights("use")
             ->setLicense("http://www.nlm.nih.gov/databases/download.html")
             ->setDataset("http://identifiers.org/mesh/");

         // ... and the file generated from it
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = parent::getDate(filemtime($odir . $ofile));
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/mesh/mesh.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         $is_triples = strstr(parent::getParameterValue('output_format'), "nt");
         $output_file->setFormat($is_triples ? "application/n-triples" : "application/n-quads");

         $description .= $source_file->toRDF() . $output_file->toRDF();
     }

     // write the collected dataset description to the release file
     parent::setWriteFile($odir . $this->getBio2RDFReleaseFile($this->getNamespace()));
     parent::getWriteFile()->write($description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Example #19
0
 /**
  * Downloads (when needed) and converts the selected Gene Ontology
  * Annotation files, then writes the dataset description.
  *
  * The 'files' parameter is either "all" (every value listed for the
  * parameter is used) or a comma separated list. For each entry the local
  * copy "goa_<entry>.gz" in 'indir' is converted by process() into
  * "goa_<entry>.<output_format>" in 'outdir'. An entry whose input file is
  * still missing after the download attempt is skipped with a warning.
  *
  * @return void
  */
 function run()
 {
     if (parent::getParameterValue('files') == 'all') {
         $files = explode("|", $this->GetParameterList('files'));
         // drop the first listed option (presumably the "all" keyword itself - confirm
         // against the parameter definition)
         array_shift($files);
     } else {
         $files = explode(",", parent::getParameterValue('files'));
     }
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     $dataset_description = '';
     foreach ($files as $file) {
         $download = parent::getParameterValue('download');
         $lfile = $ldir . "goa_" . $file . ".gz";
         if (!file_exists($lfile) && $download == false) {
             trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
             $download = true;
         }
         //download file
         $rfile = $rdir . strtoupper($file) . "/gene_association.goa_" . $file . ".gz";
         if ($download == true) {
             echo "downloading {$file} ... ";
             utils::DownloadSingle($rfile, $lfile);
         }
         if (!file_exists($lfile)) {
             // the download did not produce the input; there is nothing to convert
             trigger_error("Unable to obtain {$lfile} from {$rfile}; skipping {$file}.", E_USER_WARNING);
             continue;
         }
         $gz = strstr(parent::getParameterValue('output_format'), ".gz") === FALSE ? false : true;
         $ofile = "goa_" . $file . "." . parent::getParameterValue('output_format');
         parent::setReadFile($lfile, TRUE);
         parent::setWriteFile($odir . $ofile, $gz);
         echo "processing {$file} ... ";
         $this->process($file);
         echo "done!";
         parent::clear();
         //close write file
         parent::getWriteFile()->close();
         echo PHP_EOL;
         // dataset description
         $graph_uri = parent::getGraphURI();
         if (parent::getParameterValue('dataset_graph') == true) {
             parent::setGraphURI(parent::getDatasetURI());
         }
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("Gene Ontology Annotation file {$file} ({$rfile})")
             ->setRetrievedDate(date("Y-m-d\\TG:i:s\\Z", filemtime($lfile)))
             ->setFormat("text/tab-separated-value")
             ->setFormat("application/gzip")
             ->setPublisher("http://www.ebi.ac.uk/")
             ->setHomepage("http://www.ebi.ac.uk/GOA/")
             ->setRights("use")
             ->setLicense("http://www.ebi.ac.uk/GOA/goaHelp.html")
             ->setDataset("http://identifiers.org/goa/");
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = date("Y-m-d\\TG:i:s\\Z");
         // NOTE(review): the creator previously pointed at the irefindex script, which
         // looks like a copy-paste slip; confirm goa/goa.php is the right location.
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/goa/goa.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr(parent::getParameterValue('output_format'), "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
         // restore the graph that was active before the description was generated
         parent::setGraphURI($graph_uri);
     }
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Example #20
0
 /**
  * Returns the OMIM entries listed in mim2gene.txt.
  *
  * The file is fetched from ftp.omim.org when it is missing from $ldir or
  * when the 'download' parameter is set. The script terminates when the
  * FTP connection or login fails. A failed transfer is reported and any
  * copy already present is still parsed.
  *
  * @param string $ldir directory holding mim2gene.txt; it is concatenated
  *                     directly with the file name, so it must end with a
  *                     directory separator
  * @return array second column of the file (entry type) keyed by the first
  *               column (MIM number), without "moved/removed" entries;
  *               empty when the file cannot be read
  */
 function getListOfEntries($ldir)
 {
     // get the master list of entries
     $file = "mim2gene.txt";
     if (!file_exists($ldir . $file)) {
         trigger_error($ldir . $file . " not found. Will attempt to download. ", E_USER_NOTICE);
         $this->SetParameterValue('download', true);
     }
     if (parent::getParameterValue('download') == true) {
         // connect
         $host = 'ftp.omim.org';
         echo "connecting to {$host} ...";
         $ftp = ftp_connect($host);
         if (!$ftp) {
             echo "Unable to connect to {$host}" . PHP_EOL;
             die;
         }
         $login = ftp_login($ftp, 'anonymous', '*****@*****.**');
         if (!$login) {
             ftp_close($ftp);
             echo "FTP-connect failed!";
             die;
         }
         echo "Connected" . PHP_EOL;
         // passive mode can only be enabled once the login has succeeded
         ftp_pasv($ftp, true);
         // download
         echo "Downloading {$file} ...";
         $downloaded = ftp_get($ftp, $ldir . $file, 'OMIM/' . $file, FTP_BINARY);
         // release the connection on success and on failure alike
         ftp_close($ftp);
         if ($downloaded === FALSE) {
             trigger_error("Error in downloading {$file}");
         } else {
             echo "success!" . PHP_EOL;
         }
     }
     // parse the mim2gene file for the entries
     // # Mim Number    Type    Gene IDs        Approved Gene Symbols
     $list = array();
     if (!is_readable($ldir . $file)) {
         trigger_error("Unable to read " . $ldir . $file, E_USER_WARNING);
         return $list;
     }
     $fp = fopen($ldir . $file, "r");
     if ($fp === FALSE) {
         return $list;
     }
     // skip the header line
     fgets($fp);
     while (($l = fgets($fp)) !== FALSE) {
         $a = explode("\t", rtrim($l, "\r\n"));
         if (!isset($a[1])) {
             // blank or malformed line: no type column
             continue;
         }
         if ($a[1] != "moved/removed") {
             $list[$a[0]] = $a[1];
         }
     }
     fclose($fp);
     return $list;
 }
Example #21
0
 /**
  * Downloads (when needed) and converts the selected NCBI Gene files, then
  * writes the dataset description.
  *
  * The 'files' parameter is either "all" or a comma separated list of
  * module names from the package map; unknown names are ignored. All
  * downloads happen first; afterwards each module is converted by the
  * method named after it ("gene2refseq" is handled by gene2accession()).
  * A module whose input file is missing after the download pass is skipped
  * with a warning.
  *
  * @return void
  */
 function process()
 {
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     //which files are to be converted?
     $files = trim($this->GetParameterValue('files'));
     if ($files == 'all') {
         $files = $this->getPackageMap();
     } else {
         $sel_arr = explode(",", $files);
         $pm = $this->getPackageMap();
         $files = array();
         foreach ($sel_arr as $a) {
             if (array_key_exists($a, $pm)) {
                 $files[$a] = $pm[$a];
             }
         }
     }
     if ($this->getParameterValue('limit_organisms') == true) {
         // flipped so that the taxon ids become the array keys
         $this->taxids = array_flip(explode(",", $this->getParameterValue('organisms')));
     }
     //set dataset graph to be dataset URI
     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }
     $dataset_description = '';
     // first pass: make sure every input file is available locally
     foreach ($files as $module => $rfilename) {
         $lfile = $ldir . $module . ".gz";
         $rfile = $rdir . $rfilename;
         $missing = !file_exists($lfile);
         if ($missing) {
             // only report a missing file when it really is missing, not on a forced download
             trigger_error("{$lfile} not found. Will attempt to download.", E_USER_NOTICE);
         }
         if ($missing || parent::getParameterValue('download') == true) {
             $myfile = $lfile;
             if ($module == "gene2sts" || $module == "gene2unigene") {
                 // these two are written through the zlib stream wrapper (presumably because
                 // the remote copies are not gzip-compressed - confirm)
                 $myfile = "compress.zlib://" . $lfile;
             }
             echo "downloading {$module} ...";
             utils::DownloadSingle($rfile, $myfile);
             echo "done" . PHP_EOL;
         }
     }
     // second pass: convert every module and describe the files involved
     foreach ($files as $module => $rfilename) {
         $lfile = $ldir . $module . ".gz";
         $rfile = $rdir . $rfilename;
         if (!file_exists($lfile)) {
             // the download above did not produce the input; nothing to convert
             trigger_error("{$lfile} is not available; skipping {$module}.", E_USER_WARNING);
             continue;
         }
         $ofile = $module . "." . parent::getParameterValue('output_format');
         $gz = false;
         if (strstr(parent::getParameterValue('output_format'), "gz")) {
             $gz = true;
         }
         echo "Processing {$module} ... ";
         parent::setReadFile($lfile, true);
         parent::setWriteFile($odir . $ofile, $gz);
         // the parser method carries the module's name, except for gene2refseq
         $fnx = $module;
         if ($module == 'gene2refseq') {
             $fnx = 'gene2accession';
         }
         $this->{$fnx}();
         parent::clear();
         echo 'done!' . PHP_EOL;
         parent::getReadFile()->close();
         parent::getWriteFile()->close();
         // generate the dataset release file
         // dataset description
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("NCBI Gene ({$module})")
             ->setRetrievedDate(date("Y-m-d\\TG:i:s\\Z", filemtime($lfile)))
             ->setFormat("text/tab-separated-value")
             ->setFormat("application/gzip")
             ->setPublisher("http://www.ncbi.nlm.nih.gov")
             ->setHomepage("http://www.ncbi.nlm.nih.gov/gene")
             ->setRights("use-share-modify")
             ->setLicense("http://www.ncbi.nlm.nih.gov/About/disclaimer.html")
             ->setDataset("http://identifiers.org/ncbigene/");
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = date("Y-m-d\\TG:i:s\\Z");
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ncbigene/ncbigene.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr(parent::getParameterValue('output_format'), "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
     }
     //foreach
     //set graph URI back to default value
     parent::setGraphURI($graph_uri);
     //write dataset description to file
     echo "Generating dataset description... ";
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Example #22
0
 public function Run()
 {
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     // make sure we have the zip archive
     //which files are to be converted?
     $selectedPackage = trim(parent::getParameterValue('files'));
     if ($selectedPackage == 'all') {
         $files = $this->getPackageMap();
     } else {
         $sel_arr = explode(",", $selectedPackage);
         $pm = $this->getPackageMap();
         $files = array();
         foreach ($sel_arr as $a) {
             if (array_key_exists($a, $pm)) {
                 $files[$a] = $pm[$a];
             }
         }
     }
     $dataset_description = '';
     foreach ($files as $key => $value) {
         $lfile = $ldir . $value['filename'];
         if (!file_exists($lfile) && parent::getParameterValue('download') == false) {
             trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
             $this->SetParameterValue('download', true);
         }
         //download all files [except mapping file]
         if ($this->GetParameterValue('download') == true) {
             $rfile = $value["file_url"];
             echo "downloading " . var_dump($value["file_url"]) . " ... ";
             utils::downloadSingle($rfile, $lfile);
         }
         if ($key == "taxdmp" || $key == "gi2taxid_protein" || $key == "gi2taxid_nucleotide") {
             //get the name of the zip archive
             $lfile = $value["filename"];
             // make sure we have the zip archive
             $zinfile = $ldir . $lfile;
             $zin = new ZipArchive();
             if ($zin->open($zinfile) === FALSE) {
                 trigger_error("Unable to open {$zinfile}");
                 exit;
             }
             //now iterate over the files in the ziparchive
             $source_file = (new DataResource($this))->setURI($value['file_url'])->setTitle('NCBI Taxonomy - ' . $key)->setRetrievedDate(date("Y-m-d\\TH:i:sP", filemtime($ldir . $lfile)))->setFormat('text/tab-separated-value')->setFormat('application/zip')->setPublisher('http://www.ncbi.nlm.nih.gov')->setHomepage('http://www.ncbi.nlm.nih.gov/taxonomy')->setRights('use')->setRights('attribution')->setLicense('https://www.nlm.nih.gov/copyright.html')->setDataset(parent::getDatasetURI());
             $prefix = parent::getPrefix();
             $bVersion = parent::getParameterValue('bio2rdf_release');
             $date = date("Y-m-d\\TH:i:sP");
             $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$key}")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/taxonomy/taxonomy.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("restricted-by-source-license")->setLicense("http://creativecommons/licenses/by/3.0/")->setDataset(parent::getDatasetURI());
             $dataset_description .= $output_file->toRDF() . $source_file->toRDF();
             foreach ($value["contents"] as $k => $fn) {
                 if ($k == "names" || $k == "nodes" || $k == "citations" || $k == "gencode" || $k == "division" || $k == "gi_taxid_prot" || $k == "gi_taxid_nucl") {
                     //if($k !== 'citations') continue;
                     $fpin = $zin->getStream($fn);
                     if (!$fpin) {
                         trigger_error("Unable to get pointer to {$fn} in {$zinfile}");
                         exit("failed\n");
                     }
                     $gzoutfile = $odir . "taxonomy-{$k}" . "." . parent::getParameterValue('output_format');
                     //set the write file
                     $gz = strstr(parent::getParameterValue('output_format'), 'gz') ? true : false;
                     parent::setReadFile($ldir . $lfile);
                     parent::getReadFile()->SetFilePointer($fpin);
                     parent::setWriteFile($gzoutfile, $gz);
                     echo "processing {$fn}...\n";
                     $this->{$k}();
                     $this->GetWriteFile()->Close();
                     echo "done!" . PHP_EOL;
                     parent::clear();
                 }
                 //if $k
             }
             //foreach
         }
         //if key taxdmp
         $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
         $this->getWriteFile()->write($dataset_description);
         $this->getWriteFile()->close();
     }
 }
Example #23
0
 /**
  * Download (when needed) the RefSeq protein release files, convert every
  * gzipped file found in the input directory, and write the dataset description.
  *
  * The FTP listing is cached in "<indir>ftp_list.txt"; it is refreshed when the
  * cache is missing or the 'download' parameter is set. Individual files are
  * downloaded when they are missing locally or 'download' is set.
  */
 function Run()
 {
     $dataset_description = '';
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $list_file = $ldir . "ftp_list.txt";
     if (!file_exists($list_file) || $this->getParameterValue('download') == true) {
         echo "Getting FTP file list ...";
         $list = $this->getFtpFileList('ftp.ncbi.nlm.nih.gov', '/refseq/release/complete/', '/(complete\\.[0-9]+\\.protein\\.gpff\\.gz)/');
         if (!isset($list) or count($list) == 0) {
             trigger_error("Unable to get list of files from FTP site. Check internet connection", E_USER_ERROR);
             exit(-1);
         }
         asort($list);
         $buf = implode("\n", $list);
         file_put_contents($list_file, $buf);
         echo "Done." . PHP_EOL;
     } else {
         echo "Using existing ftp list" . PHP_EOL;
         // drop blank lines (e.g. a trailing newline) so they are not treated as file names
         $list = array_filter(array_map('trim', explode("\n", file_get_contents($list_file))), 'strlen');
     }
     $counter = 1;
     $total = count($list);
     foreach ($list as $f) {
         $lfile = $ldir . $f;
         echo "Processing " . $counter++ . "/{$total} {$f}. ";
         if (!file_exists($lfile) || $this->getParameterValue('download') == true) {
             $rfile = parent::getParameterValue('download_url') . $f;
             echo "Downloading ...";
             utils::DownloadSingle($rfile, $lfile);
             echo "done.";
         } else {
             echo "Using existing file.";
         }
         echo PHP_EOL;
     }
     // values that do not change from one file to the next
     $output_format = parent::getParameterValue('output_format');
     $gz = strstr($output_format, "gz") ? true : false;
     $download_url = parent::getParameterValue('download_url');
     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     // ISO 8601 in UTC: 'H' keeps the leading zero on the hour and gmdate() makes the literal 'Z' true
     $date_format = "Y-m-d\\TH:i:s\\Z";
     //iterate over the files
     $files = $this->getFilePaths($ldir, 'gz');
     asort($files);
     foreach ($files as $f) {
         $lfile = $ldir . $f;
         $ofile = $odir . basename($f, ".gz") . "." . $output_format;
         parent::setWriteFile($ofile, $gz);
         parent::setReadFile($lfile, true);
         echo "processing {$f} ...";
         $this->process();
         parent::clear();
         echo "done!" . PHP_EOL;
         $this->getReadFile()->close();
         $this->getWriteFile()->close();
         // the remote URI is download_url + file name (same as the download step above),
         // not download_url + local path
         $source_file = (new DataResource($this))->setURI($download_url . $f)->setTitle("NCBI RefSeq - {$f}")->setRetrievedDate(gmdate($date_format, filemtime($lfile)))->setFormat('text/refseq-format')->setFormat('application/zip')->setPublisher('http://www.ncbi.nlm.nih.gov')->setHomepage('http://www.ncbi.nlm.nih.gov/refseq')->setRights('use')->setRights('attribution')->setLicense('http://www.nlm.nih.gov/copyright.html')->setDataset(parent::getDatasetURI());
         $date = gmdate($date_format);
         $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$f}")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/refseq/refseq.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("restricted-by-source-license")->setLicense("http://creativecommons/licenses/by/3.0/")->setDataset(parent::getDatasetURI());
         $dataset_description .= $output_file->toRDF() . $source_file->toRDF();
     }
     // write the accumulated dataset description
     parent::writeToReleaseFile($dataset_description);
     parent::getWriteFile()->close();
 }
Example #24
0
 /**
  * Convert the requested GenAge archives ("human", "models") and write the
  * dataset description.
  *
  * For each requested key the zip archive is downloaded when missing locally,
  * the CSV entry inside it is handed to the read file, and the method named
  * after the key ($this->human() / $this->models()) is invoked.
  *
  * @return false|null FALSE when the CSV entry cannot be read from the archive;
  *                    nothing otherwise.
  */
 function process()
 {
     if (parent::getParameterValue('files') == 'all') {
         $files = explode("|", parent::getParameterList('files'));
         array_shift($files);
     } else {
         $files = explode(",", parent::getParameterValue('files'));
     }
     $remote_files = array("human" => "human_genes.zip", "models" => "models_genes.zip");
     // CSV entry expected inside each archive
     $zip_entries = array("human" => "genage_human.csv", "models" => "genage_models.csv");
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     $suffix = parent::getParameterValue('output_format');
     $gz = strstr($suffix, "gz") ? true : false;
     // ISO 8601 in UTC: 'H' keeps the leading zero on the hour and gmdate() makes the literal 'Z' true
     $date_format = "Y-m-d\\TH:i:s\\Z";
     $dataset_description = '';
     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }
     foreach ($files as $file) {
         // an unknown key has no archive, no zip entry and no handler method
         if (!isset($remote_files[$file])) {
             trigger_error("Unknown GenAge file '{$file}' ... skipping", E_USER_WARNING);
             continue;
         }
         $lfile = $ldir . $remote_files[$file];
         $rfile = $rdir . $remote_files[$file];
         if (!file_exists($lfile)) {
             trigger_error($lfile . " not found. Will attempt to download." . PHP_EOL, E_USER_WARNING);
             echo "Downloading {$rfile}... ";
             Utils::DownloadSingle($rfile, $lfile);
             echo "done!" . PHP_EOL;
         }
         $ofile = "genage_" . $file . '.' . $suffix;
         $zin = new ZipArchive();
         // ZipArchive::open() returns TRUE on success and an error code otherwise, never FALSE
         if ($zin->open($lfile) !== true) {
             trigger_error("Unable to open {$lfile}");
             exit;
         }
         $zipentry = $zip_entries[$file];
         if (($fp = $zin->getStream($zipentry)) === FALSE) {
             trigger_error("Unable to get {$zipentry} in ziparchive {$lfile}");
             // do not leave the dataset graph active for the caller
             parent::setGraphURI($graph_uri);
             return FALSE;
         }
         parent::SetReadFile($lfile);
         parent::GetReadFile()->SetFilePointer($fp);
         // set the write file, parse, write and close
         parent::setWriteFile($odir . $ofile, $gz);
         echo "Processing {$lfile}... ";
         $fnx = $file;
         $this->{$fnx}();
         echo "done!" . PHP_EOL;
         parent::getWriteFile()->close();
         // generate the dataset release file
         echo "Generating dataset description for {$ofile}... ";
         // dataset description
         $source_file = (new DataResource($this))->setURI($rfile)->setTitle("Human Ageing Genomic Resources GenAge database (" . $remote_files[$file] . ")")->setRetrievedDate(gmdate($date_format, filemtime($lfile)))->setFormat("text/comma-separated-value")->setFormat("application/gzip")->setPublisher("http://genomics.senescence.info/")->setHomepage("http://genomics.senescence.info/genes/")->setRights("use")->setLicense("http://genomics.senescence.info/legal.html")->setDataset("http://identifiers.org/genage/");
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = gmdate($date_format);
         $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/genage/genage.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr($suffix, "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
         echo "done!" . PHP_EOL;
     }
     // restore the graph URI and write the dataset description
     parent::setGraphURI($graph_uri);
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
 }
Example #25
0
 /**
  *  Parse the local copy of the PubChem substances directory.
  *
  *  Every entry of "<indir>/substances/" (except "." and "..") is passed to
  *  parse_substance_file() and written to "<outdir>/substances/" under the same
  *  base name with the configured output format as extension. A dataset
  *  description is written afterwards. Exits when the input directory cannot be
  *  opened.
  **/
 function parse_substances()
 {
     $ignore = array(".", "..");
     $input_dir = $this->getParameterValue('indir') . "/substances/";
     $this->CreateDirectory($this->getParameterValue('outdir') . "/substances/");
     parent::setDatasetURI("bio2rdf_dataset:bio2rdf-" . $this->getPcsPrefix() . "-" . date("Ymd"));
     $graph_uri = parent::getGraphURI();
     //set graph URI to dataset uri
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }
     // the output format is fixed for the whole run; deriving $gz here (rather than
     // inside the loop) keeps the dataset description right even for an empty directory
     $suffix = parent::getParameterValue('output_format');
     $gz = strstr($suffix, "gz") ? true : false;
     // ISO 8601 in UTC: 'H' keeps the leading zero on the hour and gmdate() makes the literal 'Z' true
     $date_format = "Y-m-d\\TH:i:s\\Z";
     $dataset_description = '';
     if ($handle = opendir($input_dir)) {
         $output_dir = realpath($this->getParameterValue('outdir')) . "/substances/";
         while (false !== ($file = readdir($handle))) {
             if (in_array($file, $ignore)) {
                 continue;
             }
             echo "Processing file: " . $input_dir . $file . PHP_EOL;
             $outfile = $output_dir . basename($file, ".xml.gz") . "." . $suffix;
             echo "... into " . $outfile . PHP_EOL;
             parent::setCheckpoint('file');
             $this->setWriteFile($outfile, $gz);
             $this->parse_substance_file($input_dir, $file);
             $this->getWriteFile()->close();
         }
         closedir($handle);
         $source_file = (new DataResource($this))->setURI("http://www.ncbi.nlm.nih.gov/pcsubstance")->setTitle("PubChem Substance")->setRetrievedDate(gmdate($date_format, filemtime($input_dir)))->setFormat("text/xml")->setFormat("application/zip")->setPublisher("http://ncbi.nlm.nih.gov/")->setHomepage("http://pubchem.ncbi.nlm.nih.gov/")->setRights("use")->setRights("restricted-by-source-license")->setLicense("ftp://ftp.ncbi.nlm.nih.gov/pubchem/README")->setDataset("http://identifiers.org/pubchem.substance/");
         $prefix = $this->getPcsPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = gmdate($date_format);
         $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/pubchem/pubchem.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr($suffix, "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
         //set graph URI back to default
         parent::setGraphURI($graph_uri);
         // write the dataset description
         $this->setWriteFile($this->getParameterValue('outdir') . "/substances/" . $this->getBio2RDFReleaseFile());
         $this->getWriteFile()->write($dataset_description);
         $this->getWriteFile()->close();
     } else {
         echo "unable to read directory contents: " . $input_dir . "\n";
         exit;
     }
 }
Example #26
0
 /**
  * Convert the Affymetrix probeset annotation archives into a single output
  * file and write the dataset description.
  *
  * The listing page (download_url) is cached as "<indir>probeset_list.html" and
  * scanned for "*.csv.zip" links. Either all of them or those matching the
  * comma-separated 'files' parameter are processed; archives that are not
  * present locally are skipped.
  *
  * @return bool|null TRUE on completion, FALSE when a CSV entry cannot be read
  *                   from its archive; the script exits on other fatal conditions.
  */
 function Run()
 {
     // directory shortcuts
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     // get the listings page
     $url = trim(parent::getParameterValue('download_url'));
     $listing_file = $ldir . "probeset_list.html";
     if (!file_exists($listing_file) || parent::getParameterValue("download") == "true") {
         echo "Downloading {$listing_file}" . PHP_EOL;
         Utils::DownloadSingle($url, $listing_file);
     }
     $listings = file_get_contents($listing_file);
     // make a list of the csv.zip files
     preg_match_all("/\"([^\"]+)\\.csv\\.zip\"/", $listings, $m);
     if (count($m[1]) == 0) {
         trigger_error("could not find any .csv.zip files in {$url}");
         exit;
     }
     $myfiles = array();
     if (parent::getParameterValue("files") == 'all') {
         $myfiles = $m[1];
     } else {
         $a = explode(",", parent::getParameterValue("files"));
         foreach ($a as $f) {
             $found = false;
             foreach ($m[1] as $n) {
                 if (strstr($n, $f)) {
                     $found = true;
                     $myfiles[] = $n;
                     break;
                 }
             }
             if ($found === false) {
                 echo "cannot find {$f} in list" . PHP_EOL;
             }
         }
     }
     // nothing to do
     if (count($myfiles) == 0) {
         exit;
     }
     $dataset_description = '';
     // stays null when every archive is skipped
     $source_file = null;
     // set the write file
     $gz = strstr(parent::getParameterValue('output_format'), ".gz") === FALSE ? false : true;
     $outfile = 'affymetrix.' . parent::getParameterValue('output_format');
     $this->setWriteFile($odir . $outfile, $gz);
     // iterate over the files
     foreach ($myfiles as $rfile) {
         $base_file = substr($rfile, strrpos($rfile, "/") + 1);
         $base_url = substr($rfile, 0, strrpos($rfile, "/"));
         // get and set the dataset version
         if (parent::getDatasetVersion() == null) {
             // separate match array so the listing matches in $m are not overwritten
             preg_match("/\\.na([0-9]{2})\\.annot/", $base_file, $version_match);
             if (isset($version_match[1])) {
                 $this->setDatasetVersion($version_match[1]);
             }
         }
         if (parent::getDatasetVersion() != parent::getParameterValue('version')) {
             $base_file = str_replace("na" . parent::getDatasetVersion(), "na" . parent::getParameterValue('version'), $base_file);
         }
         $csv_file = $base_file . ".csv";
         $zip_file = $csv_file . ".zip";
         $lfile = $ldir . $zip_file;
         if (!file_exists($lfile)) {
             echo "skipping: {$lfile} does not exist" . PHP_EOL;
             continue;
         }
         echo "processing {$lfile}" . PHP_EOL;
         // open the zip file
         $zin = new ZipArchive();
         // ZipArchive::open() returns TRUE on success and an error code otherwise, never FALSE
         if ($zin->open($lfile) !== true) {
             trigger_error("Unable to open {$lfile}");
             exit;
         }
         if (($fp = $zin->getStream($csv_file)) === FALSE) {
             trigger_error("Unable to get {$csv_file} in ziparchive {$lfile}");
             return FALSE;
         }
         parent::setReadFile($lfile);
         parent::getReadFile()->setFilePointer($fp);
         $this->parse($base_file);
         parent::getReadFile()->close();
         parent::clear();
         // dataset description
         $source_file = (new DataResource($this))->setURI($rfile)->setTitle("Affymetrix Probeset: {$base_file}")->setRetrievedDate(parent::getDate(filemtime($lfile)))->setFormat("text/tab-separated-value")->setFormat("application/zip")->setPublisher("http://affymetrix.com")->setHomepage("http://www.affymetrix.com/support/technical/annotationfilesmain.affx")->setRights("use")->setRights("no-commercial")->setRights("registration-required")->setLicense("http://www.affymetrix.com/about_affymetrix/legal/index.affx")->setDataset("http://identifiers.org/affy.probeset/");
         $dataset_description .= $source_file->toRDF();
     }
     $this->getWriteFile()->close();
     // write the dataset description
     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = parent::getDate(filemtime($odir . $outfile));
     $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outfile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}");
     // the source is the last archive that was actually processed; none when all were skipped
     if ($source_file !== null) {
         $output_file->setSource($source_file->getURI());
     }
     $output_file->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/affymetrix/affymetrix.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr(parent::getParameterValue('output_format'), "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description .= $output_file->toRDF();
     // write the dataset description
     $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
     $this->getWriteFile()->write($dataset_description);
     $this->getWriteFile()->close();
     return true;
 }
Example #27
0
 /**
  * Convert the requested CTD files and write the dataset description.
  *
  * Each file is stored locally as "<indir><file>.gz"; it is downloaded when
  * missing (files that are not gzipped remotely are compressed on the way in
  * through the compress.zlib:// wrapper). The method named "CTD_<file>" is
  * invoked to do the conversion.
  */
 function process()
 {
     // get the file list
     if (parent::getParameterValue('files') == 'all') {
         $files = explode("|", parent::getParameterList('files'));
         array_shift($files);
     } else {
         $files = explode(",", parent::getParameterValue('files'));
     }
     $dataset_description = '';
     //set directory values
     $ldir = parent::getParameterValue('indir');
     $rdir = parent::getParameterValue('download_url');
     $odir = parent::getParameterValue('outdir');
     $out_suffix = parent::getParameterValue('output_format');
     $gz = strstr($out_suffix, "gz") ? true : false;
     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }
     $gz_suffix = ".gz";
     // remote extensions that differ from the default ".tsv.gz"
     $remote_suffixes = array('chem_gene_ixn_types' => '.tsv', 'exposure_ontology' => '.obo');
     // source datasets that have an identifiers.org namespace
     $datasets = array("chemicals" => "http://identifiers.org/ctd.chemical/", "diseases" => "http://identifiers.org/ctd.disease/", "genes" => "http://identifiers.org/ctd.gene/");
     // ISO 8601 in UTC: 'H' keeps the leading zero on the hour and gmdate() makes the literal 'Z' true
     $date_format = "Y-m-d\\TH:i:s\\Z";
     foreach ($files as $file) {
         $suffix = isset($remote_suffixes[$file]) ? $remote_suffixes[$file] : ".tsv.gz";
         $lfile = $ldir . $file . $gz_suffix;
         $rfile = $rdir . 'CTD_' . $file . $suffix;
         if (!file_exists($lfile)) {
             trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
             if ($suffix == ".tsv.gz") {
                 Utils::DownloadSingle($rfile, $lfile);
             } else {
                 // remote file is uncompressed: gzip it while saving
                 Utils::DownloadSingle($rfile, "compress.zlib://" . $lfile);
             }
         }
         $ofile = "ctd_" . $file . "." . $out_suffix;
         echo "Processing " . $file . " ...";
         parent::setWriteFile($odir . $ofile, $gz);
         //set read file
         parent::setReadFile($lfile, TRUE);
         $fnx = "CTD_" . $file;
         $this->{$fnx}();
         //close write file
         parent::getWriteFile()->close();
         parent::clear();
         echo "done!" . PHP_EOL;
         // generate the dataset release file
         echo "Generating dataset description... ";
         $dataset = isset($datasets[$file]) ? $datasets[$file] : null;
         // dataset description ($gz_suffix already carries its leading dot)
         $source_file = (new DataResource($this))->setURI($rfile)->setTitle("Comparative Toxicogenomics Database ({$file}{$gz_suffix})")->setRetrievedDate(gmdate($date_format, filemtime($lfile)))->setFormat("text/tab-separated-value")->setFormat("application/gzip")->setPublisher("http://ctdbase.org/")->setHomepage("http://ctdbase.org/")->setRights("use")->setRights("by-attribution")->setRights("no-commercial")->setLicense("http://ctdbase.org/about/legal.jsp")->setDataset($dataset);
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = gmdate($date_format);
         $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ctd/ctd.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr($out_suffix, "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
         // finish the "Generating dataset description... " line for this file
         echo "done!" . PHP_EOL;
     }
     // restore the graph URI and write the dataset description
     parent::setGraphURI($graph_uri);
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
 }
Example #28
0
 /**
  * Walk the listing held by the current read file and convert every entry.
  *
  * Each line is split on a tab into "<ns>:<id>" and a name. The entry is
  * described in RDF, its flat file ("<indir><id>.txt") is downloaded when
  * missing or when 'download' is 'true', and parseEntry() is run on it. For
  * the "pathway" database the KGML file is fetched and parsed the same way.
  * Entries whose download fails are skipped.
  *
  * @param string $db database name; used to build the class URI and label
  */
 function process($db)
 {
     $indir = parent::getParameterValue('indir');
     // fetched as in the original; not used below
     $outdir = parent::getParameterValue('outdir');
     while ($line = parent::getReadFile()->read()) {
         list($nsid, $name) = explode("\t", $line);
         list($ns, $id) = explode(":", $nsid);
         // honour an optional whitelist of identifiers
         if (isset($this->idlist) && !in_array($id, $this->idlist)) {
             continue;
         }
         // organism-specific runs qualify the identifier with its namespace
         if (isset($this->org)) {
             $id = "{$ns}_{$id}";
         }
         $uri = $this->getNamespace() . $id;
         $class_uri = parent::getVoc() . ucfirst($db);
         $rdf = parent::describeIndividual($uri, $name, $class_uri);
         $rdf .= parent::describeClass(parent::getVoc() . ucfirst($db), "KEGG {$db}");
         $rdf .= parent::triplifyString($uri, parent::getVoc() . "internal-id", $nsid);
         parent::addRDF($rdf);

         // fetch and parse the flat-file entry
         $entry_file = $indir . $id . ".txt";
         $entry_url = parent::getParameterValue("download_url") . "get/{$nsid}";
         if (!file_exists($entry_file) || parent::getParameterValue('download') == 'true') {
             echo "downloading {$nsid} ";
             if (utils::downloadSingle($entry_url, $entry_file) === false) {
                 echo "unable to download {$nsid} ... skipping" . PHP_EOL;
                 continue;
             }
             echo "done. ";
         }
         echo "parsing {$nsid} ... ";
         $this->parseEntry($entry_file);
         parent::writeRDFBufferToWriteFile();

         // only pathways carry a KGML file
         if ($db !== "pathway") {
             echo "done!" . PHP_EOL;
             continue;
         }
         $ko = str_replace("map", "ko", $id);
         $kgml_file = $indir . $id . ".kgml";
         $kgml_url = "http://www.kegg.jp/kegg-bin/download?entry={$ko}&format=kgml";
         if (!file_exists($kgml_file) || parent::getParameterValue('download') == 'true') {
             echo "downloading KGML for {$nsid} ";
             if (utils::downloadSingle($kgml_url, $kgml_file) === false) {
                 echo "unable to download {$nsid} ... skipping" . PHP_EOL;
                 continue;
             }
             echo "done. ";
         }
         $this->parseKGML($kgml_file);
         parent::writeRDFBufferToWriteFile();
         echo "done!" . PHP_EOL;
     }
 }
Example #29
0
 /**
  * Convert a parsed InterPro XML document into RDF.
  *
  * Records the member-database release information on the dataset, then emits
  * for every <interpro> entry: its description, PubMed citations, abstract
  * (with <cite> references replaced by PubMed links), example entries,
  * relationships, secondary accessions, cross-references and taxon
  * distribution. When the 'id_list' parameter is set, only the listed
  * identifiers are processed.
  *
  * @param SimpleXMLElement $xml root element of the InterPro XML file
  */
 function Parse($xml)
 {
     // state the dataset info
     foreach ($xml->release->dbinfo as $o) {
         $db = $o->attributes()->dbname . " v" . $o->attributes()->version . " (" . $o->attributes()->entry_count . " entries) [" . $o->attributes()->file_date . "]";
         parent::addRDF(parent::triplifyString(parent::getDatasetURI(), parent::getVoc() . "contains", $db));
         if ((string) $o->attributes()->dbname === "INTERPRO") {
             parent::setDatasetVersion((string) $o->attributes()->version);
         }
     }
     // get a potential id list
     $id_list = null;
     if (parent::getParameterValue("id_list") != '') {
         $id_list = explode(",", parent::getParameterValue("id_list"));
     }
     // relationship element => predicate suffix; each element holds <rel_ref ipr_ref="..."/> children
     // NOTE(review): the element is spelled "child" here while its sibling is "parent_list" — confirm against the InterPro DTD
     $relations = array("parent_list" => "parent", "child" => "child", "contains" => "contains", "found_in" => "found-in");
     // elements whose <db_xref db="..." dbkey="..."/> children become x-<db> links
     $xref_lists = array("member_list", "external_doc_list", "structure_db_links");
     // now iterate over the entries
     foreach ($xml->interpro as $o) {
         // flush whatever the previous entry buffered
         parent::writeRDFBufferToWriteFile();
         $interpro_id = (string) $o->attributes()->id;
         if ($id_list !== null && !in_array($interpro_id, $id_list)) {
             continue;
         }
         echo "Processing {$interpro_id}" . PHP_EOL;
         $name = (string) $o->name;
         $short_name = (string) $o->attributes()->short_name;
         $type = (string) $o->attributes()->type;
         // subject URI of this entry; must not be reassigned below
         $s = parent::getNamespace() . $interpro_id;
         parent::addRDF(parent::describeIndividual($s, "{$name} ({$short_name}) {$type}", parent::getVoc() . $type));
         // get the pubs: parallel lists of <cite> tags and their PubMed link replacements
         $cite_tags = array();
         $cite_links = array();
         if (isset($o->pub_list->publication)) {
             foreach ($o->pub_list->publication as $p) {
                 $pid = (string) $p->attributes()->id;
                 if (isset($p->db_xref) && (string) $p->db_xref->attributes()->db === "PUBMED") {
                     $pmid = (string) $p->db_xref->attributes()->dbkey;
                     $cite_tags[] = '<cite idref="' . $pid . '"/>';
                     $cite_links[] = '<a href="http://www.ncbi.nlm.nih.gov/pubmed/' . $pmid . '">pubmed:' . $pmid . '</a>';
                     parent::addRDF(parent::triplify($s, parent::getVoc() . "x-pubmed", "pubmed:{$pmid}"));
                 }
             }
         }
         $abstract = (string) $o->abstract->p->asXML();
         if (count($cite_tags) > 0) {
             $abstract = str_replace($cite_tags, $cite_links, $abstract);
         }
         parent::addRDF(parent::triplifyString($s, "dc:description", $abstract));
         if (isset($o->example_list->example)) {
             foreach ($o->example_list->example as $example) {
                 $db = (string) $example->db_xref->attributes()->db;
                 $id = (string) $example->db_xref->attributes()->dbkey;
                 parent::addRDF(parent::triplify($s, parent::getVoc() . "example-entry", "{$db}:{$id}"));
             }
         }
         // relationships to other InterPro entries
         foreach ($relations as $element => $predicate) {
             if (!isset($o->{$element}->rel_ref)) {
                 continue;
             }
             foreach ($o->{$element}->rel_ref as $rel) {
                 $id = (string) $rel->attributes()->ipr_ref;
                 parent::addRDF(parent::triplify($s, parent::getVoc() . $predicate, "interpro:{$id}"));
             }
         }
         // secondary accessions: iterate the nodes that were tested, and keep $s intact
         if (isset($o->sec_list->sec_ac)) {
             foreach ($o->sec_list->sec_ac as $sec) {
                 $id = (string) $sec->attributes()->acc;
                 parent::addRDF(parent::triplify($s, parent::getVoc() . "secondary-accession", "interpro:{$id}"));
             }
         }
         // xrefs
         foreach ($xref_lists as $element) {
             if (!isset($o->{$element}->db_xref)) {
                 continue;
             }
             foreach ($o->{$element}->db_xref as $dbxref) {
                 $db = (string) $dbxref->attributes()->db;
                 $id = (string) $dbxref->attributes()->dbkey;
                 parent::addRDF(parent::triplify($s, parent::getVoc() . "x-" . strtolower($db), "{$db}:{$id}"));
             }
         }
         // taxon distribution
         if (isset($o->taxonomy_distribution->taxon_data)) {
             foreach ($o->taxonomy_distribution->taxon_data as $t) {
                 $organism = (string) $t->attributes()->name;
                 $number = (string) $t->attributes()->proteins_count;
                 parent::addRDF(parent::triplifyString($s, parent::getVoc() . "taxon-distribution", "{$organism} ({$number})"));
             }
         }
     }
 }
Example #30
0
 /**
  * Convert the requested Pathway Commons BioPAX archives to RDF and write
  * the dataset description file.
  *
  * Parameters read: 'files', 'indir', 'outdir', 'download_url', 'download',
  * 'output_format', 'dataset_graph' and 'bio2rdf_release'.
  *
  * For each requested source the gzipped OWL archive is downloaded when it
  * is missing locally (or when 'download' is 'true'), read fully into
  * memory, handed to BioPAX2Bio2RDF and written to the output directory.
  * A DataResource description is collected per source and, after the loop,
  * one for the generated output; both are written to the release file.
  *
  * Unknown source names are reported with a warning and skipped. A failed
  * download, a failed write of the downloaded archive, or an archive that
  * cannot be opened raises E_USER_ERROR and terminates the script.
  */
 function Run()
 {
     // get the work
     if ($this->GetParameterValue('files') == 'all') {
         // pipe-separated parameter list; its first entry is dropped
         $sources = explode("|", parent::getParameterList('files'));
         array_shift($sources);
     } else {
         // comma separated list
         $sources = explode(",", parent::getParameterValue('files'));
     }
     // tolerate whitespace around names ("a, b") and drop empty entries
     $sources = array_filter(array_map('trim', $sources), 'strlen');

     // NOTE(review): the "h**o-sapiens" key looks like a masked "homo-sapiens"
     // (the remote file name is "...homo%20sapiens..."). The original key is
     // kept and the unmasked spelling is accepted as an alias -- confirm
     // against the 'files' parameter list.
     $download_files = array(
         "h**o-sapiens" => "Pathway%20Commons%202%20homo%20sapiens.BIOPAX.owl.gz",
         "homo-sapiens" => "Pathway%20Commons%202%20homo%20sapiens.BIOPAX.owl.gz",
         "hprd" => "Pathway%20Commons%202%20HPRD.BIOPAX.owl.gz",
         "humancyc" => "Pathway%20Commons%202%20HumanCyc.BIOPAX.owl.gz",
         "nci-nature" => "Pathway%20Commons%202%20NCI_Nature.BIOPAX.owl.gz",
         "panther-pathway" => "Pathway%20Commons%202%20PANTHER%20Pathway.BIOPAX.owl.gz",
         "phosphositeplus" => "Pathway%20Commons%202%20PhosphoSitePlus.BIOPAX.owl.gz",
         "reactome" => "Pathway%20Commons%202%20Reactome.BIOPAX.owl.gz"
     );

     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }

     // Resolved once, before the loop: these values are also needed after
     // the loop, even when no source ends up being processed.
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     $suffix = parent::getParameterValue('output_format');
     $gz = (strstr($suffix, "gz") !== false);

     $dataset_description = '';
     $source_file = null;

     // iterate over the requested data
     foreach ($sources as $source) {
         if (!isset($download_files[$source])) {
             // without a known remote file name there is nothing to fetch
             trigger_error("Unknown source '{$source}', skipping", E_USER_WARNING);
             continue;
         }
         echo "processing {$source}... ";

         // set the remote and input files
         $zfile = $source . ".owl.gz";
         $rfile = $rdir . $download_files[$source];
         $lfile = $ldir . $zfile;

         // download if the file doesn't exist locally or we are told to
         if (!file_exists($lfile) || $this->GetParameterValue('download') == 'true') {
             echo "downloading... ";
             $contents = file_get_contents($rfile);
             if ($contents === false) {
                 // do not leave an empty file behind that a later run would
                 // mistake for a valid cached archive
                 trigger_error("Unable to download {$rfile}", E_USER_ERROR);
                 exit;
             }
             if (file_put_contents($lfile, $contents) === false) {
                 trigger_error("Unable to write {$lfile}", E_USER_ERROR);
                 exit;
             }
             unset($contents);
         }

         // decompress the archive into a buffer
         echo 'extracting... ';
         if (($fpin = gzopen($lfile, "r")) === FALSE) {
             trigger_error("Unable to open {$lfile}", E_USER_ERROR);
             exit;
         }
         $data = '';
         while (!gzeof($fpin)) {
             $buffer = gzgets($fpin, 4096);
             if ($buffer === false) {
                 // read error: stop instead of spinning on a broken stream
                 break;
             }
             $data .= $buffer;
         }
         gzclose($fpin);

         // set the output file
         $outfile = $source . '.' . $suffix;
         parent::setWriteFile($odir . $outfile, $gz);

         // send for parsing
         $p = new BioPAX2Bio2RDF($this);
         $p->SetBuffer($data)->SetBioPAXVersion(3)->SetBaseNamespace("http://purl.org/pc2/3/")->SetBio2RDFNamespace("http://bio2rdf.org/pathwaycommons:")->SetDatasetURI(parent::getDatasetURI());
         $rdf = $p->Parse();
         parent::addRDF($rdf);

         // write to output
         parent::writeRDFBufferToWriteFile();
         parent::getWriteFile()->Close();
         echo "done!" . PHP_EOL;

         // generate dataset description
         echo "Generating dataset description for {$zfile}... ";
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("Pathway Commons")
             ->setRetrievedDate(date("Y-m-d\\TG:i:s\\Z", filemtime($lfile)))
             ->setFormat("rdf/xml")
             ->setPublisher("http://www.pathwaycommons.org/")
             ->setHomepage("http://www.pathwaycommons.org/")
             ->setRights("use")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://www.pathwaycommons.org/pc2/home.html#data_sources")
             ->setDataset("http://identifiers.org/pathwaycommons/");
         $dataset_description .= $source_file->toRDF();
         echo "done!" . PHP_EOL;
     }

     echo "Generating dataset description for Bio2RDF Pathways Commons dataset... ";
     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = date("Y-m-d\\TG:i:s\\Z");
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})");
     if ($source_file !== null) {
         // as before, the source recorded is the last archive processed;
         // it is omitted when nothing was processed
         $output_file = $output_file->setSource($source_file->getURI());
     }
     $output_file = $output_file
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/pathwaycommons/pathwaycommons.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr($suffix, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description .= $output_file->toRDF();

     // write dataset description to file, restoring the original graph URI first
     parent::setGraphURI($graph_uri);
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }