Beispiel #1
0
 /**
  * Converts the selected IPI package files to RDF and writes the dataset release file.
  *
  * The 'files' parameter is either 'all' or a comma-separated list of package-map
  * keys; keys that are not in the package map are ignored. When 'download' is set,
  * every file of the selected packages is first fetched from 'download_url' into
  * 'indir'. Each selected file that getFilePaths() reports for 'indir' is then
  * converted by invoking the method named after its package key.
  *
  * @return void
  */
 public function Run()
 {
     $dataset_description = '';
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $download_url = parent::getParameterValue('download_url');
     $output_format = parent::getParameterValue('output_format');
     // 'H' gives the zero-padded hour a valid xsd:dateTime needs; timestamps are
     // rendered with gmdate() so that the trailing 'Z' (UTC) is actually true
     $date_format = 'Y-m-d\TH:i:s\Z';

     // first get the files that are to be processed
     $selectedPackage = trim(parent::getParameterValue('files'));
     $packageMap = $this->getPackageMap();
     if ($selectedPackage == 'all') {
         $files = $packageMap;
     } else {
         $files = array();
         foreach (explode(",", $selectedPackage) as $name) {
             if (array_key_exists($name, $packageMap)) {
                 $files[$name] = $packageMap[$name];
             }
         }
     }

     // download
     if ($this->getParameterValue('download')) {
         foreach ($files as $fileNames) {
             foreach ($fileNames as $fileName) {
                 $rfile = $download_url . $fileName;
                 echo "downloading file {$fileName} :" . $rfile . "..." . PHP_EOL;
                 $contents = file_get_contents($rfile);
                 if ($contents === false) {
                     // do not leave an empty .gz behind that the conversion loop would pick up
                     trigger_error("Unable to get {$rfile}", E_USER_WARNING);
                     continue;
                 }
                 file_put_contents($ldir . $fileName, $contents);
             }
         }
     }

     // iterate over the files
     $paths = $this->getFilePaths($ldir, 'gz');
     foreach ($files as $k => $val) {
         foreach ($val as $fn) {
             if (!in_array($fn, $paths)) {
                 continue;
             }
             $lfile = $fn;
             $ofile = $odir . basename($fn, ".gz") . "." . $output_format;
             $gz = strstr($output_format, "gz") !== false;
             parent::setWriteFile($ofile, $gz);
             parent::setReadFile($ldir . $lfile, true);

             $source_file = (new DataResource($this))
                 ->setURI($download_url . basename($fn))
                 ->setTitle('International Protein Index filename: ' . basename($fn))
                 ->setRetrievedDate(gmdate($date_format, filemtime($ldir . $lfile)))
                 ->setFormat('text/ipi-format')
                 ->setFormat('application/zip')
                 ->setPublisher('https://www.ebi.ac.uk')
                 ->setHomepage('https://www.ebi.ac.uk/IPI')
                 ->setRights('use')
                 ->setRights('attribution')
                 ->setLicense('https://www.ebi.ac.uk')
                 ->setDataset(parent::getDatasetURI());

             $prefix = parent::getPrefix();
             $bVersion = parent::getParameterValue('bio2rdf_release');
             $date = gmdate($date_format);
             $output_file = (new DataResource($this))
                 ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")
                 ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
                 ->setSource($source_file->getURI())
                 ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ipi/ipi.php")
                 ->setCreateDate($date)
                 ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
                 ->setPublisher("http://bio2rdf.org")
                 ->setRights("use-share-modify")
                 ->setRights("restricted-by-source-license")
                 // the license host was previously written without ".org"
                 ->setLicense("http://creativecommons.org/licenses/by/3.0/")
                 ->setDataset(parent::getDatasetURI());
             $dataset_description .= $output_file->toRDF() . $source_file->toRDF();

             echo "processing {$fn} ...";
             // the package key doubles as the name of the converter method
             $this->{$k}();
             echo "done!" . PHP_EOL;

             // the release file is rewritten after every converted file with the
             // description accumulated so far
             $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
             $this->getWriteFile()->write($dataset_description);
             $this->getWriteFile()->close();
         }
     }
 }
Beispiel #2
0
 /**
  * Converts the HGNC complete set to RDF and writes the dataset release file.
  *
  * The input file is looked up in 'indir'; when it is missing, 'download' is
  * switched on and the file is fetched from 'download_url' (used as-is as the
  * remote location). The RDF goes to "hgnc.<output_format>" in 'outdir'.
  *
  * @return void
  */
 function Run()
 {
     $file = "hgnc_complete_set.txt.gz";
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     $output_format = parent::getParameterValue('output_format');
     // 'H' gives the zero-padded hour a valid xsd:dateTime needs; timestamps are
     // rendered with gmdate() so that the trailing 'Z' (UTC) is actually true
     $date_format = 'Y-m-d\TH:i:s\Z';
     $lfile = $ldir . $file;

     // force a download when there is no local copy to read
     if (!file_exists($lfile) && parent::getParameterValue('download') == false) {
         trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
         parent::setParameterValue('download', true);
     }

     // download the hgnc file
     if (parent::getParameterValue('download') == true) {
         echo "downloading {$file} ... ";
         Utils::DownloadSingle($rdir, $lfile);
     }

     $ofile = $odir . "hgnc." . $output_format;
     $gz = strstr($output_format, "gz") !== false;
     parent::setWriteFile($ofile, $gz);
     parent::setReadFile($lfile, true);
     echo "processing {$file}... ";
     $this->process();
     echo "done!" . PHP_EOL;
     // close write file (once; the former second close() on the same handle is gone)
     parent::getWriteFile()->close();
     echo PHP_EOL;

     // generate the dataset release file
     echo "generating dataset release file... ";
     $source_file = (new DataResource($this))
         ->setURI($rdir)
         ->setTitle('HUGO Gene Nomenclature Committee (HGNC)')
         ->setRetrievedDate(gmdate($date_format, filemtime($lfile)))
         ->setFormat('text/tab-separated-value')
         ->setFormat('application/zip')
         ->setPublisher('http://www.genenames.org/')
         ->setHomepage('http://www.genenames.org/data/gdlw_columndef.html')
         ->setRights('use')
         ->setRights('attribution')
         ->setLicense('http://www.genenames.org/about/overview')
         ->setDataset(parent::getDatasetURI());

     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = gmdate($date_format);
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/hgnc/hgnc.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("restricted-by-source-license")
         // the license host was previously written without ".org"
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());

     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr($output_format, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }

     $dataset_description = $source_file->toRDF() . $output_file->toRDF();
     $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
     $this->getWriteFile()->write($dataset_description);
     $this->getWriteFile()->close();
     // report completion only once the release file has really been written
     echo "done!" . PHP_EOL;
 }
Beispiel #3
0
 /**
  * Builds the dataset description for the selected Orphanet files and writes it
  * to the release file.
  *
  * The 'files' parameter is either 'all' (the parameter list is then split on
  * '|' and its first entry dropped) or a comma-separated list of names. Each
  * name is resolved through $this->filemap to a file in 'indir', which is
  * downloaded from 'download_url' when missing or when 'download' is 'true'.
  *
  * NOTE(review): the actual RDF conversion is commented out below, so this
  * method currently only opens/closes the input and emits the description.
  *
  * @return void
  */
 function run()
 {
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $dd = '';

     $files = parent::getParameterValue('files');
     if ($files == 'all') {
         $files = explode('|', parent::getParameterList('files'));
         // drop the first entry of the parameter list (presumably 'all' itself - confirm)
         array_shift($files);
     } else {
         $files = explode(',', parent::getParameterValue('files'));
     }

     foreach ($files as $file) {
         echo "processing {$file} ...";
         if (!isset($this->filemap[$file])) {
             // without this guard an unknown name resolves to the input directory
             // itself, which exists and would be opened as if it were a data file
             echo "no file mapping for {$file} ... skipping" . PHP_EOL;
             continue;
         }
         $lfile = $ldir . $this->filemap[$file];
         $rfile = parent::getParameterValue('download_url') . $this->filemap[$file];
         if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') {
             $ret = utils::downloadSingle($rfile, $lfile);
             if ($ret === false) {
                 echo "unable to download {$file} ... skipping" . PHP_EOL;
                 continue;
             }
         }
         parent::setReadFile($lfile, true);

         $suffix = parent::getParameterValue('output_format');
         $ofile = "orphanet-" . $file . '.' . $suffix;
         // single source of truth for the compression flag (".gz" is the needle
         // that decided the advertised format before)
         $gz = strstr($suffix, ".gz") !== false;
         /*			parent::setWriteFile($odir.$ofile, $gz);
         			$this->$file($lfile);
         			parent::getWriteFile()->close();
         */
         parent::getReadFile()->close();
         parent::clear();
         echo "done!" . PHP_EOL;

         // dataset description
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("Orphanet: {$file}")
             ->setRetrievedDate(parent::getDate(filemtime($lfile)))
             ->setFormat("application/xml")
             ->setPublisher("http://www.orpha.net")
             ->setHomepage("http://www.orpha.net/")
             ->setRights("use")
             ->setRights("sharing-modified-version-needs-permission")
             ->setLicense("http://creativecommons.org/licenses/by-nd/3.0/")
             ->setDataset("http://identifiers.org/orphanet/");

         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         // the output file is not written by this method, so it may not exist;
         // fall back to the current time instead of passing filemtime()'s false on
         $opath = $odir . $ofile;
         $date = parent::getDate(file_exists($opath) ? filemtime($opath) : time());
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/orphanet/orphanet.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());

         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr($suffix, "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dd .= $source_file->toRDF() . $output_file->toRDF();
     }
     //foreach
     parent::writeToReleaseFile($dd);
 }
Beispiel #4
0
 /**
  * Converts the selected MGI report files (*.rpt) to RDF and writes the dataset
  * release file.
  *
  * The 'files' parameter is either 'all' (the parameter list is then split on
  * '|' and its first entry dropped) or a comma-separated list of report names.
  * Each report is downloaded from 'download_url' when it is missing from
  * 'indir' or when 'download' is 'true', and converted by the method that
  * carries the report's name.
  *
  * @return void
  */
 function Run()
 {
     $idir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');

     $files = parent::getParameterValue('files');
     if ($files == 'all') {
         $list = explode('|', parent::getParameterList('files'));
         // drop the first entry of the parameter list (presumably 'all' itself - confirm)
         array_shift($list);
     } else {
         $list = explode(',', parent::getParameterValue('files'));
     }

     $dataset_description = '';
     foreach ($list as $item) {
         $lfile = $idir . $item . '.rpt';
         $rfile = parent::getParameterValue('download_url') . $item . '.rpt';
         if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') {
             echo "downloading {$item}...";
             $ret = Utils::DownloadSingle($rfile, $lfile);
             if ($ret != true) {
                 // nothing to convert for this report
                 continue;
             }
         }
         parent::setReadFile($lfile, true);

         echo "Processing {$item}...";
         $output_format = parent::getParameterValue('output_format');
         // bare file name for the public URI, full local path for writing
         $ofile_name = $item . '.' . $output_format;
         $ofile = $odir . $ofile_name;
         $gz = strstr($output_format, "gz") !== false;
         parent::setWriteFile($ofile, $gz);
         // the report name doubles as the name of the converter method
         $this->{$item}();
         parent::getWriteFile()->close();
         parent::getReadFile()->close();
         echo "Done" . PHP_EOL;
         parent::clear();

         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("MGI {$item}")
             ->setRetrievedDate(date("Y-m-d\\TH:i:s", filemtime($lfile)))
             ->setFormat("text")
             ->setPublisher("http://www.informatics.jax.org")
             ->setHomepage("http://www.informatics.jax.org")
             ->setRights("use")
             ->setLicense("http://www.informatics.jax.org/mgihome/other/copyright.shtml")
             ->setDataset("http://identifiers.org/mgi/");

         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = date("Y-m-d\\TH:i:s");
         $output_file = (new DataResource($this))
             // the URI must not embed the local output directory, only the file name
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile_name}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$item} in {$prefix}")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/mgi/mgi.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());

         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr($output_format, "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
     }
     //foreach

     // generate the dataset release file
     $this->setWriteFile($odir . parent::getBio2RDFReleaseFile());
     $this->getWriteFile()->write($dataset_description);
     $this->getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Beispiel #5
0
 /**
  * Converts the NCBI Homologene data file to RDF and writes the dataset release file.
  *
  * "homologene.data" is read from 'indir'; when it is missing, 'download' is
  * switched on and the file is fetched from 'download_url'. The RDF goes to
  * "homologene.<output_format>" in 'outdir'.
  *
  * @return void
  */
 function Run()
 {
     $file = "homologene.data";
     $ldir = $this->GetParameterValue('indir');
     $odir = $this->GetParameterValue('outdir');
     $rdir = $this->GetParameterValue('download_url');
     $output_format = parent::getParameterValue('output_format');
     // 'H' gives the zero-padded hour a valid xsd:dateTime needs; timestamps are
     // rendered with gmdate() so that the trailing 'Z' (UTC) is actually true
     $date_format = 'Y-m-d\TH:i:s\Z';
     $lfile = $ldir . $file;

     // force a download when there is no local copy to read
     if (!file_exists($lfile)) {
         trigger_error($file . " not found. Will attempt to download.", E_USER_NOTICE);
         parent::setParameterValue('download', true);
     }

     // download
     $rfile = $rdir . $file;
     if ($this->GetParameterValue('download') == true) {
         echo "downloading {$file} ... ";
         utils::downloadSingle($rfile, $lfile);
     }

     $ofile = 'homologene.' . $output_format;
     $gz = strstr($output_format, "gz") !== false;
     parent::setReadFile($lfile);
     parent::setWriteFile($odir . $ofile, $gz);
     echo "processing {$file}... ";
     $this->process();
     echo "done!" . PHP_EOL;
     parent::getWriteFile()->close();

     // generate the dataset release file
     $source_file = (new DataResource($this))
         ->setURI($rfile)
         ->setTitle("NCBI Homologene")
         ->setRetrievedDate(gmdate($date_format, filemtime($lfile)))
         ->setFormat("text/tab-separated-value")
         ->setPublisher("http://www.ncbi.nlm.nih.gov")
         ->setHomepage("http://www.ncbi.nlm.nih.gov/homologene")
         ->setRights("use-share-modify")
         ->setLicense("http://www.ncbi.nlm.nih.gov/About/disclaimer.html")
         ->setDataset("http://identifiers.org/homologene/");

     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = gmdate($date_format);
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/homologene/homologene.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());

     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr($output_format, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }

     $dataset_description = $source_file->toRDF() . $output_file->toRDF();
     echo "Generating dataset description... ";
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Beispiel #6
0
 /**
  * Converts the iProClass table to RDF and writes the dataset release file.
  *
  * "iproclass.tb.gz" is read from 'indir'; when it is missing, 'download' is
  * switched on and the file is fetched from 'download_url'.
  *
  * NOTE(review): no write file is opened here before process() runs, yet the
  * current write file is closed afterwards - presumably process() opens its own
  * output file(s); confirm.
  *
  * @return void
  */
 public function Run()
 {
     $file = "iproclass.tb.gz";
     $ldir = $this->GetParameterValue('indir');
     $odir = $this->GetParameterValue('outdir');
     $rdir = $this->GetParameterValue('download_url');
     // 'H' gives the zero-padded hour a valid xsd:dateTime needs; timestamps are
     // rendered with gmdate() so that the trailing 'Z' (UTC) is actually true
     $date_format = 'Y-m-d\TH:i:s\Z';
     $lfile = $ldir . $file;

     // force a download when there is no local copy to read
     if (!file_exists($lfile)) {
         trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
         parent::setParameterValue('download', true);
     }

     // download all files
     $rfile = $rdir . $file;
     if ($this->GetParameterValue('download') == true) {
         echo "downloading {$file}... ";
         utils::DownloadSingle($rfile, $lfile);
         //			$cmd = "gzip -c $lfile | split -d -l 1000000 --filter='gzip > $FILE.gz' - iproclass-"
     }

     // name advertised in the release metadata; the output is always declared gzipped
     $ofile = "iproclass.nq";
     $gz = true;
     parent::setReadFile($lfile, true);
     echo "processing {$file}... ";
     $this->process();
     echo "done!" . PHP_EOL;
     parent::getWriteFile()->close();

     echo "generating dataset release file... ";
     $source_file = (new DataResource($this))
         ->setURI($rfile)
         ->setTitle("iProClass")
         ->setRetrievedDate(gmdate($date_format, filemtime($lfile)))
         ->setFormat("text/tab-separated-value")
         ->setFormat("application/gzip")
         ->setPublisher("http://pir.georgetown.edu")
         ->setHomepage("http://pir.georgetown.edu/iproclass")
         ->setRights("use-share-modify")
         ->setLicense("http://pir.georgetown.edu/pirwww/about/linkpir.shtml")
         ->setDataset("http://identifiers.org/iproclass/");

     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = gmdate($date_format);
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/iproclass/iproclass.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());

     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr(parent::getParameterValue('output_format'), "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }

     $dataset_description = $source_file->toRDF() . $output_file->toRDF();
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Beispiel #7
0
 /**
  * Converts every gz file found in 'indir' (NCBI UniSTS) to RDF and writes the
  * dataset release file.
  *
  * When 'download' is set, the files listed by getFtpFileList() are first
  * fetched from 'download_url' into 'indir'. Every path returned by
  * getFilePaths() for 'indir' is then converted with process().
  *
  * @return void
  */
 function run()
 {
     $dataset_description = '';
     $ldir = parent::GetParameterValue('indir');
     $odir = parent::GetParameterValue('outdir');
     $download_url = parent::getParameterValue('download_url');
     $output_format = parent::getParameterValue('output_format');
     // 'H' gives the zero-padded hour a valid xsd:dateTime needs; timestamps are
     // rendered with gmdate() so that the trailing 'Z' (UTC) is actually true
     $date_format = 'Y-m-d\TH:i:s\Z';

     // download
     if ($this->GetParameterValue('download') == true) {
         $list = $this->getFtpFileList('ftp.ncbi.nih.gov');
         $total = count($list);
         $counter = 1;
         foreach ($list as $f) {
             $rfile = $download_url . $f;
             echo "downloading file {$counter} out of {$total} :" . $rfile . "... " . PHP_EOL;
             $counter++;
             $contents = file_get_contents($rfile);
             if ($contents === false) {
                 // do not leave an empty .gz behind that the conversion loop would pick up
                 trigger_error("Unable to get {$rfile}", E_USER_WARNING);
                 continue;
             }
             file_put_contents($ldir . $f, $contents);
         }
     }
     //if download

     // iterate over the files
     $paths = $this->getFilePaths($ldir, 'gz');
     foreach ($paths as $aPath) {
         $lfile = $aPath;
         $ofile = $odir . basename($aPath, ".gz") . "." . $output_format;
         $gz = strstr($output_format, "gz") !== false;
         parent::setWriteFile($ofile, $gz);
         parent::setReadFile($ldir . $lfile, true);

         $source_file = (new DataResource($this))
             ->setURI($download_url . basename($aPath))
             ->setTitle('NCBI UniSTS filename: ' . basename($aPath))
             ->setRetrievedDate(gmdate($date_format, filemtime($ldir . $lfile)))
             ->setFormat('xml/unists-format')
             ->setFormat('application/zip')
             ->setPublisher('https://www.ncbi.nlm.nih.gov')
             ->setHomepage('https://www.ncbi.nlm.nih.gov/unists')
             ->setRights('use')
             ->setRights('attribution')
             ->setLicense('https://www.nlm.nih.gov/copyright.html')
             ->setDataset(parent::getDatasetURI());

         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = gmdate($date_format);
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/unists/unists.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("restricted-by-source-license")
             // the license host was previously written without ".org"
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         $dataset_description .= $output_file->toRDF() . $source_file->toRDF();

         echo "processing {$aPath} ...";
         $this->process();
         echo "done!" . PHP_EOL;

         // the release file is rewritten after every converted file with the
         // description accumulated so far
         $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
         $this->getWriteFile()->write($dataset_description);
         $this->getWriteFile()->close();
     }
     //foreach
 }
Beispiel #8
0
 /**
  * Fetches (when needed) and converts the MIRIAM registry XML.
  *
  * "miriam.xml" in 'indir' is downloaded from 'download_url' when it does not
  * exist or when 'download' equals "true". The result of parse() is flushed
  * from the RDF buffer into "miriam.<output_format>" in 'outdir'.
  *
  * @return bool always true
  */
 function Run()
 {
     echo "processing miriam database";

     // resolve the working directories
     $inputDir = $this->getParameterValue('indir');
     // not used below: the write path re-reads 'outdir' through parent::
     $outputDir = $this->getParameterValue('outdir');

     // locate the input, downloading it first if required
     $remoteLocation = $this->getParameterValue("download_url");
     $localPath = $inputDir . 'miriam.xml';
     $haveLocalCopy = file_exists($localPath);
     if (!$haveLocalCopy || $this->getParameterValue("download") == "true") {
         utils::downloadSingle($remoteLocation, $localPath);
     }
     parent::setReadFile($localPath);

     // open the output, compressed when the format carries a ".gz" suffix
     $outputName = "miriam." . parent::getParameterValue('output_format');
     $compressed = strstr(parent::getParameterValue('output_format'), ".gz") !== false;
     parent::setWriteFile(parent::getParameterValue("outdir") . $outputName, $compressed);

     // convert, flush the buffered RDF and release the handle
     $this->parse();
     parent::WriteRDFBufferToWriteFile();
     $this->getWriteFile()->Close();

     return true;
 }
Beispiel #9
0
 /**
  * Downloads (when needed) and converts the NCBI RefSeq protein files to RDF,
  * then writes the dataset release file.
  *
  * The list of remote files is cached in "ftp_list.txt" inside 'indir' and is
  * refreshed from the FTP site when that cache is missing or 'download' is set.
  * Every listed file that is missing locally (or all of them when 'download'
  * is set) is fetched from 'download_url'; afterwards every gz file reported by
  * getFilePaths() for 'indir' is converted with process().
  *
  * @return void
  */
 function Run()
 {
     $dataset_description = '';
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     // 'H' gives the zero-padded hour a valid xsd:dateTime needs; timestamps are
     // rendered with gmdate() so that the trailing 'Z' (UTC) is actually true
     $date_format = 'Y-m-d\TH:i:s\Z';

     $list_file = $ldir . "ftp_list.txt";
     if (!file_exists($list_file) || $this->getParameterValue('download') == true) {
         echo "Getting FTP file list ...";
         $list = $this->getFtpFileList('ftp.ncbi.nlm.nih.gov', '/refseq/release/complete/', '/(complete\\.[0-9]+\\.protein\\.gpff\\.gz)/');
         // empty() also covers a null/false result, on which count() would fail
         if (empty($list)) {
             trigger_error("Unable to get list of files from FTP site. Check internet connection", E_USER_ERROR);
             exit(-1);
         }
         asort($list);
         $buf = implode("\n", $list);
         file_put_contents($list_file, $buf);
         echo "Done." . PHP_EOL;
     } else {
         echo "Using existing ftp list" . PHP_EOL;
         $list = explode("\n", file_get_contents($list_file));
     }

     $counter = 1;
     $total = count($list);
     foreach ($list as $f) {
         $lfile = $ldir . $f;
         echo "Processing " . $counter++ . "/{$total} {$f}. ";
         if (!file_exists($lfile) || $this->getParameterValue('download') == true) {
             $rfile = parent::getParameterValue('download_url') . $f;
             echo "Downloading ...";
             utils::DownloadSingle($rfile, $lfile);
             echo "done.";
         } else {
             echo "Using existing file.";
         }
         echo PHP_EOL;
     }
     //if download

     // iterate over the files
     $files = $this->getFilePaths($ldir, 'gz');
     asort($files);
     foreach ($files as $f) {
         $lfile = $ldir . $f;
         $output_format = parent::getParameterValue('output_format');
         $ofile = $odir . basename($f, ".gz") . "." . $output_format;
         $gz = strstr($output_format, "gz") !== false;
         parent::setWriteFile($ofile, $gz);
         parent::setReadFile($lfile, true);
         echo "processing {$f} ...";
         $this->process();
         parent::clear();
         echo "done!" . PHP_EOL;
         $this->getReadFile()->close();
         $this->getWriteFile()->close();

         $source_file = (new DataResource($this))
             // remote location = download URL + file name (not the local path)
             ->setURI(parent::getParameterValue('download_url') . $f)
             ->setTitle("NCBI RefSeq - {$f}")
             ->setRetrievedDate(gmdate($date_format, filemtime($lfile)))
             ->setFormat('text/refseq-format')
             ->setFormat('application/zip')
             ->setPublisher('http://www.ncbi.nlm.nih.gov')
             ->setHomepage('http://www.ncbi.nlm.nih.gov/refseq')
             ->setRights('use')
             ->setRights('attribution')
             ->setLicense('http://www.nlm.nih.gov/copyright.html')
             ->setDataset(parent::getDatasetURI());

         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = gmdate($date_format);
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$f}")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/refseq/refseq.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("restricted-by-source-license")
             // the license host was previously written without ".org"
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         $dataset_description .= $output_file->toRDF() . $source_file->toRDF();
     }
     //for
     parent::writeToReleaseFile($dataset_description);
     parent::getWriteFile()->close();
 }
Beispiel #10
0
 /**
  * Downloads (when needed) and converts the selected NCBI Gene modules to RDF,
  * then writes the dataset release file.
  *
  * The 'files' parameter is either 'all' or a comma-separated list of package-map
  * keys; keys that are not in the package map are ignored. Each module is stored
  * locally as "<module>.gz" in 'indir' and converted by the method named after
  * the module (gene2refseq is handled by gene2accession()).
  *
  * @return void
  */
 function process()
 {
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     // 'H' gives the zero-padded hour a valid xsd:dateTime needs; timestamps are
     // rendered with gmdate() so that the trailing 'Z' (UTC) is actually true
     $date_format = 'Y-m-d\TH:i:s\Z';

     // which files are to be converted?
     $selection = trim($this->GetParameterValue('files'));
     $packageMap = $this->getPackageMap();
     if ($selection == 'all') {
         $files = $packageMap;
     } else {
         $files = array();
         foreach (explode(",", $selection) as $name) {
             if (array_key_exists($name, $packageMap)) {
                 $files[$name] = $packageMap[$name];
             }
         }
     }

     if ($this->getParameterValue('limit_organisms') == true) {
         // flipped so that taxon ids become array keys
         $this->taxids = array_flip(explode(",", $this->getParameterValue('organisms')));
     }

     // set dataset graph to be dataset URI
     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }

     // first pass: make sure every module is available locally
     foreach ($files as $module => $rfilename) {
         $lfile = $ldir . $module . ".gz";
         $rfile = $rdir . $rfilename;
         $missing = !file_exists($lfile);
         if ($missing || parent::getParameterValue('download') == true) {
             if ($missing) {
                 // only claim "not found" when the file really is absent
                 trigger_error("{$lfile} not found. Will attempt to download.", E_USER_NOTICE);
             }
             $myfile = $lfile;
             if ($module == "gene2sts" || $module == "gene2unigene") {
                 // these two modules are written through the zlib stream wrapper
                 $myfile = "compress.zlib://" . $lfile;
             }
             echo "downloading {$module} ...";
             utils::DownloadSingle($rfile, $myfile);
             echo "done" . PHP_EOL;
         }
     }

     // second pass: convert each module and describe it
     $dataset_description = '';
     foreach ($files as $module => $rfilename) {
         $lfile = $ldir . $module . ".gz";
         $rfile = $rdir . $rfilename;
         $output_format = parent::getParameterValue('output_format');
         $ofile = $module . "." . $output_format;
         $gz = strstr($output_format, "gz") !== false;

         echo "Processing {$module} ... ";
         parent::setReadFile($lfile, true);
         parent::setWriteFile($odir . $ofile, $gz);
         // the module name doubles as the converter method name, except gene2refseq
         $fnx = ($module == 'gene2refseq') ? 'gene2accession' : $module;
         $this->{$fnx}();
         parent::clear();
         echo 'done!' . PHP_EOL;
         parent::getReadFile()->close();
         parent::getWriteFile()->close();

         // dataset description
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("NCBI Gene ({$module})")
             ->setRetrievedDate(gmdate($date_format, filemtime($lfile)))
             ->setFormat("text/tab-separated-value")
             ->setFormat("application/gzip")
             ->setPublisher("http://www.ncbi.nlm.nih.gov")
             ->setHomepage("http://www.ncbi.nlm.nih.gov/gene")
             ->setRights("use-share-modify")
             ->setLicense("http://www.ncbi.nlm.nih.gov/About/disclaimer.html")
             ->setDataset("http://identifiers.org/ncbigene/");

         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = gmdate($date_format);
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ncbigene/ncbigene.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());

         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr($output_format, "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
     }
     //foreach

     // set graph URI back to default value
     parent::setGraphURI($graph_uri);

     // write dataset description to file
     echo "Generating dataset description... ";
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Beispiel #11
0
 /**
  * Converts the requested SIDER files to RDF and writes the dataset description.
  *
  * Reads the 'indir', 'outdir', 'files', 'download', 'download_url',
  * 'output_format' and 'bio2rdf_release' parameters. For every selected file
  * the gzipped TSV is downloaded when it is missing locally or when 'download'
  * equals 'true', the parser method named after the file is invoked, and a
  * source/output DataResource pair is appended to the dataset description,
  * which is finally written to the Bio2RDF release file in the output directory.
  */
 function run()
 {
     $idir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $files = parent::getParameterValue('files');
     $dataset_description = '';
     if ($files == 'all') {
         // presumably the first entry of the '|'-separated list is the 'all' keyword itself — TODO confirm
         $files = explode('|', parent::getParameterList('files'));
         array_shift($files);
     } else {
         $files = explode(',', parent::getParameterValue('files'));
     }
     foreach ($files as $file) {
         // every file except "freq" carries an "all_" prefix in its remote name
         $f = $file;
         if ($file != "freq") {
             $f = "all_" . $file;
         }
         $f = "meddra_" . $f . ".tsv.gz";
         $lfile = $idir . $f;
         $rfile = parent::getParameterValue('download_url') . $f;
         if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') {
             echo "downloading {$file}... ";
             $ret = file_get_contents($rfile);
             if ($ret === FALSE) {
                 // a failed download only skips this file
                 trigger_error("Unable to get {$rfile}", E_USER_WARNING);
                 continue;
             }
             $ret = file_put_contents($lfile, $ret);
             if ($ret === FALSE) {
                 // being unable to write locally is treated as fatal
                 trigger_error("Unable to write {$lfile}", E_USER_ERROR);
                 exit;
             }
             echo "done!" . PHP_EOL;
         }
         echo "Processing {$f}... ";
         parent::setReadFile($lfile, true);
         $suffix = parent::getParameterValue('output_format');
         $ofile = "sider-" . $file . '.' . $suffix;
         $gz = false;
         if (strstr(parent::getParameterValue('output_format'), "gz")) {
             $gz = true;
         }
         parent::setWriteFile($odir . $ofile, $gz);
         // dispatch to the parser method that shares its name with the file key
         $this->{$file}();
         parent::getWriteFile()->Close();
         parent::getReadFile()->Close();
         echo "done!" . PHP_EOL;
         echo "Generating dataset description... ";
         // fixed: the title was missing its closing parenthesis
         $source_file = (new DataResource($this))->setURI($rfile)->setTitle("SIDER Side Effect resource ({$file}.tsv.gz)")->setRetrievedDate(parent::getDate(filemtime($lfile)))->setFormat("text/tab-separated-value")->setFormat("application/gzip")->setPublisher("http://sideeffects.embl.de/")->setHomepage("http://sideeffects.embl.de/")->setRights("use-share-modify")->setLicense("http://creativecommons.org/licenses/by-nc-sa/3.0/")->setDataset("http://identifiers.org/sider.effect/");
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = parent::getDate(filemtime($odir . $ofile));
         // fixed: the download host was misspelled as "download.bio2df.org"
         $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sider/sider.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr(parent::getParameterValue('output_format'), "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
     }
     //foreach
     // write the accumulated description of all processed files
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Beispiel #12
0
 /**
  * Converts Affymetrix probeset annotation archives to a single RDF file.
  *
  * Fetches (or reuses) the listing page, extracts the names of all .csv.zip
  * archives from it, narrows them down by the 'files' parameter, parses every
  * archive found in the input directory into one 'affymetrix.<format>' output
  * file and finally writes the dataset description to the release file.
  *
  * @return bool true on success; false when a CSV entry cannot be read from
  *              an archive or when no archive could be processed at all
  */
 function Run()
 {
     // directory shortcuts
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     // get the listings page
     $url = trim(parent::getParameterValue('download_url'));
     $listing_file = $ldir . "probeset_list.html";
     if (!file_exists($listing_file) || parent::getParameterValue("download") == "true") {
         echo "Downloading {$listing_file}" . PHP_EOL;
         Utils::DownloadSingle($url, $listing_file);
     }
     $listings = file_get_contents($listing_file);
     // make a list of the csv.zip files
     preg_match_all("/\"([^\"]+)\\.csv\\.zip\"/", $listings, $m);
     if (count($m[1]) == 0) {
         trigger_error("could not find any .csv.zip files in {$url}");
         exit;
     }
     if (parent::getParameterValue("files") == 'all') {
         $myfiles = $m[1];
     } else {
         // keep the first listed archive whose name contains each requested token
         $a = explode(",", parent::getParameterValue("files"));
         foreach ($a as $f) {
             $found = false;
             foreach ($m[1] as $n) {
                 if (strstr($n, $f)) {
                     $found = true;
                     $myfiles[] = $n;
                     break;
                 }
             }
             if ($found === false) {
                 echo "cannot find {$f} in list" . PHP_EOL;
             }
         }
     }
     if (!isset($myfiles)) {
         // nothing to do
         exit;
     }
     $dataset_description = '';
     // remains null until at least one archive has been parsed
     $source_file = null;
     // set the write file
     $gz = strstr(parent::getParameterValue('output_format'), ".gz") === FALSE ? false : true;
     $outfile = 'affymetrix.' . parent::getParameterValue('output_format');
     $this->setWriteFile($odir . $outfile, $gz);
     // iterate over the files
     foreach ($myfiles as $rfile) {
         $base_file = substr($rfile, strrpos($rfile, "/") + 1);
         $base_url = substr($rfile, 0, strrpos($rfile, "/"));
         // get and set the dataset version
         if (parent::getDatasetVersion() == null) {
             preg_match("/\\.na([0-9]{2})\\.annot/", $base_file, $m);
             if (isset($m[1])) {
                 $this->setDatasetVersion($m[1]);
             }
         }
         // rewrite the file name when a different annotation version was requested
         if (parent::getDatasetVersion() != parent::getParameterValue('version')) {
             $base_file = str_replace("na" . parent::getDatasetVersion(), "na" . parent::getParameterValue('version'), $base_file);
         }
         $csv_file = $base_file . ".csv";
         $zip_file = $csv_file . ".zip";
         $lfile = $ldir . $zip_file;
         if (!file_exists($lfile)) {
             echo "skipping: {$lfile} does not exist" . PHP_EOL;
             continue;
         }
         echo "processing {$lfile}" . PHP_EOL;
         // open the zip file
         $zin = new ZipArchive();
         // ZipArchive::open() returns true on success and an integer error code
         // on failure, so a comparison against FALSE never detects an error
         if ($zin->open($lfile) !== TRUE) {
             trigger_error("Unable to open {$lfile}");
             exit;
         }
         if (($fp = $zin->getStream($csv_file)) === FALSE) {
             trigger_error("Unable to get {$csv_file} in ziparchive {$lfile}");
             $zin->close();
             return FALSE;
         }
         parent::setReadFile($lfile);
         parent::getReadFile()->setFilePointer($fp);
         $this->parse($base_file);
         parent::getReadFile()->close();
         // release the archive handle once its stream has been consumed
         $zin->close();
         parent::clear();
         // dataset description
         $source_file = (new DataResource($this))->setURI($rfile)->setTitle("Affymetrix Probeset: {$base_file}")->setRetrievedDate(parent::getDate(filemtime($lfile)))->setFormat("text/tab-separated-value")->setFormat("application/zip")->setPublisher("http://affymetrix.com")->setHomepage("http://www.affymetrix.com/support/technical/annotationfilesmain.affx")->setRights("use")->setRights("no-commercial")->setRights("registration-required")->setLicense("http://www.affymetrix.com/about_affymetrix/legal/index.affx")->setDataset("http://identifiers.org/affy.probeset/");
         $dataset_description .= $source_file->toRDF();
     }
     $this->getWriteFile()->close();
     if ($source_file === null) {
         // every candidate was skipped; there is no source to describe
         trigger_error("No Affymetrix probeset files were processed", E_USER_WARNING);
         return FALSE;
     }
     // write the dataset description
     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = parent::getDate(filemtime($odir . $outfile));
     // NOTE(review): only the last processed source file is recorded as the source
     $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outfile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/affymetrix/affymetrix.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr(parent::getParameterValue('output_format'), "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description .= $output_file->toRDF();
     // write the dataset description
     $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
     $this->getWriteFile()->write($dataset_description);
     $this->getWriteFile()->close();
     return true;
 }
Beispiel #13
0
 /**
  * Converts the selected MeSH files to RDF and writes the dataset description.
  *
  * The 'files' parameter selects entries of the package map (all of them, or
  * a comma-separated list of keys). Each entry is a file name pattern whose
  * "YEAR" placeholder is replaced by the 'year' parameter. The file is
  * downloaded when missing locally or when 'download' equals "true", then the
  * parser method named after the package key is invoked.
  */
 function Run()
 {
     $sp = trim(parent::getParameterValue('files'));
     if ($sp == 'all') {
         $files = $this->getPackageMap();
     } else {
         // keep only the requested keys that exist in the package map
         $s_a = explode(",", $sp);
         $pm = $this->getPackageMap();
         $files = array();
         foreach ($s_a as $a) {
             if (array_key_exists($a, $pm)) {
                 $files[$a] = $pm[$a];
             }
         }
     }
     //else
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $dd = '';
     //now iterate over the files array
     $year = parent::getParameterValue('year');
     foreach ($files as $k => $fpattern) {
         $file = str_replace("YEAR", $year, $fpattern);
         $lfile = $ldir . $file;
         $rfile = parent::getParameterValue("download_url") . $file;
         // download if necessary
         if (!file_exists($lfile) || parent::getParameterValue('download') == "true") {
             echo "Downloading {$file} ... ";
             $ret = utils::downloadSingle($rfile, $lfile);
             if ($ret === FALSE) {
                 // E_USER_ERROR would terminate the script under the default
                 // error handler and make the "continue" below unreachable;
                 // a warning lets the remaining files be processed
                 trigger_error("Unable to get {$file}", E_USER_WARNING);
                 continue;
             }
             echo "done!" . PHP_EOL;
         }
         //set the outfile
         $ofile = "mesh_" . $k . "." . parent::getParameterValue('output_format');
         $gz = strstr(parent::getParameterValue('output_format'), "gz") ? true : false;
         echo "processing {$k} ...";
         parent::setReadFile($lfile, FALSE);
         parent::setWriteFile($odir . $ofile, $gz);
         // dispatch to the parser method that shares its name with the package key
         $fnx = $k;
         $this->{$fnx}();
         parent::writeRDFBufferToWriteFile();
         parent::getWriteFile()->close();
         echo "done!" . PHP_EOL;
         $source_file = (new DataResource($this))->setURI($rfile)->setTitle("MeSH")->setRetrievedDate(parent::getDate(filemtime($lfile)))->setFormat("text/x-mesh-record")->setPublisher("http://www.nlm.nih.gov")->setHomepage("http://www.nlm.nih.gov/mesh/")->setRights("use")->setLicense("http://www.nlm.nih.gov/databases/download.html")->setDataset("http://identifiers.org/mesh/");
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = parent::getDate(filemtime($odir . $ofile));
         $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/mesh/mesh.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr(parent::getParameterValue('output_format'), "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dd .= $source_file->toRDF() . $output_file->toRDF();
     }
     //foreach
     // write the accumulated description of all processed files
     parent::setWriteFile($odir . $this->getBio2RDFReleaseFile($this->getNamespace()));
     parent::getWriteFile()->write($dd);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Beispiel #14
0
 /**
  * Converts the selected Gene Ontology Annotation (GOA) files to RDF and
  * writes the dataset description.
  *
  * For every file key the gzipped association file is downloaded on demand,
  * handed to process(), and described by a source/output DataResource pair.
  * While the description is generated the graph URI is temporarily switched
  * to the dataset URI when the 'dataset_graph' parameter is set.
  */
 function run()
 {
     if (parent::getParameterValue('files') == 'all') {
         // presumably the first entry of the '|'-separated list is the 'all' keyword itself — TODO confirm
         $files = explode("|", $this->GetParameterList('files'));
         array_shift($files);
     } else {
         $files = explode(",", parent::getParameterValue('files'));
     }
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     $dataset_description = '';
     foreach ($files as $file) {
         // NOTE(review): the loose comparisons below treat any non-empty string
         // (including 'false') as true — confirm what getParameterValue returns
         $download = parent::getParameterValue('download');
         $lfile = $ldir . "goa_" . $file . ".gz";
         if (!file_exists($lfile) && $download == false) {
             trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
             $download = true;
         }
         //download file
         $rfile = $rdir . strtoupper($file) . "/gene_association.goa_" . $file . ".gz";
         if ($download == true) {
             echo "downloading {$file} ... ";
             //file_put_contents($lfile,file_get_contents($rfile));
             utils::DownloadSingle($rfile, $lfile);
         }
         $gz = strstr(parent::getParameterValue('output_format'), ".gz") === FALSE ? false : true;
         $ofile = "goa_" . $file . "." . parent::getParameterValue('output_format');
         parent::setReadFile($lfile, TRUE);
         parent::setWriteFile($odir . $ofile, $gz);
         echo "processing {$file} ... ";
         $this->process($file);
         echo "done!";
         parent::clear();
         //close write file
         parent::getWriteFile()->close();
         echo PHP_EOL;
         // dataset description
         $graph_uri = parent::getGraphURI();
         if (parent::getParameterValue('dataset_graph') == true) {
             parent::setGraphURI(parent::getDatasetURI());
         }
         // fixed: "H" (zero-padded hour) replaces "G", which yields malformed
         // timestamps such as "T5:03:02Z"; the title also gained its closing parenthesis
         $source_file = (new DataResource($this))->setURI($rfile)->setTitle("Gene Ontology Annotation file {$file} ({$rfile})")->setRetrievedDate(date("Y-m-d\\TH:i:s\\Z", filemtime($lfile)))->setFormat("text/tab-separated-value")->setFormat("application/gzip")->setPublisher("http://www.ebi.ac.uk/")->setHomepage("http://www.ebi.ac.uk/GOA/")->setRights("use")->setLicense("http://www.ebi.ac.uk/GOA/goaHelp.html")->setDataset("http://identifiers.org/goa/");
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = date("Y-m-d\\TH:i:s\\Z");
         // fixed: the creator previously pointed at the irefindex script
         $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/goa/goa.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr(parent::getParameterValue('output_format'), "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
         // restore the graph URI that was active before the description was built
         parent::setGraphURI($graph_uri);
     }
     // write the accumulated description of all processed files
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Beispiel #15
0
 /**
  * Converts the selected WormBase files to RDF and writes the dataset description.
  *
  * Each file key is mapped to a remote path (relative to 'download_url') and
  * a local file name (relative to 'indir'). The file is downloaded when it is
  * missing or when 'download' is set, parsed by the method named after the
  * key, and described by a source/output DataResource pair. Keys that are not
  * present in both maps are reported and skipped.
  */
 public function run()
 {
     if (parent::getParameterValue('files') == 'all') {
         // presumably the first entry of the '|'-separated list is the 'all' keyword itself — TODO confirm
         $files = explode("|", parent::getParameterList('files'));
         array_shift($files);
     } else {
         $files = explode(",", parent::getParameterValue('files'));
     }
     $release = parent::getParameterValue('release');
     // NOTE(review): the ontology association files use a hard-coded release tag
     $releaseb = "WS249";
     $remote_files = array("geneIDs" => "species/c_elegans/annotation/geneIDs/c_elegans.PRJNA13758." . $release . ".geneIDs.txt.gz", "functional_descriptions" => "species/c_elegans/annotation/functional_descriptions/c_elegans.PRJNA13758." . $release . ".functional_descriptions.txt.gz", "gene_interactions" => "species/c_elegans/annotation/gene_interactions/c_elegans.PRJNA13758." . $release . ".gene_interactions.txt.gz", "gene_associations" => "releases/current-production-release/ONTOLOGY/gene_association." . $releaseb . ".wb", "phenotype_associations" => "releases/current-production-release/ONTOLOGY/phenotype_association." . $releaseb . ".wb");
     $local_files = array("geneIDs" => "wormbase." . parent::getParameterValue('release') . ".genes.txt.gz", "functional_descriptions" => "wormbase." . parent::getParameterValue('release') . ".functional_descriptions.txt.gz", "gene_interactions" => "wormbase." . parent::getParameterValue('release') . ".gene_interactions.txt.gz", "gene_associations" => "wormbase." . parent::getParameterValue('release') . ".gene_association.wb", "phenotype_associations" => "wormbase." . parent::getParameterValue('release') . ".phenotype_associations.wb");
     $idir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     $dataset_description = '';
     // descriptions are optionally emitted into the dataset graph
     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }
     foreach ($files as $file) {
         // an unknown key would otherwise resolve to the bare directories
         if (!isset($local_files[$file]) || !isset($remote_files[$file])) {
             trigger_error("Unknown WormBase file '{$file}' ... skipping", E_USER_WARNING);
             continue;
         }
         $lfile = $idir . $local_files[$file];
         $rfile = $rdir . $remote_files[$file];
         $missing = !file_exists($lfile);
         // NOTE(review): loose comparison treats any non-empty string as true — confirm parameter type
         if ($missing or parent::getParameterValue('download') == true) {
             if ($missing) {
                 // only report "not found" when the file really is absent
                 trigger_error($lfile . " not found. Will attempt to download." . PHP_EOL, E_USER_WARNING);
             }
             echo "Downloading {$rfile}... ";
             Utils::DownloadSingle($rfile, $lfile);
             echo "done!" . PHP_EOL;
         }
         // the second argument is true only for paths containing "gz"
         if (strstr($lfile, "gz")) {
             parent::setReadFile($lfile, TRUE);
         } else {
             parent::setReadFile($lfile, FALSE);
         }
         $suffix = parent::getParameterValue('output_format');
         $ofile = "wormbase." . $file . "." . $suffix;
         $gz = strstr(parent::getParameterValue('output_format'), "gz") ? true : false;
         parent::setWriteFile($odir . $ofile, $gz);
         echo "Processing {$file}... ";
         // dispatch to the parser method that shares its name with the file key
         $fnx = $file;
         $this->{$fnx}();
         echo "done!" . PHP_EOL;
         parent::getWriteFile()->close();
         // generate the dataset release file
         echo "Generating dataset description for {$ofile}... ";
         // dataset description
         // fixed: "H" (zero-padded hour) replaces "G", which yields malformed
         // timestamps such as "T5:03:02Z"
         $source_file = (new DataResource($this))->setURI($rfile)->setTitle("WormBase Release " . parent::getParameterValue('release') . " subset ({$file})")->setRetrievedDate(date("Y-m-d\\TH:i:s\\Z", filemtime($lfile)))->setFormat("text/tab-separated-value")->setFormat("application/gzip")->setPublisher("http://wormbase.org/")->setHomepage("http://wormbase.org/")->setRights("use")->setRights("restricted-by-source-license")->setLicense("http://www.wormbase.org/about/policies")->setDataset("http://identifiers.org/wormbase/");
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = date("Y-m-d\\TH:i:s\\Z");
         $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/wormbase/wormbase.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr(parent::getParameterValue('output_format'), "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
         echo "done!" . PHP_EOL;
     }
     // restore the graph URI before writing the release file
     parent::setGraphURI($graph_uri);
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
 }
Beispiel #16
0
 /**
  * Converts the selected KEGG databases to RDF and writes the dataset description.
  *
  * The "genes" database is handled separately: the genome list is read and,
  * for every organism in the built-in allow-list, its gene list is processed
  * into one shared 'kegg-genes.<format>' file. All other databases are
  * processed one list file per database into 'kegg-<db>.<format>'.
  * An optional comma-separated 'id_list' parameter is stored in $this->idlist.
  */
 function run()
 {
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $dataset_description = '';
     $files = parent::getParameterValue('files');
     if ($files == 'all') {
         // presumably the first entry of the '|'-separated list is the 'all' keyword itself — TODO confirm
         $files = explode('|', parent::getParameterList('files'));
         array_shift($files);
     } else {
         $files = explode(',', parent::getParameterValue('files'));
     }
     if (parent::getParameterValue('id_list') != '') {
         $this->idlist = explode(",", parent::getParameterValue("id_list"));
     }
     // handle genes separately
     if (in_array("genes", $files)) {
         $orgs = array("hsa");
         //,"mmu","eco","dre","dme","ath","sce","ddi");
         echo "processing genes" . PHP_EOL;
         $ofile = "kegg-genes." . parent::getParameterValue('output_format');
         $gz = strstr(parent::getParameterValue('output_format'), "gz") ? true : false;
         parent::setWriteFile($odir . $ofile, $gz);
         // get the list of genomes
         $lfile = $ldir . "genome.txt";
         $rfile = parent::getParameterValue("download_url") . "list/genome";
         if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') {
             utils::downloadSingle($rfile, $lfile);
         }
         // remains null when no organism from the allow-list was processed
         $source_file = null;
         $fp = fopen($lfile, "r");
         if ($fp === false) {
             // without the genome list no organism can be processed
             trigger_error("Unable to open {$lfile}", E_USER_WARNING);
         } else {
             // compare against false so that a line consisting of "0" does not end the loop
             while (($l = fgets($fp)) !== false) {
                 $a = explode("\t", $l);
                 if (!isset($a[1])) {
                     // malformed line without a second tab-separated column
                     continue;
                 }
                 $b = explode(", ", $a[1]);
                 $org = $b[0];
                 if (!in_array($org, $orgs)) {
                     continue;
                 }
                 // get the list of genes for this organism
                 echo "processing {$org}" . PHP_EOL;
                 $this->org = $org;
                 // from here on $lfile/$rfile refer to the organism's gene list
                 $lfile = $ldir . $org . ".txt";
                 $rfile = parent::getParameterValue("download_url") . "list/{$org}";
                 if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') {
                     utils::downloadSingle($rfile, $lfile);
                 }
                 parent::setReadFile($lfile, false);
                 $this->process("gene");
                 parent::getReadFile()->close();
                 parent::clear();
                 $this->org = null;
                 // add dataset description
                 $source_file = (new DataResource($this))->setURI($rfile)->setTitle("KEGG: Gene")->setRetrievedDate(parent::getDate(filemtime($lfile)))->setFormat("text/plain")->setPublisher("http://www.kegg.jp/")->setHomepage("http://www.kegg.jp/")->setRights("use")->setRights("no-commercial")->setLicense("http://www.kegg.jp/kegg/legal.html")->setDataset("http://identifiers.org/kegg/");
                 $dataset_description .= $source_file->toRDF();
             }
             fclose($fp);
         }
         parent::getWriteFile()->close();
         echo "done" . PHP_EOL;
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = parent::getDate(filemtime($odir . $ofile));
         // the setter order of the original chain is preserved; the source is
         // only attached when at least one organism was processed
         $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - Gene ");
         if ($source_file !== null) {
             $output_file->setSource($source_file->getURI());
         }
         $output_file->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/kegg/kegg.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr(parent::getParameterValue('output_format'), "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $output_file->toRDF();
     }
     // all other files
     foreach ($files as $db) {
         if ($db == "genes") {
             continue;
         }
         echo "processing {$db}" . PHP_EOL;
         $lfile = $ldir . $db . ".txt";
         $rfile = parent::getParameterValue("download_url") . "list/{$db}";
         if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') {
             echo "Downloading {$rfile} ";
             $ret = utils::downloadSingle($rfile, $lfile);
             if ($ret === false) {
                 // fixed: the message interpolated the undefined variable $file
                 echo "unable to download {$db} ... skipping" . PHP_EOL;
                 continue;
             }
             echo "done." . PHP_EOL;
         }
         // now for each list, get the individual entries
         $ofile = "kegg-{$db}." . parent::getParameterValue('output_format');
         $gz = strstr(parent::getParameterValue('output_format'), "gz") ? true : false;
         parent::setReadFile($lfile, false);
         parent::setWriteFile($odir . $ofile, $gz);
         $this->process($db);
         parent::getWriteFile()->close();
         parent::getReadFile()->close();
         parent::clear();
         echo "done!" . PHP_EOL;
         // add dataset description
         $source_file = (new DataResource($this))->setURI($rfile)->setTitle("KEGG: {$db}")->setRetrievedDate(parent::getDate(filemtime($lfile)))->setFormat("text/plain")->setPublisher("http://www.kegg.jp/")->setHomepage("http://www.kegg.jp/")->setRights("use")->setRights("no-commercial")->setLicense("http://www.kegg.jp/kegg/legal.html")->setDataset("http://identifiers.org/kegg/");
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = parent::getDate(filemtime($odir . $ofile));
         $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$db} ")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/kegg/kegg.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr(parent::getParameterValue('output_format'), "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
     }
     // write the dataset description
     $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
     $this->getWriteFile()->write($dataset_description);
     $this->getWriteFile()->close();
 }
Beispiel #17
0
 /**
  * Converts the selected iRefIndex MITAB archives to RDF and writes the
  * dataset description.
  *
  * For every file key the zip archive '<Key>.mitab.<version>.txt.zip' is
  * downloaded when missing (or when 'download' is set), its first entry is
  * streamed through Parse(), and a source/output DataResource pair is added
  * to the dataset description.
  *
  * @return bool TRUE on success; FALSE when a download fails or the archive
  *              entry cannot be opened as a stream
  */
 function Run()
 {
     // get the file list
     if (parent::getParameterValue('files') == 'all') {
         $files = array('all');
     } else {
         $files = explode(",", parent::getParameterValue('files'));
     }
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     $dataset_description = '';
     foreach ($files as $file) {
         // NOTE(review): the loose "== true" test below treats any non-empty
         // string (including 'false') as true — confirm what getParameterValue returns
         $download = parent::getParameterValue('download');
         $version = parent::getParameterValue("version");
         $zip_file = ucfirst($file) . ".mitab." . $version . ".txt.zip";
         $lfile = $ldir . $zip_file;
         $gz = strstr(parent::getParameterValue('output_format'), ".gz") === FALSE ? false : true;
         $ofile = "irefindex-" . $file . "." . parent::getParameterValue('output_format');
         if (!file_exists($lfile)) {
             trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
             $download = true;
         }
         $rfile = $rdir . $zip_file;
         if ($download == true) {
             echo "downloading {$rfile}" . PHP_EOL;
             if (FALSE === Utils::DownloadSingle($rfile, $lfile)) {
                 trigger_error("Error in Download");
                 return FALSE;
             }
         }
         $zin = new ZipArchive();
         // ZipArchive::open() returns true on success and an integer error code
         // on failure, so a comparison against FALSE never detects an error
         if ($zin->open($lfile) !== TRUE) {
             trigger_error("Unable to open {$lfile}");
             exit;
         }
         if ($zin->numFiles != 1) {
             trigger_error("Found more than one file ... using first file");
         }
         $f = $zin->statIndex(0);
         $base_file = $f['name'];
         if (($fp = $zin->getStream($base_file)) === FALSE) {
             trigger_error("Unable to get {$base_file} in ziparchive {$lfile}");
             // do not leak the archive handle on the error path
             $zin->close();
             return FALSE;
         }
         parent::setReadFile($lfile);
         parent::getReadFile()->setFilePointer($fp);
         echo "Processing " . $file . " ...";
         // fixed: compression now follows the requested output format instead
         // of always being enabled, matching the format recorded in the description
         parent::setWriteFile($odir . $ofile, $gz);
         if ($this->Parse() === FALSE) {
             trigger_error("Parsing Error");
             exit;
         }
         parent::writeRDFBufferToWriteFile();
         parent::getWriteFile()->close();
         $zin->close();
         echo "Done!" . PHP_EOL;
         $graph_uri = parent::getGraphURI();
         if (parent::getParameterValue('dataset_graph') == true) {
             parent::setGraphURI(parent::getDatasetURI());
         }
         // dataset description
         // fixed: "H" (zero-padded hour) replaces "G", which yields malformed
         // timestamps such as "T5:03:02Z"; the title also gained its closing parenthesis
         $source_file = (new DataResource($this))->setURI($rfile)->setTitle("iRefIndex ({$zip_file})")->setRetrievedDate(date("Y-m-d\\TH:i:s\\Z", filemtime($lfile)))->setFormat("text/tab-separated-value")->setFormat("application/zip")->setPublisher("http://irefindex.uio.no")->setHomepage("http://irefindex.uio.no")->setRights("use")->setRights("by-attribution")->setRights("no-commercial")->setLicense("http://irefindex.uio.no/wiki/README_MITAB2.6_for_iRefIndex#License")->setDataset("http://identifiers.org/irefindex/");
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = date("Y-m-d\\TH:i:s\\Z");
         $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/irefindex/irefindex.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr(parent::getParameterValue('output_format'), "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
         // restore the graph URI that was active before the description was built
         parent::setGraphURI($graph_uri);
     }
     // write the accumulated description of all processed files
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     return TRUE;
 }
Beispiel #18
0
 /**
  * Convert the selected NCBI Taxonomy packages and write the dataset description.
  *
  * Parameters read: 'indir', 'outdir', 'files', 'download', 'output_format'
  * and 'bio2rdf_release'. For every selected package the local file is
  * downloaded when the 'download' parameter is set (the parameter is switched
  * on as soon as one local file is missing). Zip based packages are opened,
  * and for every known archive member the method named after the member key
  * is invoked with the member stream as read file. The accumulated dataset
  * description is written once to the release file after all packages were
  * handled.
  *
  * The script terminates via exit when an archive or one of its members
  * cannot be opened.
  */
 public function Run()
 {
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');

     // which files are to be converted?
     $selectedPackage = trim(parent::getParameterValue('files'));
     if ($selectedPackage == 'all') {
         $files = $this->getPackageMap();
     } else {
         $pm = $this->getPackageMap();
         $files = array();
         foreach (explode(",", $selectedPackage) as $a) {
             if (array_key_exists($a, $pm)) {
                 $files[$a] = $pm[$a];
             }
         }
     }

     // output compression only depends on the requested output format
     $output_format = parent::getParameterValue('output_format');
     $gz = strstr($output_format, 'gz') ? true : false;

     // packages that are delivered as zip archives
     $zipped_packages = array("taxdmp", "gi2taxid_protein", "gi2taxid_nucleotide");
     // archive members that have a conversion method of the same name
     $handled_members = array("names", "nodes", "citations", "gencode", "division", "gi_taxid_prot", "gi_taxid_nucl");

     $dataset_description = '';
     foreach ($files as $key => $value) {
         $lfile = $ldir . $value['filename'];
         if (!file_exists($lfile) && parent::getParameterValue('download') == false) {
             trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
             $this->SetParameterValue('download', true);
         }

         // download all files [except mapping file]
         if ($this->GetParameterValue('download') == true) {
             $rfile = $value["file_url"];
             // var_dump() prints on its own and returns nothing, so the URL is echoed directly
             echo "downloading " . $rfile . " ... ";
             utils::downloadSingle($rfile, $lfile);
         }

         if (!in_array($key, $zipped_packages, true)) {
             continue;
         }

         // get the name of the zip archive
         $lfile = $value["filename"];
         $zinfile = $ldir . $lfile;

         // make sure we have the zip archive;
         // ZipArchive::open() reports failures as an error code, only TRUE means success
         $zin = new ZipArchive();
         if ($zin->open($zinfile) !== true) {
             trigger_error("Unable to open {$zinfile}");
             exit;
         }

         // describe the source archive and the generated output
         $source_file = (new DataResource($this))
             ->setURI($value['file_url'])
             ->setTitle('NCBI Taxonomy - ' . $key)
             ->setRetrievedDate(date("Y-m-d\\TH:i:sP", filemtime($ldir . $lfile)))
             ->setFormat('text/tab-separated-value')
             ->setFormat('application/zip')
             ->setPublisher('http://www.ncbi.nlm.nih.gov')
             ->setHomepage('http://www.ncbi.nlm.nih.gov/taxonomy')
             ->setRights('use')
             ->setRights('attribution')
             ->setLicense('https://www.nlm.nih.gov/copyright.html')
             ->setDataset(parent::getDatasetURI());

         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = date("Y-m-d\\TH:i:sP");
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$key}")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/taxonomy/taxonomy.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         $dataset_description .= $output_file->toRDF() . $source_file->toRDF();

         // now iterate over the files in the zip archive
         foreach ($value["contents"] as $k => $fn) {
             if (!in_array($k, $handled_members, true)) {
                 continue;
             }
             $fpin = $zin->getStream($fn);
             if (!$fpin) {
                 trigger_error("Unable to get pointer to {$fn} in {$zinfile}");
                 exit("failed\n");
             }
             $gzoutfile = $odir . "taxonomy-{$k}" . "." . $output_format;

             // read from the archive member, write to the per-member output file
             parent::setReadFile($ldir . $lfile);
             parent::getReadFile()->SetFilePointer($fpin);
             parent::setWriteFile($gzoutfile, $gz);

             echo "processing {$fn}...\n";
             // the member key doubles as the name of the conversion method
             $this->{$k}();
             $this->GetWriteFile()->Close();
             echo "done!" . PHP_EOL;
             parent::clear();
         }
     }

     // write the accumulated dataset description once all packages are processed
     $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
     $this->getWriteFile()->write($dataset_description);
     $this->getWriteFile()->close();
 }
Beispiel #19
0
 /**
  * Convert the selected SGD files to RDF and write the dataset description.
  *
  * Parameters read: 'files', 'indir', 'download_url', 'outdir',
  * 'dataset_graph', 'output_format', 'one_file', 'download' and
  * 'bio2rdf_release'. For every requested file the method named after the
  * file key is invoked. Depending on 'one_file' the RDF goes either to a
  * single "sgd.<format>" file or to one "sgd_<file>.<format>" file per input.
  * Finally the source/output descriptions are written to the release file.
  *
  * NOTE(review): a missing local file is only downloaded when the 'download'
  * parameter is false; whether a set 'download' parameter is honoured
  * elsewhere cannot be seen from here - confirm.
  */
 function process()
 {
     if (parent::getParameterValue('files') == 'all') {
         $files = explode("|", parent::getParameterList('files'));
         array_shift($files);
     } else {
         $files = explode(",", parent::getParameterValue('files'));
     }

     $ldir = parent::getParameterValue('indir');
     $rdir = parent::getParameterValue('download_url');
     $odir = parent::getParameterValue('outdir');

     // file key => path below the download url
     $rfiles = array(
         "dbxref" => "curation/chromosomal_feature/dbxref.tab",
         "features" => "curation/chromosomal_feature/SGD_features.tab",
         "domains" => "curation/calculated_protein_info/domains/domains.tab",
         "protein" => "curation/calculated_protein_info/protein_properties.tab",
         "goa" => "curation/literature/gene_association.sgd.gz",
         "goslim" => "curation/literature/go_slim_mapping.tab",
         "complex" => "curation/literature/go_protein_complex_slim.tab",
         "interaction" => "curation/literature/interaction_data.tab",
         "phenotype" => "curation/literature/phenotype_data.tab",
         "pathways" => "curation/literature/biochemical_pathways.tab",
         "mapping" => "mapping",
     );

     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }

     $output_format = parent::getParameterValue('output_format');
     $gz = strstr($output_format, "gz") ? true : false;
     $one_file = parent::getParameterValue('one_file') == true;
     if ($one_file) {
         $ofile = "sgd." . $output_format;
         parent::setWriteFile($odir . $ofile, $gz);
     }

     // builds the RDF description of one generated output file
     $describeOutput = function ($ofile, $file, $source_file) use ($gz, $output_format) {
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         // zero padded hours and a real UTC offset, same format as used for the taxonomy release
         $date = date("Y-m-d\\TH:i:sP");
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sgd/sgd.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr($output_format, "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         return $output_file->toRDF();
     };

     $dataset_description = '';
     $source_file = null;
     $last_file = null;
     foreach ($files as $file) {
         // an unknown key has neither a remote path nor a conversion method
         if (!isset($rfiles[$file])) {
             trigger_error("Unknown file '" . $file . "'. Skipping.", E_USER_WARNING);
             continue;
         }

         // everything that is not a plain .tab file is stored locally as .tab.gz
         // (this includes the extension-less 'mapping' entry)
         $ext = substr(strrchr($rfiles[$file], '.'), 1);
         if ($ext == "tab") {
             $lfile = "sgd_" . $file . ".tab";
         } else {
             $lfile = "sgd_" . $file . ".tab.gz";
         }
         $rfile = $rdir . $rfiles[$file];

         if (!file_exists($ldir . $lfile) && parent::getParameterValue('download') == false && $file != 'mapping') {
             trigger_error($ldir . $lfile . " not found. Will attempt to download.", E_USER_NOTICE);
             Utils::DownloadSingle($rfile, $ldir . $lfile);
         }

         if (!$one_file) {
             $ofile = "sgd_" . $file . '.' . $output_format;
             parent::setWriteFile($odir . $ofile, $gz);
         }

         // parse file; whether the input is compressed follows from the input
         // file name, not from the requested output format
         $read_gz = substr($lfile, -3) == ".gz";
         parent::setReadFile($ldir . $lfile, $read_gz);
         echo "Processing {$file}... ";
         // the file key doubles as the name of the conversion method
         $this->{$file}();
         echo PHP_EOL . "done!";

         // write RDF to file
         parent::writeRDFBufferToWriteFile();
         // close write file
         if (!$one_file) {
             parent::getWriteFile()->close();
         }
         echo PHP_EOL;

         // dataset description of the source file
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("Saccharomyces Genome Database ({$file})")
             ->setRetrievedDate(date("Y-m-d\\TH:i:sP", filemtime($ldir . $lfile)))
             ->setFormat("text/tab-separated-value")
             ->setFormat("application/gzip")
             ->setPublisher("http://www.yeastgenome.org/")
             ->setHomepage("http://www.yeastgenome.org/")
             ->setRights("use")
             ->setLicense("http://www.stanford.edu/site/terms.html")
             ->setDataset("http://identifiers.org/sgd/");
         $dataset_description .= $source_file->toRDF();
         $last_file = $file;

         if (!$one_file) {
             $dataset_description .= $describeOutput($ofile, $file, $source_file);
         }
     }

     // set graph URI back to default
     parent::setGraphURI($graph_uri);

     // in one-file mode the single output is described once, after the loop;
     // the source descriptions were already added inside the loop
     if ($one_file && $source_file !== null) {
         $dataset_description .= $describeOutput($ofile, $last_file, $source_file);
     }

     // write dataset description to file
     echo "Generating dataset description... " . PHP_EOL;
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Beispiel #20
0
 /**
  * Convert the selected CTD files to RDF and write the dataset description.
  *
  * Parameters read: 'files', 'indir', 'download_url', 'outdir',
  * 'dataset_graph', 'output_format' and 'bio2rdf_release'. Every file is
  * kept locally as "<file>.gz"; a missing file is downloaded from
  * "<download_url>CTD_<file><suffix>" and compressed on the fly when the
  * remote file is not gzipped. The method "CTD_<file>" is invoked for each
  * file and the output goes to "ctd_<file>.<output_format>". The collected
  * source/output descriptions are written to the release file at the end.
  */
 function process()
 {
     // get the file list
     if (parent::getParameterValue('files') == 'all') {
         $files = explode("|", parent::getParameterList('files'));
         array_shift($files);
     } else {
         $files = explode(",", parent::getParameterValue('files'));
     }
     $dataset_description = '';

     // set directory values
     $ldir = parent::getParameterValue('indir');
     $rdir = parent::getParameterValue('download_url');
     $odir = parent::getParameterValue('outdir');

     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }

     $gz_suffix = ".gz";
     // the output format does not change between files
     $out_suffix = parent::getParameterValue('output_format');
     $gz = strstr($out_suffix, "gz") ? true : false;

     // remote files that are not served as .tsv.gz
     $remote_suffixes = array(
         'chem_gene_ixn_types' => '.tsv',
         'exposure_ontology' => '.obo',
     );
     // files that have a dataset URI of their own
     $datasets = array(
         "chemicals" => "http://identifiers.org/ctd.chemical/",
         "diseases" => "http://identifiers.org/ctd.disease/",
         "genes" => "http://identifiers.org/ctd.gene/",
     );

     foreach ($files as $file) {
         $suffix = isset($remote_suffixes[$file]) ? $remote_suffixes[$file] : ".tsv.gz";
         $lfile = $ldir . $file . $gz_suffix;
         $rfile = $rdir . 'CTD_' . $file . $suffix;

         if (!file_exists($lfile)) {
             trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
             if ($suffix == ".tsv.gz") {
                 Utils::DownloadSingle($rfile, $lfile);
             } else {
                 // uncompressed remote file: compress while storing it locally
                 Utils::DownloadSingle($rfile, "compress.zlib://" . $lfile);
             }
         }

         $ofile = "ctd_" . $file . "." . $out_suffix;
         echo "Processing " . $file . " ...";
         parent::setWriteFile($odir . $ofile, $gz);
         // set read file
         parent::setReadFile($lfile, TRUE);
         // the conversion method is named after the file
         $fnx = "CTD_" . $file;
         $this->{$fnx}();
         // close write file
         parent::getWriteFile()->close();
         parent::clear();
         echo "done!" . PHP_EOL;

         // generate the dataset release file
         echo "Generating dataset description... ";
         $dataset = isset($datasets[$file]) ? $datasets[$file] : null;

         // dataset description; zero padded hours and a real UTC offset are
         // used for the timestamps
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("Comparative Toxicogenomics Database ({$file}{$gz_suffix})")
             ->setRetrievedDate(date("Y-m-d\\TH:i:sP", filemtime($lfile)))
             ->setFormat("text/tab-separated-value")
             ->setFormat("application/gzip")
             ->setPublisher("http://ctdbase.org/")
             ->setHomepage("http://ctdbase.org/")
             ->setRights("use")
             ->setRights("by-attribution")
             ->setRights("no-commercial")
             ->setLicense("http://ctdbase.org/about/legal.jsp")
             ->setDataset($dataset);

         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = date("Y-m-d\\TH:i:sP");
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ctd/ctd.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr($out_suffix, "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
     }

     parent::setGraphURI($graph_uri);
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Beispiel #21
0
 /**
  * Download OBO/OWL ontologies listed by BioPortal and convert them to RDF.
  *
  * Parameters read: 'indir', 'outdir', 'ncbo_api_key', 'ncbo_api_key_file',
  * 'download', 'download_url', 'files', 'exclude', 'continue_from',
  * 'output_format' and 'bio2rdf_release'. The ontology list is cached as
  * "ontolist.json" in the input directory. Each ontology that passes the
  * include/exclude/continue filters and whose latest submission is in OWL or
  * OBO format is stored as "<acronym>.<format>.gz", converted through
  * OBO2RDF()/OWL2RDF(), and described in "bio2rdf-bioportal.nq" in the output
  * directory.
  *
  * The script terminates via exit when no API key is available.
  */
 function Run()
 {
     $idir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');

     // the API key is taken from the parameter or, failing that, from the key file
     if (parent::getParameterValue('ncbo_api_key')) {
         $apikey = "?apikey=" . parent::getParameterValue('ncbo_api_key');
     } elseif (file_exists(parent::getParameterValue('ncbo_api_key_file'))) {
         $apikey = "?apikey=" . trim(file_get_contents(parent::getParameterValue('ncbo_api_key_file')));
     } else {
         echo "You must provide an NCBO API key either as a file or as a parameter" . PHP_EOL;
         exit;
     }

     // get the list of ontologies from bioportal
     $olist = $idir . "ontolist.json";
     if (!file_exists($olist) || parent::getParameterValue('download') == 'true') {
         echo "downloading ontology list...";
         $r_olist = parent::getParameterValue('download_url') . 'ontologies' . $apikey;
         file_put_contents($olist, file_get_contents($r_olist));
         echo "done" . PHP_EOL;
     }

     // include
     if (parent::getParameterValue('files') == 'all') {
         $include_list = array('all');
     } else {
         $include_list = explode(",", parent::getParameterValue('files'));
     }
     // exclude
     $exclude_list = array();
     if (parent::getParameterValue('exclude') != '') {
         $exclude_list = explode(",", parent::getParameterValue('exclude'));
     }
     // when a starting point is given, everything before it is skipped
     $continue_from = parent::getParameterValue('continue_from');
     $go = $continue_from ? false : true;

     // now go through the list of ontologies
     $ontologies = json_decode(file_get_contents($olist), false);
     if (!is_array($ontologies)) {
         // unreadable or malformed list: nothing can be iterated or counted
         echo "unable to read the ontology list from {$olist}" . PHP_EOL;
         return;
     }
     $total = count($ontologies);
     $bVersion = parent::getParameterValue('bio2rdf_release');

     foreach ($ontologies as $i => $o) {
         $label = (string) $o->name;
         $abbv = (string) $o->acronym;

         if ($continue_from and $continue_from == $abbv) {
             $go = true;
         }
         if ($go == false) {
             continue;
         }
         // excluded ontologies are always skipped
         if (array_search($abbv, $exclude_list) !== FALSE) {
             continue;
         }
         // with an explicit include list, only listed ontologies are processed
         if ($include_list[0] != 'all' && array_search($abbv, $include_list) === FALSE) {
             continue;
         }

         // get info on the latest submission
         $uri = $o->links->latest_submission;
         $ls = json_decode(file_get_contents($uri . $apikey), true);
         if (!isset($ls['hasOntologyLanguage'])) {
             echo 'insufficient metadata' . PHP_EOL;
             continue;
         }
         $format = strtolower($ls['hasOntologyLanguage']);
         if ($format != 'owl' and $format != 'obo') {
             continue;
         }

         echo "Processing ({$i}/{$total}) {$abbv} ... ";
         $rfile = $ls['ontology']['links']['download'];
         $lfile = $abbv . "." . $format . ".gz";

         if (!file_exists($idir . $lfile) or parent::getParameterValue('download') == 'true') {
             echo "downloading ... ";
             // create cURL handle (ch)
             $ch = curl_init();
             if (!$ch) {
                 die("Couldn't initialize a cURL handle");
             }
             // headers are requested as part of the response so that the
             // served file name can be inspected below
             curl_setopt($ch, CURLOPT_URL, $rfile . $apikey);
             curl_setopt($ch, CURLOPT_HEADER, 1);
             curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
             curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
             curl_setopt($ch, CURLOPT_TIMEOUT, 300);
             $ret = curl_exec($ch);
             if (!$ret) {
                 echo "no content";
                 continue;
             }

             $header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
             $header = substr($ret, 0, $header_size);
             preg_match("/filename=\"([^\"]+)\"/", $header, $m);
             if (!isset($m[1])) {
                 echo "error: no filename" . PHP_EOL;
                 continue;
             }
             $filename = $m[1];
             // zip archives are not handled
             if (strstr($filename, ".zip")) {
                 continue;
             }
             // files without a suffix are rejected
             $path = pathinfo($filename);
             if (!isset($path['extension'])) {
                 echo "error: no extension" . PHP_EOL;
                 continue;
             }

             // store the response body gzip-compressed
             $body = substr($ret, $header_size);
             $lz = "compress.zlib://" . $idir . $lfile;
             file_put_contents($lz, $body);
             echo "done" . PHP_EOL;
         }

         if (!file_exists($idir . $lfile)) {
             continue;
         }

         parent::setReadFile($idir . $lfile, true);
         $gz = strstr(parent::getParameterValue('output_format'), ".gz") !== FALSE;
         $ofile = strtolower($abbv) . "." . parent::getParameterValue('output_format');
         parent::setWriteFile($odir . $ofile, $gz);

         // process
         echo "converting ... ";
         set_time_limit(0);

         // let's double check the format: a first line mentioning xml is treated as OWL
         $fp = gzopen($idir . $lfile, "r");
         if ($fp !== false) {
             $l = gzgets($fp);
             if ($l !== false && strstr($l, "xml")) {
                 $format = "owl";
             }
             gzclose($fp);
         }

         if ($format == 'obo') {
             $this->OBO2RDF($abbv);
         } elseif ($format == 'owl') {
             $this->OWL2RDF($abbv);
             if (isset($this->unmapped_uri)) {
                 print_r($this->unmapped_uri);
             }
             unset($this->unmapped_uri);
         } else {
             echo "no processor for {$label} (format {$format})" . PHP_EOL;
         }

         if (!file_exists($odir . $ofile)) {
             echo "no output" . PHP_EOL;
             continue;
         }
         parent::getWriteFile()->close();
         parent::clear();

         // dataset description; the source format is the one that was actually
         // processed, timestamps use zero padded hours and a real UTC offset
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("{$label}")
             ->setRetrievedDate(date("Y-m-d\\TH:i:sP", filemtime($idir . $lfile)))
             ->setFormat($format)
             ->setPublisher("http://www.bioontology.org")
             ->setHomepage("http://bioportal.bioontology.org/")
             ->setRights("use-share-modify")
             ->setLicense("http://www.bioontology.org/terms")
             ->setDataset("http://identifiers.org/{$abbv}");
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/bioportal/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$abbv}")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/bioportal/bioportal.php")
             ->setCreateDate(date("Y-m-d\\TH:i:sP", filemtime($odir . $ofile)))
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/bioportal/bioportal.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr(parent::getParameterValue('output_format'), "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }

         // the description file is opened on first use and flushed after every ontology
         if (!isset($dd)) {
             $dd = fopen($odir . 'bio2rdf-bioportal.nq', "w");
         }
         fwrite($dd, $source_file->toRDF() . $output_file->toRDF());
         fflush($dd);
         echo "done!" . PHP_EOL;
     }

     if (isset($dd)) {
         fclose($dd);
     }
     echo "done!" . PHP_EOL;
 }