Beispiel #1
0
 /**
  * Convert the requested MGI report files to RDF and write the dataset
  * release description.
  *
  * Each entry of the 'files' parameter ('all' expands to the declared
  * parameter list minus its first element) names a "<entry>.rpt" report.
  * The report is downloaded when it is missing locally or when the
  * 'download' parameter equals 'true', then converted by the method of this
  * class that carries the entry's name. A source/output DataResource pair is
  * recorded per report and the collected descriptions are written to the
  * Bio2RDF release file in the output directory.
  */
 function Run()
 {
     $idir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $files = parent::getParameterValue('files');
     if ($files == 'all') {
         $list = explode('|', parent::getParameterList('files'));
         array_shift($list);
     } else {
         $list = explode(',', $files);
     }
     $dataset_description = '';
     foreach ($list as $item) {
         $lfile = $idir . $item . '.rpt';
         $rfile = parent::getParameterValue('download_url') . $item . '.rpt';
         if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') {
             echo "downloading {$item}...";
             $ret = Utils::DownloadSingle($rfile, $lfile);
             if ($ret != true) {
                 // Nothing to convert without the report; move on to the next one.
                 continue;
             }
         }
         parent::setReadFile($lfile, true);
         echo "Processing {$item}...";
         $format = parent::getParameterValue('output_format');
         // Keep the bare file name separate from the local path: the published
         // download URI below must not contain the local output directory.
         $oname = $item . '.' . $format;
         $ofile = $odir . $oname;
         $gz = strpos($format, "gz") !== false;
         parent::setWriteFile($ofile, $gz);
         // Dispatch to the handler method named after the report.
         $this->{$item}();
         parent::getWriteFile()->close();
         parent::getReadFile()->close();
         echo "Done" . PHP_EOL;
         parent::clear();

         // Describe the source report.
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("MGI {$item}")
             ->setRetrievedDate(date("Y-m-d\\TH:i:s", filemtime($lfile)))
             ->setFormat("text")
             ->setPublisher("http://www.informatics.jax.org")
             ->setHomepage("http://www.informatics.jax.org")
             ->setRights("use")
             ->setLicense("http://www.informatics.jax.org/mgihome/other/copyright.shtml")
             ->setDataset("http://identifiers.org/mgi/");

         // Describe the generated RDF file.
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = date("Y-m-d\\TH:i:s");
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$oname}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$item} in {$prefix}")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/mgi/mgi.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strpos($format, "nt") !== false) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
     }

     // generate the dataset release file
     $this->setWriteFile($odir . parent::getBio2RDFReleaseFile());
     $this->getWriteFile()->write($dataset_description);
     $this->getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Beispiel #2
0
 /**
  * Convert the HGNC complete set to RDF and write the dataset release
  * description.
  *
  * The local copy "hgnc_complete_set.txt.gz" is looked up in the input
  * directory; when it is missing the 'download' parameter is switched on and
  * the file is fetched from 'download_url'. The file is then converted via
  * process() into "hgnc.<output_format>" in the output directory, and a
  * source/output DataResource pair is written to the Bio2RDF release file.
  */
 function Run()
 {
     $file = "hgnc_complete_set.txt.gz";
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     // 'download_url' is used as the complete remote URL of the file.
     $rdir = parent::getParameterValue('download_url');
     $lfile = $ldir . $file;
     if (!file_exists($lfile) && parent::getParameterValue('download') == false) {
         trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
         parent::setParameterValue('download', true);
     }
     //download the hgnc file
     if (parent::getParameterValue('download') == true) {
         echo "downloading {$file} ... ";
         Utils::DownloadSingle($rdir, $lfile);
     }

     $format = parent::getParameterValue('output_format');
     // Bare output file name, reused for the published download URI.
     $oname = "hgnc." . $format;
     $ofile = $odir . $oname;
     $gz = strpos($format, "gz") !== false;
     parent::setWriteFile($ofile, $gz);
     parent::setReadFile($lfile, true);
     echo "processing {$file}... ";
     $this->process();
     echo "done!" . PHP_EOL;
     // Close the write file exactly once; the original closed it a second
     // time further down.
     parent::getWriteFile()->close();
     echo PHP_EOL;

     // generate the dataset release file
     echo "generating dataset release file... ";
     // "H" (zero-padded hour) is required for a well-formed timestamp; "G"
     // yields e.g. "T9:05:00". NOTE(review): date() uses the configured
     // timezone while the literal "Z" claims UTC - confirm the runtime TZ.
     $source_file = (new DataResource($this))
         ->setURI($rdir)
         ->setTitle('HUGO Gene Nomenclature Committee (HGNC)')
         ->setRetrievedDate(date("Y-m-d\\TH:i:s\\Z", filemtime($lfile)))
         ->setFormat('text/tab-separated-value')
         ->setFormat('application/zip')
         ->setPublisher('http://www.genenames.org/')
         ->setHomepage('http://www.genenames.org/data/gdlw_columndef.html')
         ->setRights('use')
         ->setRights('attribution')
         ->setLicense('http://www.genenames.org/about/overview')
         ->setDataset(parent::getDatasetURI());

     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = date("Y-m-d\\TH:i:s\\Z");
     $output_file = (new DataResource($this))
         // Point at the generated file itself rather than the release directory.
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$oname}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/hgnc/hgnc.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("restricted-by-source-license")
         // Host was missing ".org" in the original license URL.
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strpos($format, "nt") !== false) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }

     $dataset_description = $source_file->toRDF() . $output_file->toRDF();
     $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
     $this->getWriteFile()->write($dataset_description);
     $this->getWriteFile()->close();
     // Report completion only after the release file has been written.
     echo "done!" . PHP_EOL;
 }
Beispiel #3
0
 /**
  * Convert the NCBI HomoloGene data file to RDF and write the dataset
  * release description.
  *
  * "homologene.data" is read from the input directory; when it is missing
  * the 'download' parameter is switched on and the file is fetched from
  * 'download_url'. The file is converted via process() into
  * "homologene.<output_format>" in the output directory, then a
  * source/output DataResource pair is written to the Bio2RDF release file.
  */
 function Run()
 {
     $file = "homologene.data";
     $ldir = $this->GetParameterValue('indir');
     $odir = $this->GetParameterValue('outdir');
     $rdir = $this->GetParameterValue('download_url');
     $lfile = $ldir . $file;
     if (!file_exists($lfile)) {
         trigger_error($file . " not found. Will attempt to download.", E_USER_NOTICE);
         parent::setParameterValue('download', true);
     }
     //download
     $rfile = $rdir . $file;
     if ($this->GetParameterValue('download') == true) {
         echo "downloading {$file} ... ";
         utils::downloadSingle($rfile, $lfile);
     }

     $format = parent::getParameterValue('output_format');
     $ofile = 'homologene.' . $format;
     // Plain boolean instead of the original ternary that assigned $gz
     // inside both of its branches.
     $gz = strpos($format, "gz") !== false;
     parent::setReadFile($lfile);
     parent::setWriteFile($odir . $ofile, $gz);
     echo "processing {$file}... ";
     $this->process();
     echo "done!" . PHP_EOL;
     parent::getWriteFile()->close();

     // generate the dataset release file
     // "H" (zero-padded hour) is required for a well-formed timestamp; "G"
     // yields e.g. "T9:05:00". NOTE(review): date() uses the configured
     // timezone while the literal "Z" claims UTC - confirm the runtime TZ.
     $source_file = (new DataResource($this))
         ->setURI($rfile)
         ->setTitle("NCBI Homologene")
         ->setRetrievedDate(date("Y-m-d\\TH:i:s\\Z", filemtime($lfile)))
         ->setFormat("text/tab-separated-value")
         ->setPublisher("http://www.ncbi.nlm.nih.gov")
         ->setHomepage("http://www.ncbi.nlm.nih.gov/homologene")
         ->setRights("use-share-modify")
         ->setLicense("http://www.ncbi.nlm.nih.gov/About/disclaimer.html")
         ->setDataset("http://identifiers.org/homologene/");

     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = date("Y-m-d\\TH:i:s\\Z");
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/homologene/homologene.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strpos($format, "nt") !== false) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }

     $dataset_description = $source_file->toRDF() . $output_file->toRDF();
     echo "Generating dataset description... ";
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Beispiel #4
0
 /**
  * Convert the requested PharmGKB zip archives to RDF and write the dataset
  * release description.
  *
  * The file list comes from the 'files' parameter ('all' expands to the
  * declared parameter list minus its first element) plus any entries of the
  * 'additional' parameter. For each "<file>.zip" the archive is fetched when
  * needed, the relevant entries are streamed out of it, and each entry is
  * converted by the method of this class selected for it. One source/output
  * DataResource pair is recorded per processed entry.
  *
  * @return false|null FALSE when an expected entry cannot be read from an
  *                    archive; no value otherwise.
  */
 function run()
 {
     // get the file list
     if ($this->GetParameterValue('files') == 'all') {
         $files = explode("|", $this->GetParameterList('files'));
         array_shift($files);
     } else {
         $files = explode(",", $this->GetParameterValue('files'));
     }
     if ($this->getParameterValue('additional') != 'none') {
         $f = explode(",", $this->getParameterValue('additional'));
         $files = array_merge($files, $f);
     }
     $ldir = $this->GetParameterValue('indir');
     $odir = $this->GetParameterValue('outdir');
     $rdir = $this->GetParameterValue('download_url');
     $dataset_description = '';
     foreach ($files as $file) {
         $lfile = $ldir . $file . ".zip";
         $rfile = $rdir . $file . ".zip";
         if ($file == "offsides" && !file_exists($lfile)) {
             // The original announced "twosides" here (copy/paste slip).
             echo "downloading {$file} ...";
             $rfile = "http://www.pharmgkb.org/redirect.jsp?p=ftp%3A%2F%2Fftpuserd%3AGKB4ftp%40ftp.pharmgkb.org%2Fdownload%2Ftatonetti%2F3003377s-offsides.zip";
             utils::DownloadSingle($rfile, $lfile);
             echo "done" . PHP_EOL;
         } elseif ($file == "twosides" && !file_exists($lfile)) {
             echo "downloading {$file} ...";
             $rfile = "http://www.pharmgkb.org/redirect.jsp?p=ftp%3A%2F%2Fftpuserd%3AGKB4ftp%40ftp.pharmgkb.org%2Fdownload%2Ftatonetti%2F3003377s-twosides.zip";
             utils::DownloadSingle($rfile, $lfile);
             echo "done" . PHP_EOL;
         } elseif ($file == 'annotations' || $file == 'relationships') {
             // These archives are never downloaded here; they must already be present.
             if (!file_exists($lfile)) {
                 echo "Contact PharmGKB to get access to variants/clinical variants; save file as annotations.zip" . PHP_EOL;
                 continue;
             }
         } else {
             if (!file_exists($lfile) || parent::getParameterValue('download') == true) {
                 echo "Downloading {$lfile} ... ";
                 Utils::DownloadSingle('https://www.pharmgkb.org/download.do?objId=' . $file . '.zip&dlCls=common', $lfile);
                 echo "done" . PHP_EOL;
             }
         }
         // get a pointer to the file in the zip archive
         if (!file_exists($lfile)) {
             echo "no local copy of {$lfile} . skipping" . PHP_EOL;
             continue;
         }
         $zin = new ZipArchive();
         // ZipArchive::open() returns TRUE on success and an integer error
         // code on failure - never FALSE - so compare against TRUE.
         if ($zin->open($lfile) !== true) {
             trigger_error("Unable to open {$lfile}");
             exit;
         }

         // Select which archive entries to convert for this file.
         switch ($file) {
             case "annotations":
                 // exclude: 'clinical_ann.tsv','study_parameters.tsv'
                 $zipentries = array('clinical_ann_metadata.tsv', 'var_drug_ann.tsv', 'var_pheno_ann.tsv', 'var_fa_ann.tsv');
                 break;
             case "pathways":
                 // every entry except the ones with a .txt extension
                 $zipentries = array();
                 for ($i = 0; $i < $zin->numFiles; $i++) {
                     $stat = $zin->statIndex($i);
                     $entry = $stat['name'];
                     if (pathinfo($entry, PATHINFO_EXTENSION) != "txt") {
                         $zipentries[] = $entry;
                     }
                 }
                 break;
             case "relationships":
                 $zipentries = array("relationships.tsv");
                 break;
             case 'offsides':
                 $zipentries = array('3003377s-offsides.tsv');
                 break;
             case 'twosides':
                 $zipentries = array('3003377s-twosides.tsv');
                 break;
             default:
                 $zipentries = array($file . ".tsv");
         }

         // set the write file, parse, write and close
         $format = parent::getParameterValue('output_format');
         $outfile = $file . '.' . $format;
         $gz = strpos($format, "gz") !== false;
         $this->SetWriteFile($odir . $outfile, $gz);
         foreach ($zipentries as $zipentry) {
             if (($fp = $zin->getStream($zipentry)) === FALSE) {
                 // Name the entry that was actually requested and do not
                 // leave the output file open on the way out.
                 trigger_error("Unable to get {$zipentry} in ziparchive {$lfile}");
                 $this->GetWriteFile()->Close();
                 return FALSE;
             }
             $this->SetReadFile($lfile);
             $this->GetReadFile()->SetFilePointer($fp);

             // Pick the handler method: annotations are dispatched on the
             // entry name (without ".tsv"), everything else on the file name.
             if ($file == "annotations") {
                 $fnx = substr($zipentry, 0, strpos($zipentry, ".tsv"));
                 echo "processing {$zipentry}..";
             } elseif ($file == 'pathways') {
                 $fnx = 'pathways';
                 echo "processing {$fnx} ({$zipentry})... ";
             } else {
                 $fnx = $file;
                 echo "processing {$fnx} ... ";
             }
             $this->{$fnx}();
             parent::writeRDFBufferToWriteFile();
             parent::clear();
             echo "done!" . PHP_EOL;

             // generate the dataset release file
             // "H" (zero-padded hour) is required for a well-formed
             // timestamp; "G" yields e.g. "T9:05:00". NOTE(review): date()
             // uses the configured timezone while "Z" claims UTC - confirm.
             $source_file = (new DataResource($this))
                 ->setURI($rfile)
                 ->setTitle("Pharmacogenomics Knowledge Base ({$zipentry})")
                 ->setRetrievedDate(date("Y-m-d\\TH:i:s\\Z", filemtime($lfile)))
                 ->setFormat("text/tab-separated-value")
                 ->setFormat("application/zip")
                 ->setPublisher("http://www.pharmgkb.org/")
                 ->setHomepage("http://www.pharmgkb.org/")
                 ->setRights("use")
                 ->setRights("no-commercial")
                 ->setLicense("http://www.pharmgkb.org/page/policies")
                 ->setDataset("http://identifiers.org/pharmgkb/");

             $prefix = parent::getPrefix();
             $bVersion = parent::getParameterValue('bio2rdf_release');
             $date = date("Y-m-d\\TH:i:s\\Z");
             $output_file = (new DataResource($this))
                 ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outfile}")
                 ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} {$file} (generated at {$date})")
                 ->setSource($source_file->getURI())
                 ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/pharmgkb/pharmgkb.php")
                 ->setCreateDate($date)
                 ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
                 ->setPublisher("http://bio2rdf.org")
                 ->setRights("use-share-modify")
                 ->setRights("by-attribution")
                 ->setRights("restricted-by-source-license")
                 ->setLicense("http://creativecommons.org/licenses/by/3.0/")
                 ->setDataset(parent::getDatasetURI());
             if ($gz) {
                 $output_file->setFormat("application/gzip");
             }
             if (strpos($format, "nt") !== false) {
                 $output_file->setFormat("application/n-triples");
             } else {
                 $output_file->setFormat("application/n-quads");
             }
             $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
         }
         $this->GetWriteFile()->Close();
     }

     echo "Generating dataset description... ";
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Beispiel #5
0
 /**
  * Convert a set of dbSNP records to RDF and write the dataset release
  * description.
  *
  * The first entry of the 'files' parameter selects where the SNP list comes
  * from: 'all' and 'clinical' use getSNPs(), 'omim' and 'pharmgkb' fetch a
  * listing file (when missing or when 'download' is on) and parse it; any
  * other value is taken as an explicit comma-separated list. Each SNP's
  * "<snp>.xml.gz" is downloaded when needed, parsed, and appended to a
  * single "dbsnp.<output_format>" file in the output directory.
  */
 function run()
 {
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');

     // get the snps from pharmgkb
     $snps = explode(",", parent::getParameterValue('files'));
     switch ($snps[0]) {
         case 'all':
             $snps = $this->getSNPs();
             break;
         case 'clinical':
             $snps = $this->getSNPs(true);
             break;
         case 'omim':
             $lfile = $ldir . 'snp_omimvar.txt';
             if (!file_exists($lfile) || parent::getParameterValue('download') == true) {
                 utils::DownloadSingle('ftp://ftp.ncbi.nlm.nih.gov/snp/Entrez/snp_omimvar.txt', $lfile);
             }
             $snps = $this->processOMIMVar($lfile);
             break;
         case 'pharmgkb':
             $lfile = $ldir . 'pharmgkb.snp.zip';
             if (!file_exists($lfile) || parent::getParameterValue('download') == true) {
                 utils::DownloadSingle('http://www.pharmgkb.org/download.do?objId=rsid.zip&dlCls=common', $lfile);
             }
             $snps = $this->processPharmGKBSnps($lfile);
             break;
     }

     $format = parent::getParameterValue('output_format');
     // Bare output file name; the published URI below must not contain the
     // local output directory.
     $oname = "dbsnp." . $format;
     $outfile = $odir . $oname;
     $gz = strpos($format, ".gz") !== false;
     parent::setWriteFile($outfile, $gz);

     // Defined up front so the source description below still has a URI
     // when the SNP list is empty.
     $rfile = parent::getParameterValue('download_url');
     $n = count($snps);
     $z = 0;
     foreach ($snps as $i => $snp) {
         $file = $snp . '.xml.gz';
         $infile = $ldir . $file;
         $rfile = parent::getParameterValue('download_url') . $snp;

         // Decide per file. The original flipped the shared 'download'
         // parameter on the first missing file, which forced a re-download
         // of every file that followed.
         $download = !file_exists($infile) || parent::getParameterValue('download') == true;
         if ($download) {
             trigger_error("Downloading {$file}", E_USER_NOTICE);
             $ret = utils::downloadSingle($rfile, "compress.zlib://" . $infile, true);
             if ($ret === false) {
                 continue;
             }
         }

         // process
         echo "Processing {$snp} (" . ($i + 1) . "/{$n})" . PHP_EOL;
         $this->parse($infile);
         parent::writeRDFBufferToWriteFile();
         // clear() is invoked on the first processed record and every 10000th after it.
         if ($z++ % 10000 == 0) {
             parent::clear();
         }
     }
     parent::getWriteFile()->close();

     // generate the dataset description file
     // "H" (zero-padded hour) is required for a well-formed timestamp; "G"
     // yields e.g. "T9:05:00". NOTE(review): date() uses the configured
     // timezone while the literal "Z" claims UTC - confirm the runtime TZ.
     $source_file = (new DataResource($this))
         ->setURI($rfile)
         ->setTitle("dbSNP " . parent::getDatasetVersion())
         ->setRetrievedDate(date("Y-m-d\\TH:i:s\\Z"))
         ->setFormat("application/xml")
         ->setPublisher("http://www.ncbi.nlm.nih.gov")
         ->setHomepage("http://www.ncbi.nlm.nih.gov/SNP/")
         ->setRights("use-share-modify")
         ->setLicense("http://www.ncbi.nlm.nih.gov/About/disclaimer.html")
         ->setDataset("http://identifiers.org/dbsnp/");

     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = date("Y-m-d\\TH:i:s\\Z");
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$oname}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/dbsnp/dbsnp.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strpos($format, "nt") !== false) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }

     $dataset_description = $source_file->toRDF() . $output_file->toRDF();
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
 }
Beispiel #6
0
 /**
  * Convert the requested GenAge archives ("human", "models") to RDF and
  * write the dataset release description.
  *
  * The file list comes from the 'files' parameter ('all' expands to the
  * declared parameter list minus its first element). Each archive is
  * downloaded when no local copy exists, its CSV entry is streamed to the
  * method of this class named after the file key, and the result is written
  * to "genage_<file>.<output_format>" in the output directory. While
  * running, the graph URI is switched to the dataset URI if 'dataset_graph'
  * is set, and restored before returning.
  *
  * @return false|null FALSE when the CSV entry cannot be read from an
  *                    archive; no value otherwise.
  */
 function process()
 {
     if (parent::getParameterValue('files') == 'all') {
         $files = explode("|", parent::getParameterList('files'));
         array_shift($files);
     } else {
         $files = explode(",", parent::getParameterValue('files'));
     }
     // Per file key: the archive to fetch and the CSV entry inside it.
     $remote_files = array("human" => "human_genes.zip", "models" => "models_genes.zip");
     $zip_entries = array("human" => "genage_human.csv", "models" => "genage_models.csv");
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     $dataset_description = '';
     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }
     foreach ($files as $file) {
         if (!isset($remote_files[$file])) {
             // The original fell through with an undefined index and an
             // undefined zip entry name for unknown keys.
             trigger_error("Unknown file '{$file}'; skipping.", E_USER_WARNING);
             continue;
         }
         $lfile = $ldir . $remote_files[$file];
         $rfile = $rdir . $remote_files[$file];
         if (!file_exists($lfile)) {
             trigger_error($lfile . " not found. Will attempt to download." . PHP_EOL, E_USER_WARNING);
             echo "Downloading {$rfile}... ";
             Utils::DownloadSingle($rfile, $lfile);
             echo "done!" . PHP_EOL;
         }

         $zin = new ZipArchive();
         // ZipArchive::open() returns TRUE on success and an integer error
         // code on failure - never FALSE - so compare against TRUE.
         if ($zin->open($lfile) !== true) {
             trigger_error("Unable to open {$lfile}");
             exit;
         }
         $zipentry = $zip_entries[$file];
         if (($fp = $zin->getStream($zipentry)) === FALSE) {
             trigger_error("Unable to get {$zipentry} in ziparchive {$lfile}");
             // Do not leave the temporary graph URI in place on early exit.
             parent::setGraphURI($graph_uri);
             return FALSE;
         }
         parent::SetReadFile($lfile);
         parent::GetReadFile()->SetFilePointer($fp);

         // set the write file, parse, write and close
         $format = parent::getParameterValue('output_format');
         $ofile = "genage_" . $file . '.' . $format;
         $gz = strpos($format, "gz") !== false;
         parent::setWriteFile($odir . $ofile, $gz);
         echo "Processing {$lfile}... ";
         // Dispatch to the handler method named after the file key.
         $this->{$file}();
         echo "done!" . PHP_EOL;
         parent::getWriteFile()->close();

         // generate the dataset release file
         echo "Generating dataset description for {$ofile}... ";
         // "H" (zero-padded hour) is required for a well-formed timestamp;
         // "G" yields e.g. "T9:05:00". NOTE(review): date() uses the
         // configured timezone while the literal "Z" claims UTC - confirm.
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("Human Ageing Genomic Resources GenAge database (" . $remote_files[$file] . ")")
             ->setRetrievedDate(date("Y-m-d\\TH:i:s\\Z", filemtime($lfile)))
             ->setFormat("text/comma-separated-value")
             // The source archives are .zip files, not gzip streams.
             ->setFormat("application/zip")
             ->setPublisher("http://genomics.senescence.info/")
             ->setHomepage("http://genomics.senescence.info/genes/")
             ->setRights("use")
             ->setLicense("http://genomics.senescence.info/legal.html")
             ->setDataset("http://identifiers.org/genage/");

         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = date("Y-m-d\\TH:i:s\\Z");
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/genage/genage.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strpos($format, "nt") !== false) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
         echo "done!" . PHP_EOL;
     }
     parent::setGraphURI($graph_uri);
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
 }
Beispiel #7
0
 /**
  * Convert the requested WormBase files to RDF and write the dataset
  * release description.
  *
  * The file list comes from the 'files' parameter ('all' expands to the
  * declared parameter list minus its first element). Each file is fetched
  * when it is missing locally or when 'download' is on, converted by the
  * method of this class named after the file key, and written to
  * "wormbase.<file>.<output_format>" in the output directory. While
  * running, the graph URI is switched to the dataset URI if 'dataset_graph'
  * is set, and restored afterwards.
  */
 public function run()
 {
     if (parent::getParameterValue('files') == 'all') {
         $files = explode("|", parent::getParameterList('files'));
         array_shift($files);
     } else {
         $files = explode(",", parent::getParameterValue('files'));
     }
     $release = parent::getParameterValue('release');
     // NOTE(review): the ontology association files are pinned to WS249
     // regardless of the 'release' parameter - confirm this is intended.
     $releaseb = "WS249";
     $remote_files = array(
         "geneIDs" => "species/c_elegans/annotation/geneIDs/c_elegans.PRJNA13758." . $release . ".geneIDs.txt.gz",
         "functional_descriptions" => "species/c_elegans/annotation/functional_descriptions/c_elegans.PRJNA13758." . $release . ".functional_descriptions.txt.gz",
         "gene_interactions" => "species/c_elegans/annotation/gene_interactions/c_elegans.PRJNA13758." . $release . ".gene_interactions.txt.gz",
         "gene_associations" => "releases/current-production-release/ONTOLOGY/gene_association." . $releaseb . ".wb",
         "phenotype_associations" => "releases/current-production-release/ONTOLOGY/phenotype_association." . $releaseb . ".wb",
     );
     $local_files = array(
         "geneIDs" => "wormbase." . $release . ".genes.txt.gz",
         "functional_descriptions" => "wormbase." . $release . ".functional_descriptions.txt.gz",
         "gene_interactions" => "wormbase." . $release . ".gene_interactions.txt.gz",
         "gene_associations" => "wormbase." . $release . ".gene_association.wb",
         "phenotype_associations" => "wormbase." . $release . ".phenotype_associations.wb",
     );
     $idir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     $dataset_description = '';
     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }
     foreach ($files as $file) {
         if (!isset($local_files[$file]) || !isset($remote_files[$file])) {
             // The original fell through with an undefined index for unknown keys.
             trigger_error("Unknown file '{$file}'; skipping.", E_USER_WARNING);
             continue;
         }
         $lfile = $idir . $local_files[$file];
         $rfile = $rdir . $remote_files[$file];
         $missing = !file_exists($lfile);
         if ($missing || parent::getParameterValue('download') == true) {
             // Only claim "not found" when the file really is missing; a
             // forced download of an existing file used to emit it as well.
             if ($missing) {
                 trigger_error($lfile . " not found. Will attempt to download." . PHP_EOL, E_USER_WARNING);
             }
             echo "Downloading {$rfile}... ";
             Utils::DownloadSingle($rfile, $lfile);
             echo "done!" . PHP_EOL;
         }
         // Decide on gzip by file extension; a "gz" anywhere in the path
         // (for example in the input directory name) must not count.
         $source_gz = substr($lfile, -3) === '.gz';
         parent::setReadFile($lfile, $source_gz);

         $format = parent::getParameterValue('output_format');
         $ofile = "wormbase." . $file . "." . $format;
         $gz = strpos($format, "gz") !== false;
         parent::setWriteFile($odir . $ofile, $gz);
         echo "Processing {$file}... ";
         // Dispatch to the handler method named after the file key.
         $this->{$file}();
         echo "done!" . PHP_EOL;
         parent::getWriteFile()->close();

         // generate the dataset release file
         echo "Generating dataset description for {$ofile}... ";
         // "H" (zero-padded hour) is required for a well-formed timestamp;
         // "G" yields e.g. "T9:05:00". NOTE(review): date() uses the
         // configured timezone while the literal "Z" claims UTC - confirm.
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("WormBase Release " . $release . " subset ({$file})")
             ->setRetrievedDate(date("Y-m-d\\TH:i:s\\Z", filemtime($lfile)))
             ->setFormat("text/tab-separated-value")
             ->setPublisher("http://wormbase.org/")
             ->setHomepage("http://wormbase.org/")
             ->setRights("use")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://www.wormbase.org/about/policies")
             ->setDataset("http://identifiers.org/wormbase/");
         if ($source_gz) {
             // The .wb association files are not compressed, so only the
             // .gz sources are labelled as gzip.
             $source_file->setFormat("application/gzip");
         }

         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = date("Y-m-d\\TH:i:s\\Z");
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/wormbase/wormbase.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strpos($format, "nt") !== false) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
         echo "done!" . PHP_EOL;
     }
     parent::setGraphURI($graph_uri);
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
 }
 /**
  * Convert the ClinicalTrials.gov XML records in the input directory to RDF
  * and write the dataset description to the Bio2RDF release file.
  *
  * Every file matching "NCT*" under the 'indir' parameter is a candidate. When
  * the 'id_list' parameter is non-empty, only trials whose id (file name minus
  * ".xml") appears in that comma-separated list are handed to process_file().
  * Zero-byte files are reported and skipped.
  *
  * @return false|null false if the release file cannot be opened; nothing otherwise
  */
 function parse_dir()
 {
     $this->setCheckPoint('dataset');
     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     // "H" gives the zero-padded hour that xsd:dateTime requires; the previous
     // "G" produced values such as "2014-03-01T9:05:00Z".
     // NOTE(review): date() formats in the default timezone while the literal
     // "Z" claims UTC -- confirm whether gmdate() is wanted here.
     $timestamp_format = "Y-m-d\\TH:i:s\\Z";
     $date = date($timestamp_format);
     $outdir = parent::getParameterValue("outdir");
     $output_format = parent::getParameterValue('output_format');
     $dataset_file = $outdir . parent::getBio2RDFReleaseFile();
     // NOTE(review): $fp is only opened and closed in this method; presumably it
     // creates/truncates the release file that writeToReleaseFile() fills -- confirm.
     $fp = fopen($dataset_file, "w");
     if ($fp === FALSE) {
         trigger_error("Unable to open {$dataset_file}", E_USER_ERROR);
         return false;
     }
     $id_list = parent::getParameterValue('id_list');
     $ids = explode(",", $id_list);
     $indir = parent::getParameterValue('indir');
     echo "Processing {$indir}\n";
     $outfile = "clinicaltrials." . $output_format;
     $gz = strstr($output_format, ".gz") === FALSE ? false : true;
     parent::setWriteFile($outdir . $outfile, $gz);
     $files = glob($indir . "NCT*");
     if ($files === FALSE) {
         // glob() signals an error with false; treat it as "nothing to process"
         $files = array();
     }
     foreach ($files as $i => $file) {
         // periodically reset the converter state via parent::clear()
         if ($i % 10000 == 0) {
             parent::clear();
         }
         $trial_id = basename($file, '.xml');
         if ($id_list != '' && !in_array($trial_id, $ids, true)) {
             continue;
         }
         if (filesize($file) != 0) {
             echo "Processing {$trial_id}" . PHP_EOL;
             $this->process_file($file);
         } else {
             echo "Processing {$trial_id} -> Empty!" . PHP_EOL;
         }
     }
     echo "Finished." . PHP_EOL;
     parent::getWriteFile()->close();
     // make the dataset description
     parent::setGraphURI(parent::getDatasetURI());
     $rfile = "http://clinicaltrials.gov/ct2/show/NCT_ID?resultsxml=true";
     $source_version = parent::getDatasetVersion();
     // The retrieval date is the mtime of the last globbed file, as before. When
     // no file matched (previously an undefined-variable read) or the mtime is
     // unavailable, fall back to the current time.
     $last_file = end($files);
     $retrieved = $last_file === FALSE ? FALSE : filemtime($last_file);
     if ($retrieved === FALSE) {
         $retrieved = time();
     }
     // dataset description
     $source_file = (new DataResource($this))
         ->setURI($rfile)
         ->setTitle("Clinicaltrials")
         ->setRetrievedDate(date($timestamp_format, $retrieved))
         ->setFormat("application/xml")
         ->setPublisher("http://clinicaltrials.gov/")
         ->setHomepage("http://clinicaltrials.gov/")
         ->setRights("use")
         ->setRights("by-attribution")
         ->setLicense("http://clinicaltrials.gov/ct2/about-site/terms-conditions")
         ->setDataset("http://identifiers.org/clinicaltrials/");
     parent::writeToReleaseFile($source_file->toRDF());
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outfile}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} v{$source_version}")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/clinicaltrials/clinicaltrials.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr($output_format, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     parent::writeToReleaseFile($output_file->toRDF());
     parent::closeReleaseFile();
     // write the dataset description file
     fclose($fp);
 }
Beispiel #9
0
 /**
  * Download (when needed) and convert the selected NCBI Gene files, then write
  * the dataset description to the Bio2RDF release file.
  *
  * The 'files' parameter is either "all" or a comma-separated list of module
  * names; names missing from getPackageMap() are ignored. Each module is read
  * from "<indir><module>.gz" and converted by the method named after the module
  * ("gene2refseq" is handled by gene2accession()).
  *
  * Boolean-like parameters ('download', 'limit_organisms', 'dataset_graph') are
  * interpreted with FILTER_VALIDATE_BOOLEAN, because a loose `== true` treats
  * the string "false" as true.
  */
 function process()
 {
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     $output_format = parent::getParameterValue('output_format');
     $gz = strstr($output_format, "gz") ? true : false;
     // "H" gives the zero-padded hour that xsd:dateTime requires ("G" does not pad).
     // NOTE(review): date() formats in the default timezone while the literal
     // "Z" claims UTC -- confirm whether gmdate() is wanted here.
     $timestamp_format = "Y-m-d\\TH:i:s\\Z";
     //which files are to be converted?
     $files = trim($this->GetParameterValue('files'));
     if ($files == 'all') {
         $files = $this->getPackageMap();
     } else {
         $pm = $this->getPackageMap();
         $selected = array();
         foreach (explode(",", $files) as $a) {
             if (array_key_exists($a, $pm)) {
                 $selected[$a] = $pm[$a];
             }
         }
         $files = $selected;
     }
     if (filter_var($this->getParameterValue('limit_organisms'), FILTER_VALIDATE_BOOLEAN)) {
         // flipped so that taxon ids become array keys
         $this->taxids = array_flip(explode(",", $this->getParameterValue('organisms')));
     }
     //set dataset graph to be dataset URI
     $graph_uri = parent::getGraphURI();
     if (filter_var(parent::getParameterValue('dataset_graph'), FILTER_VALIDATE_BOOLEAN)) {
         parent::setGraphURI(parent::getDatasetURI());
     }
     $dataset_description = '';
     // first pass: fetch every file that is missing or explicitly re-requested
     $force_download = filter_var(parent::getParameterValue('download'), FILTER_VALIDATE_BOOLEAN);
     foreach ($files as $module => $rfilename) {
         $lfile = $ldir . $module . ".gz";
         $rfile = $rdir . $rfilename;
         $missing = !file_exists($lfile);
         if (!$missing && !$force_download) {
             continue;
         }
         if ($missing) {
             trigger_error("{$lfile} not found. Will attempt to download.", E_USER_NOTICE);
         }
         $myfile = $lfile;
         // these two modules are written through the zlib stream wrapper
         if ($module == "gene2sts" || $module == "gene2unigene") {
             $myfile = "compress.zlib://" . $lfile;
         }
         echo "downloading {$module} ...";
         utils::DownloadSingle($rfile, $myfile);
         echo "done" . PHP_EOL;
     }
     // second pass: convert each file and collect its description
     foreach ($files as $module => $rfilename) {
         $lfile = $ldir . $module . ".gz";
         $rfile = $rdir . $rfilename;
         $ofile = $module . "." . $output_format;
         echo "Processing {$module} ... ";
         parent::setReadFile($lfile, true);
         parent::setWriteFile($odir . $ofile, $gz);
         $fnx = $module;
         if ($module == 'gene2refseq') {
             $fnx = 'gene2accession';
         }
         $this->{$fnx}();
         parent::clear();
         echo 'done!' . PHP_EOL;
         parent::getReadFile()->close();
         parent::getWriteFile()->close();
         // generate the dataset release file
         // dataset description
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("NCBI Gene ({$module})")
             ->setRetrievedDate(date($timestamp_format, filemtime($lfile)))
             ->setFormat("text/tab-separated-value")
             ->setFormat("application/gzip")
             ->setPublisher("http://www.ncbi.nlm.nih.gov")
             ->setHomepage("http://www.ncbi.nlm.nih.gov/gene")
             ->setRights("use-share-modify")
             ->setLicense("http://www.ncbi.nlm.nih.gov/About/disclaimer.html")
             ->setDataset("http://identifiers.org/ncbigene/");
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = date($timestamp_format);
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ncbigene/ncbigene.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr($output_format, "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
     }
     //set graph URI back to default value
     parent::setGraphURI($graph_uri);
     //write dataset description to file
     echo "Generating dataset description... ";
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Beispiel #10
0
 /**
  * Convert the tab-separated iProClass mapping file (read from the current
  * read file) into cross-reference statements.
  *
  * Column 0 is the UniProt accession that identifies the entry; columns 1-13
  * and 15-21 hold identifier lists for other databases (column 14, PIR-PSD, is
  * skipped because that database is no longer maintained). One statement is
  * emitted per identifier and the buffer is flushed after every input line.
  *
  * Output is split over numbered files ("iproclass.<n>.<output_format>"): a new
  * file is started on the first line and again every 1,000,000 lines.
  */
 private function process()
 {
     $odir = parent::getParameterValue('outdir');
     $output_format = parent::getParameterValue('output_format');
     $gz = strstr($output_format, "gz") ? true : false;

     // Column index => array(kind, predicate suffix, object prefix), in output order.
     //   xref    : vocabulary predicate pointing at "<prefix><id>"
     //   go      : like xref, but the first three characters of each id are dropped
     //   link    : rdfs:seeAlso pointing at the URL "<prefix><id>"
     //   uniparc : xref plus an rdfs:seeAlso link to the UniParc page
     $columns = array(
         1 => array('xref', 'x-uniprot', 'uniprot:'),
         2 => array('xref', 'x-ncbigene', 'geneid:'),
         3 => array('xref', 'x-refseq', 'refseq:'),
         4 => array('xref', 'x-gi', 'gi:'),
         5 => array('xref', 'x-pdb', 'pdb:'),
         6 => array('xref', 'x-pfam', 'pfam:'),
         7 => array('go', 'x-go', 'go:'),
         8 => array('xref', 'x-pirsf', 'pirsf:'),
         9 => array('xref', 'x-ipi', 'ipi:'),
         10 => array('link', null, 'http://uniprot.org/uniref/'),
         11 => array('link', null, 'http://uniprot.org/uniref/'),
         12 => array('link', null, 'http://uniprot.org/uniref/'),
         13 => array('uniparc', 'x-uniparc', 'uniparc:'),
         //skipping pir-psd (column 14) because db no longer maintained
         15 => array('xref', 'x-taxon', 'taxon:'),
         16 => array('xref', 'x-omim', 'omim:'),
         17 => array('xref', 'x-unigene', 'unigene:'),
         18 => array('xref', 'x-ensembl', 'ensembl:'),
         19 => array('xref', 'x-pubmed', 'pubmed:'),
         20 => array('xref', 'x-genbank', 'genbank:'),
     );

     $z = 0;
     $y = 1;
     while ($l = $this->getReadFile()->Read(200000)) {
         if ($z++ % 1000000 == 0) {
             echo $z . PHP_EOL;
             $ofile = 'iproclass.' . $y++ . "." . $output_format;
             if (parent::getWriteFile() != null) {
                 parent::getWriteFile()->close();
                 parent::clear();
             }
             // generate a new file
             parent::setWriteFile($odir . $ofile, $gz);
         }
         // Pad short lines so every column exists; this replaces the former
         // "@" suppression of undefined-offset errors.
         $fields = array_pad(explode("\t", $l), 22, '');
         $uniprot_acc = $fields[0];
         $id_res = $this->getNamespace() . $uniprot_acc;
         $voc = $this->getVoc();
         parent::addRDF(parent::triplify($id_res, $voc . "x-uniprot", "uniprot:" . $uniprot_acc));

         foreach ($columns as $index => $spec) {
             $value = $fields[$index];
             if (empty($value)) {
                 continue;
             }
             list($kind, $predicate, $object_prefix) = $spec;
             foreach (explode("; ", $value) as $xref_id) {
                 if ($kind == 'link') {
                     parent::AddRDF(parent::QQuadO_URL($id_res, "rdfs:seeAlso", $object_prefix . $xref_id));
                     continue;
                 }
                 if ($kind == 'go') {
                     // drop the three-character prefix of the GO identifier
                     $xref_id = substr($xref_id, 3);
                 }
                 $rdf = parent::triplify($id_res, $voc . $predicate, $object_prefix . $xref_id);
                 if ($kind == 'uniparc') {
                     $rdf .= parent::QQuadO_URL($id_res, "rdfs:seeAlso", "http://uniprot.org/uniparc/" . $xref_id);
                 }
                 parent::AddRDF($rdf);
             }
         }

         // The last column is split on ";" alone, so each id is trimmed (and
         // empty pieces skipped) to avoid ids with stray leading whitespace.
         $embl_protein = trim($fields[21]);
         if (!empty($embl_protein)) {
             foreach (explode(";", $embl_protein) as $embl_protein_id) {
                 $embl_protein_id = trim($embl_protein_id);
                 if ($embl_protein_id === '') {
                     continue;
                 }
                 parent::AddRDF(parent::triplify($id_res, $voc . "x-genbank", "genbank:" . $embl_protein_id));
             }
         }
         //write rdf to file
         $this->WriteRDFBufferToWriteFile();
     }
     //while
 }
Beispiel #11
0
 /**
  * Convert the selected KEGG databases and write the dataset description.
  *
  * The 'files' parameter is "all" (every option of the 'files' parameter list
  * except the first) or a comma-separated list of database names. "genes" is
  * handled separately: the genome list is read, and for each organism in the
  * hard-coded $orgs list that organism's gene list is converted into a single
  * "kegg-genes.<output_format>" file. Every other database is converted into
  * its own "kegg-<db>.<output_format>" file.
  *
  * @return false|null false if the local genome list cannot be opened; nothing otherwise
  */
 function run()
 {
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $download_url = parent::getParameterValue("download_url");
     $download = parent::getParameterValue('download');
     $output_format = parent::getParameterValue('output_format');
     $gz = strstr($output_format, "gz") ? true : false;
     $dataset_description = '';
     $files = parent::getParameterValue('files');
     if ($files == 'all') {
         $files = explode('|', parent::getParameterList('files'));
         // the first option is dropped (presumably the "all" keyword itself)
         array_shift($files);
     } else {
         $files = explode(',', $files);
     }
     if (parent::getParameterValue('id_list') != '') {
         $this->idlist = explode(",", parent::getParameterValue("id_list"));
     }
     // handle genes separately
     if (in_array("genes", $files)) {
         $orgs = array("hsa");
         //,"mmu","eco","dre","dme","ath","sce","ddi");
         echo "processing genes" . PHP_EOL;
         $ofile = "kegg-genes." . $output_format;
         parent::setWriteFile($odir . $ofile, $gz);
         // get the list of genomes
         $lfile = $ldir . "genome.txt";
         $rfile = $download_url . "list/genome";
         if (!file_exists($lfile) || $download == 'true') {
             utils::downloadSingle($rfile, $lfile);
         }
         $fp = fopen($lfile, "r");
         if ($fp === false) {
             // without the genome list there is nothing to iterate over
             parent::getWriteFile()->close();
             trigger_error("Unable to open {$lfile}", E_USER_ERROR);
             return false;
         }
         $source_file = null;
         // compare against false so a line that evaluates to false does not end the loop early
         while (($l = fgets($fp)) !== false) {
             $a = explode("\t", $l);
             if (!isset($a[1])) {
                 // not a "<id>\t<codes, name>" line
                 continue;
             }
             $b = explode(", ", $a[1]);
             $org = $b[0];
             if (!in_array($org, $orgs)) {
                 continue;
             }
             // get the list of genes for this organism
             echo "processing {$org}" . PHP_EOL;
             $this->org = $org;
             $org_lfile = $ldir . $org . ".txt";
             $org_rfile = $download_url . "list/{$org}";
             if (!file_exists($org_lfile) || $download == 'true') {
                 utils::downloadSingle($org_rfile, $org_lfile);
             }
             parent::setReadFile($org_lfile, false);
             $this->process("gene");
             parent::getReadFile()->close();
             parent::clear();
             $this->org = null;
             // add dataset description
             $source_file = (new DataResource($this))
                 ->setURI($org_rfile)
                 ->setTitle("KEGG: Gene")
                 ->setRetrievedDate(parent::getDate(filemtime($org_lfile)))
                 ->setFormat("text/plain")
                 ->setPublisher("http://www.kegg.jp/")
                 ->setHomepage("http://www.kegg.jp/")
                 ->setRights("use")
                 ->setRights("no-commercial")
                 ->setLicense("http://www.kegg.jp/kegg/legal.html")
                 ->setDataset("http://identifiers.org/kegg/");
             $dataset_description .= $source_file->toRDF();
         }
         fclose($fp);
         parent::getWriteFile()->close();
         echo "done" . PHP_EOL;
         if ($source_file === null) {
             // No organism matched, so there is no source to describe; calling
             // getURI() on the missing object used to be a fatal error.
             trigger_error("No gene lists were processed; skipping description of {$ofile}", E_USER_WARNING);
         } else {
             $prefix = parent::getPrefix();
             $bVersion = parent::getParameterValue('bio2rdf_release');
             $date = parent::getDate(filemtime($odir . $ofile));
             // the output is linked to the last organism's source file, as before
             $output_file = (new DataResource($this))
                 ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
                 ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - Gene ")
                 ->setSource($source_file->getURI())
                 ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/kegg/kegg.php")
                 ->setCreateDate($date)
                 ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
                 ->setPublisher("http://bio2rdf.org")
                 ->setRights("use-share-modify")
                 ->setRights("by-attribution")
                 ->setRights("restricted-by-source-license")
                 ->setLicense("http://creativecommons.org/licenses/by/3.0/")
                 ->setDataset(parent::getDatasetURI());
             if ($gz) {
                 $output_file->setFormat("application/gzip");
             }
             if (strstr($output_format, "nt")) {
                 $output_file->setFormat("application/n-triples");
             } else {
                 $output_file->setFormat("application/n-quads");
             }
             $dataset_description .= $output_file->toRDF();
         }
     }
     // all other files
     foreach ($files as $db) {
         if ($db == "genes") {
             continue;
         }
         echo "processing {$db}" . PHP_EOL;
         $lfile = $ldir . $db . ".txt";
         $rfile = $download_url . "list/{$db}";
         if (!file_exists($lfile) || $download == 'true') {
             echo "Downloading {$rfile} ";
             $ret = utils::downloadSingle($rfile, $lfile);
             if ($ret === false) {
                 // report the URL that failed (the message used an undefined $file)
                 echo "unable to download {$rfile} ... skipping" . PHP_EOL;
                 continue;
             }
             echo "done." . PHP_EOL;
         }
         // now for each list, get the individual entries
         $ofile = "kegg-{$db}." . $output_format;
         parent::setReadFile($lfile, false);
         parent::setWriteFile($odir . $ofile, $gz);
         $this->process($db);
         parent::getWriteFile()->close();
         parent::getReadFile()->close();
         parent::clear();
         echo "done!" . PHP_EOL;
         // add dataset description
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("KEGG: {$db}")
             ->setRetrievedDate(parent::getDate(filemtime($lfile)))
             ->setFormat("text/plain")
             ->setPublisher("http://www.kegg.jp/")
             ->setHomepage("http://www.kegg.jp/")
             ->setRights("use")
             ->setRights("no-commercial")
             ->setLicense("http://www.kegg.jp/kegg/legal.html")
             ->setDataset("http://identifiers.org/kegg/");
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = parent::getDate(filemtime($odir . $ofile));
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$db} ")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/kegg/kegg.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr($output_format, "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
     }
     // write the dataset description
     $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
     $this->getWriteFile()->write($dataset_description);
     $this->getWriteFile()->close();
 }
Beispiel #12
0
 /**
  * Fetch SABIO-RK reactions as BioPAX, convert them to Bio2RDF statements in a
  * single "sabiork.<output_format>" file and write the dataset description.
  *
  * The 'files' parameter selects the reactions: "all", a comma-separated list
  * of reaction ids, a "from-to" range, or a single id. The list of known
  * reaction ids is cached in "<indir>reactions.xml" and each reaction's BioPAX
  * document in "<indir>reaction_<id>.owl.gz"; both are fetched when missing or
  * when the 'download' parameter is 'true'.
  *
  * The script terminates if the reaction list cannot be downloaded or parsed.
  */
 function Run()
 {
     $idir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $files = parent::getParameterValue('files');
     // "H" gives the zero-padded hour that xsd:dateTime requires ("G" does not pad).
     // NOTE(review): date() formats in the default timezone while the literal
     // "Z" claims UTC -- confirm whether gmdate() is wanted here.
     $timestamp_format = "Y-m-d\\TH:i:s\\Z";
     // set the work; null means "every reaction in the list"
     $myfiles = null;
     if ($files != 'all') {
         // check if comma-separated, or hyphen-range
         $list = explode(",", $files);
         if (count($list) == 1) {
             // try hyphen separated
             $range = explode("-", $files);
             if (count($range) == 2) {
                 $myfiles = array();
                 for ($n = $range[0]; $n <= $range[1]; $n++) {
                     $myfiles[] = $n;
                 }
             } else {
                 // must be a single entry
                 $myfiles = array($files);
             }
         } else {
             $myfiles = $list;
         }
     }
     $rest_uri = 'http://sabiork.h-its.org/sabioRestWebServices/';
     $getReactionIds_url = $rest_uri . "suggestions/SABIOReactionIDs";
     $reaction_list_file = $idir . "reactions.xml";
     if (!file_exists($reaction_list_file) || parent::getParameterValue('download') == 'true') {
         $xml = file_get_contents($getReactionIds_url);
         // Check the download result; the old test compared the file *path*
         // with FALSE, so a failed download was written to disk.
         if (FALSE === $xml) {
             trigger_error("Unable to download {$getReactionIds_url}", E_USER_WARNING);
             exit(1);
         }
         $f = new FileFactory($reaction_list_file);
         $f->Write($xml);
         $f->Close();
     }
     $xml = simplexml_load_file($reaction_list_file);
     if ($xml === FALSE) {
         trigger_error("Unable to parse {$reaction_list_file}", E_USER_WARNING);
         exit(1);
     }
     $total = $myfiles !== null ? count($myfiles) : count($xml->SABIOReactionID);
     $i = 0;
     parent::setCheckpoint('dataset');
     $graph_uri = parent::getGraphURI();
     // a loose `== true` would treat the string "false" as true
     if (filter_var(parent::getParameterValue('dataset_graph'), FILTER_VALIDATE_BOOLEAN)) {
         parent::setGraphURI(parent::getDatasetURI());
     }
     $suffix = parent::getParameterValue('output_format');
     $ofile = "sabiork." . $suffix;
     $gz = strstr($suffix, "gz") ? true : false;
     parent::setWriteFile($odir . $ofile, $gz);
     foreach ($xml->SABIOReactionID as $rid) {
         parent::setCheckpoint('file');
         if ($myfiles !== null && !in_array($rid, $myfiles)) {
             continue;
         }
         $i++;
         echo "{$i} / {$total} : reaction {$rid}" . PHP_EOL;
         $reaction_file = $idir . "reaction_" . $rid . ".owl.gz";
         if (!file_exists($reaction_file) || $this->GetParameterValue('download') == 'true') {
             $url = $rest_uri . 'searchKineticLaws/biopax?q=SabioReactionID:' . $rid;
             $data = file_get_contents($url);
             if ($data === FALSE) {
                 // skip reactions that cannot be fetched
                 continue;
             }
             $f = new FileFactory($reaction_file, true);
             $f->Write($data);
             $f->Close();
         }
         $buf = file_get_contents("compress.zlib://" . $reaction_file);
         // send for parsing
         $p = new BioPAX2Bio2RDF($this);
         $p->SetBuffer($buf)
             ->SetBioPAXVersion(3)
             ->SetBaseNamespace("http://sabio.h-its.org/biopax#")
             ->SetBio2RDFNamespace("http://bio2rdf.org/sabiork:")
             ->SetDatasetURI($this->GetDatasetURI());
         $rdf = $p->Parse();
         parent::getWriteFile()->Write($rdf);
     }
     parent::getWriteFile()->Close();
     //generate dataset description
     echo "Generating dataset description... ";
     // NOTE(review): the retrieval date is taken from the *output* file's mtime -- confirm intent.
     $source_file = (new DataResource($this))
         ->setURI("http://sabiork.h-its.org/sabioRestWebServices/searchKineticLaws/biopax")
         ->setTitle("SABIO-RK Biochemical Reaction Kinetics Database")
         ->setRetrievedDate(date($timestamp_format, filemtime($odir . $ofile)))
         ->setFormat("text/xml")
         ->setPublisher("http://sabio.villa-bosch.de/")
         ->setHomepage("http://sabio.villa-bosch.de/")
         ->setRights("use-share-modify")
         ->setRights("no-commercial")
         ->setLicense("http://sabio.villa-bosch.de/layouts/content/termscondition.gsp")
         ->setDataset("http://identifiers.org/sabiork.reaction/");
     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = date($timestamp_format);
     // NOTE(review): this URI names the release directory rather than {$ofile} -- confirm intent.
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sabiork/sabiork.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr($suffix, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description = $source_file->toRDF() . $output_file->toRDF();
     //write dataset description to file
     parent::setGraphURI($graph_uri);
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Beispiel #13
0
 /**
  * Convert the selected iRefIndex MITAB archives and write the dataset
  * description to the Bio2RDF release file.
  *
  * For each entry of the 'files' parameter ("all" or a comma-separated list)
  * the archive "<Ucfirst name>.mitab.<version>.txt.zip" is downloaded when it
  * is missing or when the 'download' parameter is set, its first entry is
  * streamed through Parse(), and the result is written to
  * "irefindex-<name>.<output_format>".
  *
  * @return bool TRUE on success; FALSE when a download fails or the archive
  *              entry cannot be read. An unopenable archive or a parsing
  *              error terminates the script.
  */
 function Run()
 {
     // get the file list
     if (parent::getParameterValue('files') == 'all') {
         $files = array('all');
     } else {
         $files = explode(",", parent::getParameterValue('files'));
     }
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     $output_format = parent::getParameterValue('output_format');
     $gz = strstr($output_format, ".gz") === FALSE ? false : true;
     // a loose `== true` would treat the string "false" as true and always re-download
     $force_download = filter_var(parent::getParameterValue('download'), FILTER_VALIDATE_BOOLEAN);
     // "H" gives the zero-padded hour that xsd:dateTime requires ("G" does not pad).
     // NOTE(review): date() formats in the default timezone while the literal
     // "Z" claims UTC -- confirm whether gmdate() is wanted here.
     $timestamp_format = "Y-m-d\\TH:i:s\\Z";
     $dataset_description = '';
     foreach ($files as $file) {
         $version = parent::getParameterValue("version");
         $zip_file = ucfirst($file) . ".mitab." . $version . ".txt.zip";
         $lfile = $ldir . $zip_file;
         $ofile = "irefindex-" . $file . "." . $output_format;
         $download = $force_download;
         if (!file_exists($lfile)) {
             trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
             $download = true;
         }
         $rfile = $rdir . $zip_file;
         if ($download) {
             echo "downloading {$rfile}" . PHP_EOL;
             if (FALSE === Utils::DownloadSingle($rfile, $lfile)) {
                 trigger_error("Error in Download");
                 return FALSE;
             }
         }
         $zin = new ZipArchive();
         // ZipArchive::open() returns TRUE on success and an error code (not
         // FALSE) on failure, so anything other than TRUE is a failure.
         if ($zin->open($lfile) !== TRUE) {
             trigger_error("Unable to open {$lfile}");
             exit;
         }
         if ($zin->numFiles != 1) {
             trigger_error("Found more than one file ... using first file");
         }
         $f = $zin->statIndex(0);
         if ($f === FALSE) {
             // empty archive: there is no first entry to read
             trigger_error("Unable to find any file in ziparchive {$lfile}");
             $zin->close();
             return FALSE;
         }
         $base_file = $f['name'];
         if (($fp = $zin->getStream($base_file)) === FALSE) {
             trigger_error("Unable to get {$base_file} in ziparchive {$lfile}");
             $zin->close();
             return FALSE;
         }
         parent::setReadFile($lfile);
         // read from the archive entry's stream rather than the zip file itself
         parent::getReadFile()->setFilePointer($fp);
         echo "Processing " . $file . " ...";
         // compress only when the output format asks for it (this was hard-coded to true)
         parent::setWriteFile($odir . $ofile, $gz);
         if ($this->Parse() === FALSE) {
             trigger_error("Parsing Error");
             exit;
         }
         parent::writeRDFBufferToWriteFile();
         parent::getWriteFile()->close();
         $zin->close();
         echo "Done!" . PHP_EOL;
         $graph_uri = parent::getGraphURI();
         if (filter_var(parent::getParameterValue('dataset_graph'), FILTER_VALIDATE_BOOLEAN)) {
             parent::setGraphURI(parent::getDatasetURI());
         }
         // dataset description
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("iRefIndex ({$zip_file})")
             ->setRetrievedDate(date($timestamp_format, filemtime($lfile)))
             ->setFormat("text/tab-separated-value")
             ->setFormat("application/zip")
             ->setPublisher("http://irefindex.uio.no")
             ->setHomepage("http://irefindex.uio.no")
             ->setRights("use")
             ->setRights("by-attribution")
             ->setRights("no-commercial")
             ->setLicense("http://irefindex.uio.no/wiki/README_MITAB2.6_for_iRefIndex#License")
             ->setDataset("http://identifiers.org/irefindex/");
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = date($timestamp_format);
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/irefindex/irefindex.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr($output_format, "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
         parent::setGraphURI($graph_uri);
     }
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     return TRUE;
 }
Beispiel #14
0
 /**
  * Convert OMIM entries to RDF and write the dataset description file.
  *
  * Steps:
  *  1. resolve the API key (parameter, or the key file as a fallback);
  *  2. select the entries to process from the 'files' parameter: 'all',
  *     a hyphenated range ("100050-100100") or a comma-separated list;
  *  3. for each entry, download its JSON when it is missing locally (or when
  *     'download' is set), parse it and flush the RDF buffer;
  *  4. write the source/output resource descriptions to the release file.
  *
  * Entries whose JSON cannot be downloaded or decoded are reported with a
  * warning and skipped instead of being handed to the parser.
  *
  * @return bool TRUE once the release file has been written
  */
 function Run()
 {
     // directory shortcuts
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     // fall back to the key file when no API key was given as a parameter
     if (parent::getParameterValue('omim_api_key') == '') {
         $key_file = parent::getParameterValue('omim_api_key_file');
         if (file_exists($key_file)) {
             $key = trim(file_get_contents($key_file));
             if ($key) {
                 parent::setParameterValue('omim_api_key', $key);
             } else {
                 trigger_error("No API key found in the specified omim key file {$key_file}", E_USER_WARNING);
             }
         } else {
             trigger_error("No OMIM key has been provided either by commmand line or in the expected omim key file {$key_file}", E_USER_WARNING);
         }
     }
     // get the list of mim2gene entries
     $entries = $this->GetListOfEntries($ldir);
     // get the work specified
     $list = trim(parent::getParameterValue('files'));
     if ($list != 'all') {
         // start from an empty selection so that a range or list matching
         // nothing yields an empty array rather than an undefined variable
         $myentries = array();
         // check if a hyphenated list was provided
         if (($pos = strpos($list, "-")) !== FALSE) {
             $start_range = substr($list, 0, $pos);
             $end_range = substr($list, $pos + 1);
             // keep the entries whose id lies within the range (inclusive);
             // the full list is already in $entries, no need to fetch it again
             foreach ($entries as $e => $type) {
                 if ($e >= $start_range && $e <= $end_range) {
                     $myentries[$e] = $type;
                 }
             }
             $entries = $myentries;
         } else {
             // for comma separated list
             foreach (explode(",", $list) as $e) {
                 $myentries[trim($e)] = '';
             }
             $entries = array_intersect_key($entries, $myentries);
         }
     }
     // set the write file
     $gz = strstr(parent::getParameterValue('output_format'), ".gz") === FALSE ? false : true;
     $outfile = 'omim.' . parent::getParameterValue('output_format');
     parent::setWriteFile($odir . $outfile, $gz);
     // declare the mapping method types
     $this->get_method_type(null, true);
     // iterate over the entries
     $i = 0;
     $total = count($entries);
     foreach ($entries as $omim_id => $type) {
         echo "processing " . ++$i . " of {$total} - omim# ";
         $download_file = $ldir . $omim_id . ".json.gz";
         // compress.zlib:// transparently reads both gzipped and plain files
         $gzfile = "compress.zlib://{$download_file}";
         // download if the file doesn't exist or we are told to
         if (!file_exists($download_file) || parent::getParameterValue('download') == true) {
             // download using the api
             $url = parent::getParameterValue('omim_api_url') . '&apiKey=' . parent::getParameterValue('omim_api_key') . '&mimNumber=' . $omim_id;
             $buf = file_get_contents($url);
             if ($buf !== FALSE && strlen($buf) != 0) {
                 file_put_contents($download_file, $buf);
                 usleep(500000);
                 // limit of 4 requests per second
             } else {
                 // the URL is deliberately left out of the message: it contains the API key
                 trigger_error("Unable to download OMIM entry {$omim_id}", E_USER_WARNING);
             }
         }
         // nothing to parse when the download failed and no local copy exists
         if (!file_exists($download_file)) {
             echo PHP_EOL;
             continue;
         }
         // load entry, parse and write to file
         $entry = json_decode(file_get_contents($gzfile), true);
         if (!isset($entry["omim"]["entryList"][0]["entry"])) {
             trigger_error("Unable to read an OMIM entry from {$download_file}", E_USER_WARNING);
             echo PHP_EOL;
             continue;
         }
         $omim_id = trim((string) $entry["omim"]["entryList"][0]["entry"]['mimNumber']);
         echo $omim_id;
         $this->ParseEntry($entry, $type);
         parent::writeRDFBufferToWriteFile();
         echo PHP_EOL;
     }
     parent::writeRDFBufferToWriteFile();
     parent::getWriteFile()->close();
     // generate the dataset description file
     // gmdate + "H" gives a zero-padded UTC timestamp, matching the literal "Z"
     $date = gmdate("Y-m-d\\TH:i:s\\Z");
     $source_file = (new DataResource($this))
         ->setURI(parent::getParameterValue('omim_api_url'))
         ->setTitle("OMIM " . parent::getDatasetVersion())
         ->setRetrievedDate($date)
         ->setFormat("application/json")
         ->setPublisher("http://omim.org")
         ->setHomepage("http://omim.org")
         ->setRights("use")
         ->setRights("no-commercial")
         ->setRights("registration-required")
         ->setLicense("http://www.omim.org/help/agreement")
         ->setDataset("http://identifiers.org/omim/");
     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outfile}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/omim/omim.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr(parent::getParameterValue('output_format'), "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description = $source_file->toRDF() . $output_file->toRDF();
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     return TRUE;
 }
Beispiel #15
0
 /**
  * Convert the requested SGD files to RDF and write the dataset description.
  *
  * The 'files' parameter selects the keys of the remote file table below
  * ('all' expands to the declared parameter list). Each key names a method
  * on this object that is invoked to do the actual parsing. Depending on the
  * 'one_file' parameter the RDF goes into a single "sgd.<format>" file or
  * into one "sgd_<key>.<format>" file per key.
  *
  * Unknown keys are reported with a warning and skipped.
  */
 function process()
 {
     if (parent::getParameterValue('files') == 'all') {
         $files = explode("|", parent::getParameterList('files'));
         array_shift($files);
     } else {
         $files = explode(",", parent::getParameterValue('files'));
     }
     $ldir = parent::getParameterValue('indir');
     $rdir = parent::getParameterValue('download_url');
     $odir = parent::getParameterValue('outdir');
     // key => path of the file relative to the download url
     $rfiles = array("dbxref" => "curation/chromosomal_feature/dbxref.tab", "features" => "curation/chromosomal_feature/SGD_features.tab", "domains" => "curation/calculated_protein_info/domains/domains.tab", "protein" => "curation/calculated_protein_info/protein_properties.tab", "goa" => "curation/literature/gene_association.sgd.gz", "goslim" => "curation/literature/go_slim_mapping.tab", "complex" => "curation/literature/go_protein_complex_slim.tab", "interaction" => "curation/literature/interaction_data.tab", "phenotype" => "curation/literature/phenotype_data.tab", "pathways" => "curation/literature/biochemical_pathways.tab", "mapping" => "mapping");
     // remember the current graph so it can be restored after processing
     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }
     // whether the OUTPUT is gzip-compressed
     $gz = false;
     if (strstr(parent::getParameterValue('output_format'), "gz")) {
         $gz = true;
     }
     if (parent::getParameterValue('one_file') == true) {
         $ofile = "sgd." . parent::getParameterValue('output_format');
         parent::setWriteFile($odir . $ofile, $gz);
     }
     $dataset_description = '';
     foreach ($files as $file) {
         if (!isset($rfiles[$file])) {
             trigger_error("Unknown SGD file '{$file}' requested; skipping", E_USER_WARNING);
             continue;
         }
         // Local name depends on the remote extension. Anything that is not
         // ".tab" (the gzipped GOA file, and the extension-less "mapping"
         // entry) is stored as ".tab.gz" -- this is what the previous
         // `elseif ($ext = "gz")` assignment effectively did.
         $ext = substr(strrchr($rfiles[$file], '.'), 1);
         if ($ext == "tab") {
             $lfile = "sgd_" . $file . ".tab";
         } else {
             $lfile = "sgd_" . $file . ".tab.gz";
         }
         // the INPUT compression follows the local file, not the output format
         $input_gz = $ext != "tab";
         $rfile = $rdir . $rfiles[$file];
         if (!file_exists($ldir . $lfile) && parent::getParameterValue('download') == false && $file != 'mapping') {
             trigger_error($ldir . $lfile . " not found. Will attempt to download.", E_USER_NOTICE);
             Utils::DownloadSingle($rfile, $ldir . $lfile);
         }
         if (parent::getParameterValue('one_file') == false) {
             $ofile = "sgd_" . $file . '.' . parent::getParameterValue('output_format');
             parent::setWriteFile($odir . $ofile, $gz);
         }
         //parse file
         parent::setReadFile($ldir . $lfile, $input_gz);
         echo "Processing {$file}... ";
         // the key doubles as the name of the parser method
         $this->{$file}();
         echo PHP_EOL . "done!";
         //write RDF to file
         parent::writeRDFBufferToWriteFile();
         //close write file
         if (parent::getParameterValue('one_file') == false) {
             parent::getWriteFile()->close();
         }
         echo PHP_EOL;
         // generate the dataset release file
         // dataset description (zero-padded UTC timestamp to match the "Z" suffix)
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("Saccharomyces Genome Database ({$file})")
             ->setRetrievedDate(gmdate("Y-m-d\\TH:i:s\\Z", filemtime($ldir . $lfile)))
             ->setFormat("text/tab-separated-value")
             ->setFormat("application/gzip")
             ->setPublisher("http://www.yeastgenome.org/")
             ->setHomepage("http://www.yeastgenome.org/")
             ->setRights("use")
             ->setLicense("http://www.stanford.edu/site/terms.html")
             ->setDataset("http://identifiers.org/sgd/");
         $dataset_description .= $source_file->toRDF();
         if (parent::getParameterValue('one_file') == false) {
             $prefix = parent::getPrefix();
             $bVersion = parent::getParameterValue('bio2rdf_release');
             $date = gmdate("Y-m-d\\TH:i:s\\Z");
             $output_file = (new DataResource($this))
                 ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
                 ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}")
                 ->setSource($source_file->getURI())
                 ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sgd/sgd.php")
                 ->setCreateDate($date)
                 ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
                 ->setPublisher("http://bio2rdf.org")
                 ->setRights("use-share-modify")
                 ->setRights("by-attribution")
                 ->setRights("restricted-by-source-license")
                 ->setLicense("http://creativecommons.org/licenses/by/3.0/")
                 ->setDataset(parent::getDatasetURI());
             if ($gz) {
                 $output_file->setFormat("application/gzip");
             }
             if (strstr(parent::getParameterValue('output_format'), "nt")) {
                 $output_file->setFormat("application/n-triples");
             } else {
                 $output_file->setFormat("application/n-quads");
             }
             $dataset_description .= $output_file->toRDF();
         }
     }
     //foreach
     // in one_file mode the combined output stays open during the loop;
     // close it before the write file is switched to the release file
     if (parent::getParameterValue('one_file') == true) {
         parent::getWriteFile()->close();
     }
     //set graph URI back to default
     parent::setGraphURI($graph_uri);
     // $source_file is only set when at least one file was processed
     if (parent::getParameterValue('one_file') == true && isset($source_file)) {
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = gmdate("Y-m-d\\TH:i:s\\Z");
         // NOTE(review): title and source refer to the LAST processed file
         // even though the output combines all of them -- confirm intent.
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sgd/sgd.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr(parent::getParameterValue('output_format'), "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         // every source description was already appended inside the loop
         $dataset_description .= $output_file->toRDF();
     }
     //write dataset description to file
     echo "Generating dataset description... " . PHP_EOL;
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Beispiel #16
0
 /**
  * Download (when needed) and convert the DrugBank archive to RDF, then
  * append the source/output resource descriptions to the release file.
  *
  * The 'files' parameter is a "|"-separated list of keys; only 'drugbank'
  * has a known source archive. Any other key is reported with a warning and
  * skipped, because no input file or output name is defined for it.
  * An optional comma-separated 'id_list' parameter is stored, flipped into a
  * lookup table, in $this->id_list.
  */
 function Run()
 {
     $indir = parent::getParameterValue('indir');
     $outdir = parent::getParameterValue('outdir');
     $download_url = parent::getParameterValue('download_url');
     if (parent::getParameterValue('files') == 'all') {
         $files = explode("|", parent::getParameterList('files'));
         array_shift($files);
     } else {
         $files = explode("|", parent::getParameterValue('files'));
     }
     if (parent::getParameterValue("id_list")) {
         $this->id_list = array_flip(explode(",", parent::getParameterValue('id_list')));
     }
     // whether the output is gzip-compressed (same for every file)
     $gz = strstr(parent::getParameterValue('output_format'), ".gz") === FALSE ? false : true;
     foreach ($files as $f) {
         if ($f != 'drugbank') {
             // without this guard $file/$lname would be undefined, or left
             // over from a previous iteration
             trigger_error("Unknown drugbank file '{$f}' requested; skipping", E_USER_WARNING);
             continue;
         }
         $file = 'drugbank.xml.zip';
         $lname = 'drugbank';
         // parser method name is derived from the key
         $fnx = 'parse_' . $f;
         $rfile = $download_url . $file;
         $lfile = $indir . $file;
         $cfile = $lname . "." . parent::getParameterValue('output_format');
         // download
         if (!file_exists($lfile) || parent::getParameterValue('download') == true) {
             utils::downloadSingle($rfile, $lfile);
         }
         // setup the write
         parent::setWriteFile($outdir . $cfile, $gz);
         echo $outdir . $cfile;
         if (file_exists($lfile)) {
             // call the parser
             echo "processing {$file} ..." . PHP_EOL;
             $this->{$fnx}($indir, $file);
             echo "done" . PHP_EOL;
             parent::clear();
         }
         parent::getWriteFile()->close();
         // dataset description is written with the dataset URI as graph;
         // the previous graph is restored at the end of the iteration
         $ouri = parent::getGraphURI();
         parent::setGraphURI(parent::getDatasetURI());
         $source_version = parent::getDatasetVersion();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $prefix = parent::getPrefix();
         $date = date("Y-m-d\\TH:i:sP");
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("DrugBank ({$file})")
             ->setRetrievedDate(date("Y-m-d\\TH:i:sP", filemtime($lfile)))
             ->setFormat("application/xml")
             ->setFormat("application/zip")
             ->setPublisher("http://drugbank.ca")
             ->setHomepage("http://drugbank.ca")
             ->setRights("use")
             ->setRights("by-attribution")
             ->setRights("no-commercial")
             ->setLicense("http://www.drugbank.ca/about")
             ->setDataset("http://identifiers.org/drugbank/");
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$cfile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} v{$source_version}")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/drugbank/drugbank.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr(parent::getParameterValue('output_format'), "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         parent::writeToReleaseFile($source_file->toRDF() . $output_file->toRDF());
         parent::setGraphURI($ouri);
     }
     parent::closeReleaseFile();
 }
Beispiel #17
0
 /**
  * Download (when needed) and convert the requested SIDER files to RDF, then
  * write the dataset description file.
  *
  * Each key in the 'files' parameter maps to the remote file
  * "meddra_all_<key>.tsv.gz" ("meddra_freq.tsv.gz" for the 'freq' key) and
  * to a method of the same name on this object that performs the parsing.
  * A file that cannot be fetched is skipped with a warning; a file that
  * cannot be written locally aborts the run.
  */
 function run()
 {
     $idir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $files = parent::getParameterValue('files');
     $dataset_description = '';
     if ($files == 'all') {
         $files = explode('|', parent::getParameterList('files'));
         array_shift($files);
     } else {
         $files = explode(',', parent::getParameterValue('files'));
     }
     foreach ($files as $file) {
         // build the remote/local file name for this key
         $f = $file;
         if ($file != "freq") {
             $f = "all_" . $file;
         }
         $f = "meddra_" . $f . ".tsv.gz";
         $lfile = $idir . $f;
         $rfile = parent::getParameterValue('download_url') . $f;
         if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') {
             echo "downloading {$file}... ";
             $ret = file_get_contents($rfile);
             if ($ret === FALSE) {
                 trigger_error("Unable to get {$rfile}", E_USER_WARNING);
                 continue;
             }
             $ret = file_put_contents($lfile, $ret);
             if ($ret === FALSE) {
                 trigger_error("Unable to write {$lfile}", E_USER_ERROR);
                 exit;
             }
             echo "done!" . PHP_EOL;
         }
         echo "Processing {$f}... ";
         parent::setReadFile($lfile, true);
         $suffix = parent::getParameterValue('output_format');
         $ofile = "sider-" . $file . '.' . $suffix;
         $gz = false;
         if (strstr($suffix, "gz")) {
             $gz = true;
         }
         parent::setWriteFile($odir . $ofile, $gz);
         // the key doubles as the name of the parser method
         $this->{$file}();
         parent::getWriteFile()->Close();
         parent::getReadFile()->Close();
         echo "done!" . PHP_EOL;
         echo "Generating dataset description... ";
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             // closing parenthesis was missing from the title
             ->setTitle("SIDER Side Effect resource ({$file}.tsv.gz)")
             ->setRetrievedDate(parent::getDate(filemtime($lfile)))
             ->setFormat("text/tab-separated-value")
             ->setFormat("application/gzip")
             ->setPublisher("http://sideeffects.embl.de/")
             ->setHomepage("http://sideeffects.embl.de/")
             ->setRights("use-share-modify")
             ->setLicense("http://creativecommons.org/licenses/by-nc-sa/3.0/")
             ->setDataset("http://identifiers.org/sider.effect/");
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = parent::getDate(filemtime($odir . $ofile));
         $output_file = (new DataResource($this))
             // host was misspelled "download.bio2df.org"
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sider/sider.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr($suffix, "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
     }
     //foreach
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Beispiel #18
0
 /**
  * Download (when needed) the NDC zip archive, convert the requested members
  * to RDF in a single "ndc.<format>" file and write the dataset description.
  *
  * Each key in the 'files' parameter names both the "<key>.txt" member of the
  * archive and the method on this object that parses its stream.
  *
  * @return bool|null FALSE when an archive member cannot be opened;
  *                   no explicit return value otherwise
  */
 function Run()
 {
     $ldir = $this->GetParameterValue('indir');
     $odir = $this->GetParameterValue('outdir');
     $rfile = $this->GetParameterValue('download_url');
     // local file name is the last path segment of the download url
     $lfile = substr($rfile, strrpos($rfile, "/") + 1);
     // check if exists
     if (!file_exists($ldir . $lfile) or parent::getParameterValue('download') == 'true') {
         echo "dowloading {$rfile} ...";
         trigger_error("Will attempt to download ", E_USER_NOTICE);
         Utils::DownloadSingle($rfile, $ldir . $lfile);
         echo "done" . PHP_EOL;
     }
     // make sure we have the zip archive
     $zin = new ZipArchive();
     // ZipArchive::open() returns TRUE on success and an error code (never
     // FALSE) on failure, so success has to be tested explicitly
     if ($zin->open($ldir . $lfile) !== TRUE) {
         trigger_error("Unable to open {$ldir}{$lfile}");
         exit;
     }
     // get the work
     if ($this->GetParameterValue('files') == 'all') {
         $files = explode("|", $this->GetParameterList('files'));
         array_shift($files);
     } else {
         $files = explode("|", $this->GetParameterValue('files'));
     }
     $gz = false;
     if (strstr(parent::getParameterValue('output_format'), "gz")) {
         $gz = true;
     }
     $outfile = "ndc." . parent::getParameterValue('output_format');
     parent::setWriteFile($odir . $outfile, $gz);
     // now go through each item in the zip file and process
     foreach ($files as $file) {
         echo "Processing {$file}... ";
         $fpin = $zin->getStream($file . ".txt");
         if (!$fpin) {
             trigger_error("Unable to get pointer to {$file} in {$ldir}{$lfile}", E_USER_ERROR);
             return FALSE;
         }
         // the key doubles as the name of the parser method
         $this->{$file}($fpin);
         parent::writeRDFBufferToWriteFile();
         echo "done!" . PHP_EOL;
     }
     // all members have been read; release the archive handle
     $zin->close();
     parent::getWriteFile()->close();
     echo "Generating dataset description for {$outfile}... ";
     //start generating dataset description file
     // gmdate + "H" gives a zero-padded UTC timestamp, matching the literal "Z"
     $source_file = (new DataResource($this))
         ->setURI($rfile)
         ->setTitle("FDA National Drug Code Directory")
         ->setRetrievedDate(gmdate("Y-m-d\\TH:i:s\\Z", filemtime($ldir . $lfile)))
         ->setFormat("text/tab-separated-value")
         ->setFormat("application/zip")
         ->setPublisher("http://www.fda.gov")
         ->setHomepage("http://www.fda.gov/Drugs/InformationOnDrugs/ucm142438.htm")
         ->setRights("use-share")
         ->setLicense(null)
         ->setDataset("http://identifiers.org/ndc/");
     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = gmdate("Y-m-d\\TH:i:s\\Z");
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outfile}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ndc/ndc.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr(parent::getParameterValue('output_format'), "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description = $source_file->toRDF() . $output_file->toRDF();
     //write dataset description to file
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Beispiel #19
0
 /**
  * Download (when needed) and convert the requested GOA annotation files to
  * RDF, one "goa_<key>.<format>" output per key, then write the dataset
  * description file.
  *
  * A missing local file forces a download even when the 'download'
  * parameter is off.
  */
 function run()
 {
     if (parent::getParameterValue('files') == 'all') {
         $files = explode("|", $this->GetParameterList('files'));
         array_shift($files);
     } else {
         $files = explode(",", parent::getParameterValue('files'));
     }
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     $dataset_description = '';
     foreach ($files as $file) {
         $download = parent::getParameterValue('download');
         $lfile = $ldir . "goa_" . $file . ".gz";
         if (!file_exists($lfile) && $download == false) {
             trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
             $download = true;
         }
         //download file
         $rfile = $rdir . strtoupper($file) . "/gene_association.goa_" . $file . ".gz";
         if ($download == true) {
             echo "downloading {$file} ... ";
             utils::DownloadSingle($rfile, $lfile);
         }
         $gz = strstr(parent::getParameterValue('output_format'), ".gz") === FALSE ? false : true;
         $ofile = "goa_" . $file . "." . parent::getParameterValue('output_format');
         parent::setReadFile($lfile, TRUE);
         parent::setWriteFile($odir . $ofile, $gz);
         echo "processing {$file} ... ";
         $this->process($file);
         echo "done!";
         parent::clear();
         //close write file
         parent::getWriteFile()->close();
         echo PHP_EOL;
         // dataset description; optionally emitted into the dataset graph,
         // with the previous graph restored at the end of the iteration
         $graph_uri = parent::getGraphURI();
         if (parent::getParameterValue('dataset_graph') == true) {
             parent::setGraphURI(parent::getDatasetURI());
         }
         // gmdate + "H" gives a zero-padded UTC timestamp, matching the literal "Z"
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             // closing parenthesis was missing from the title
             ->setTitle("Gene Ontology Annotation file {$file} ({$rfile})")
             ->setRetrievedDate(gmdate("Y-m-d\\TH:i:s\\Z", filemtime($lfile)))
             ->setFormat("text/tab-separated-value")
             ->setFormat("application/gzip")
             ->setPublisher("http://www.ebi.ac.uk/")
             ->setHomepage("http://www.ebi.ac.uk/GOA/")
             ->setRights("use")
             ->setLicense("http://www.ebi.ac.uk/GOA/goaHelp.html")
             ->setDataset("http://identifiers.org/goa/");
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = gmdate("Y-m-d\\TH:i:s\\Z");
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
             ->setSource($source_file->getURI())
             // creator previously pointed at the irefindex script (copy/paste)
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/goa/goa.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr(parent::getParameterValue('output_format'), "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
         parent::setGraphURI($graph_uri);
     }
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Beispiel #20
0
 /**
  * Mirror the RefSeq "complete.N.protein.gpff.gz" release files, convert
  * every local .gz file to RDF and write the dataset description.
  *
  * The list of remote files is cached in "<indir>ftp_list.txt"; it is
  * refreshed when the cache is missing or when 'download' is set. Files
  * already present locally are reused unless 'download' is set.
  */
 function Run()
 {
     $dataset_description = '';
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $list_file = $ldir . "ftp_list.txt";
     if (!file_exists($list_file) || $this->getParameterValue('download') == true) {
         echo "Getting FTP file list ...";
         $list = $this->getFtpFileList('ftp.ncbi.nlm.nih.gov', '/refseq/release/complete/', '/(complete\\.[0-9]+\\.protein\\.gpff\\.gz)/');
         // is_array guards against a non-countable failure value
         if (!is_array($list) or count($list) == 0) {
             trigger_error("Unable to get list of files from FTP site. Check internet connection", E_USER_ERROR);
             exit(-1);
         }
         asort($list);
         $buf = implode("\n", $list);
         file_put_contents($list_file, $buf);
         echo "Done." . PHP_EOL;
     } else {
         echo "Using existing ftp list" . PHP_EOL;
         $list = explode("\n", file_get_contents($list_file));
     }
     // drop blank lines (e.g. a trailing newline in the cached list) so they
     // are neither counted nor treated as a file name
     $list = array_values(array_filter(array_map('trim', $list), 'strlen'));
     $counter = 1;
     $total = count($list);
     foreach ($list as $f) {
         $lfile = $ldir . $f;
         echo "Processing " . $counter++ . "/{$total} {$f}. ";
         if (!file_exists($lfile) || $this->getParameterValue('download') == true) {
             $rfile = parent::getParameterValue('download_url') . $f;
             echo "Downloading ...";
             utils::DownloadSingle($rfile, $lfile);
             echo "done.";
         } else {
             echo "Using existing file.";
         }
         echo PHP_EOL;
     }
     //if download
     //iterate over the files
     $files = $this->getFilePaths($ldir, 'gz');
     asort($files);
     $format = parent::getParameterValue('output_format');
     $gz = strstr($format, "gz") ? true : false;
     foreach ($files as $f) {
         $lfile = $ldir . $f;
         // output file name without directory; also used in the release URI
         $oname = basename($f, ".gz") . "." . $format;
         $ofile = $odir . $oname;
         parent::setWriteFile($ofile, $gz);
         parent::setReadFile($lfile, true);
         echo "processing {$f} ...";
         $this->process();
         parent::clear();
         echo "done!" . PHP_EOL;
         $this->getReadFile()->close();
         $this->getWriteFile()->close();
         // gmdate + "H" gives a zero-padded UTC timestamp, matching the literal "Z"
         $source_file = (new DataResource($this))
             // remote URI is download_url + file name (not the local path)
             ->setURI(parent::getParameterValue('download_url') . $f)
             ->setTitle("NCBI RefSeq - {$f}")
             ->setRetrievedDate(gmdate("Y-m-d\\TH:i:s\\Z", filemtime($lfile)))
             ->setFormat('text/refseq-format')
             // the source files are .gz, not zip archives
             ->setFormat('application/gzip')
             ->setPublisher('http://www.ncbi.nlm.nih.gov')
             ->setHomepage('http://www.ncbi.nlm.nih.gov/refseq')
             ->setRights('use')
             ->setRights('attribution')
             ->setLicense('http://www.nlm.nih.gov/copyright.html')
             ->setDataset(parent::getDatasetURI());
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = gmdate("Y-m-d\\TH:i:s\\Z");
         $output_file = (new DataResource($this))
             // include the file name so each output gets its own URI
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$oname}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$f}")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/refseq/refseq.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("restricted-by-source-license")
             // host was missing ".org"
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         // declare the output formats
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr($format, "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $output_file->toRDF() . $source_file->toRDF();
     }
     //for
     parent::writeToReleaseFile($dataset_description);
     parent::getWriteFile()->close();
 }
Beispiel #21
0
 /**
  * Convert the selected MeSH packages to RDF and write the dataset
  * description file.
  *
  * The 'files' parameter is either 'all' or a comma-separated list of
  * package keys; keys that are not in the package map are ignored. Each key
  * maps to a file name pattern in which "YEAR" is replaced by the 'year'
  * parameter, and also names the method on this object that parses the file.
  */
 function Run()
 {
     $requested = trim(parent::getParameterValue('files'));
     $package_map = $this->getPackageMap();
     if ($requested == 'all') {
         $selected = $package_map;
     } else {
         // keep only the requested keys that the package map knows about
         $selected = array();
         foreach (explode(",", $requested) as $key) {
             if (!array_key_exists($key, $package_map)) {
                 continue;
             }
             $selected[$key] = $package_map[$key];
         }
     }
     $in_dir = parent::getParameterValue('indir');
     $out_dir = parent::getParameterValue('outdir');
     $description = '';
     $year = parent::getParameterValue('year');
     foreach ($selected as $package => $pattern) {
         $name = str_replace("YEAR", $year, $pattern);
         $local = $in_dir . $name;
         $remote = parent::getParameterValue("download_url") . $name;
         // fetch the file when it is missing locally or a download is requested
         if (!file_exists($local) || parent::getParameterValue('download') == "true") {
             echo "Downloading {$name} ... ";
             if (utils::downloadSingle($remote, $local) === FALSE) {
                 trigger_error("Unable to get {$name}", E_USER_ERROR);
                 continue;
             }
             echo "done!" . PHP_EOL;
         }
         // output file name and compression follow the output format
         $format = parent::getParameterValue('output_format');
         $out_name = "mesh_" . $package . "." . $format;
         $gz = (bool) strstr($format, "gz");
         echo "processing {$package} ...";
         parent::setReadFile($local, FALSE);
         parent::setWriteFile($out_dir . $out_name, $gz);
         // the package key is also the name of its parser method
         $this->{$package}();
         parent::writeRDFBufferToWriteFile();
         parent::getWriteFile()->close();
         echo "done!" . PHP_EOL;
         // describe the downloaded source file
         $source_file = (new DataResource($this))
             ->setURI($remote)
             ->setTitle("MeSH")
             ->setRetrievedDate(parent::getDate(filemtime($local)))
             ->setFormat("text/x-mesh-record")
             ->setPublisher("http://www.nlm.nih.gov")
             ->setHomepage("http://www.nlm.nih.gov/mesh/")
             ->setRights("use")
             ->setLicense("http://www.nlm.nih.gov/databases/download.html")
             ->setDataset("http://identifiers.org/mesh/");
         $prefix = parent::getPrefix();
         $release = parent::getParameterValue('bio2rdf_release');
         $created = parent::getDate(filemtime($out_dir . $out_name));
         // describe the generated RDF file
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$release}/{$prefix}/{$out_name}")
             ->setTitle("Bio2RDF v{$release} RDF version of {$prefix}")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/mesh/mesh.php")
             ->setCreateDate($created)
             ->setHomepage("http://download.bio2rdf.org/release/{$release}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         $output_file->setFormat(strstr($format, "nt") ? "application/n-triples" : "application/n-quads");
         $description .= $source_file->toRDF() . $output_file->toRDF();
     }
     // write the accumulated descriptions to the release file
     parent::setWriteFile($out_dir . $this->getBio2RDFReleaseFile($this->getNamespace()));
     parent::getWriteFile()->write($description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Beispiel #22
0
 /**
  * Process the local copy of the PubChem BioAssay directory.
  *
  * Every zip archive found in "<indir>/bioassay" is extracted below a
  * temporary directory; each extracted file is passed to
  * parse_bioassay_file() and written to
  * "<outdir>/bioassay/<archive name>/<file>.<output_format>". The extracted
  * files are deleted again once they have been processed. Finally a dataset
  * description (source + output resource) is written to the release file in
  * "<outdir>/bioassay/".
  *
  * The script is terminated when the input directory cannot be read.
  */
 function parse_bioassay()
 {
     $ignore = array(".", "..");
     $input_dir = $this->getParameterValue('indir') . "/bioassay";
     $output_dir = $this->getParameterValue('outdir') . "/bioassay/";
     // the output format does not change between files, so resolve it once
     $suffix = parent::getParameterValue('output_format');
     $gz = strstr($suffix, "gz") ? true : false;
     // root of the temporary extraction area; kept in place for later runs
     $tmp = '/tmp/pubchem';
     $this->CreateDirectory($tmp);
     $this->CreateDirectory($output_dir);
     parent::setDatasetURI("bio2rdf_dataset:bio2rdf-" . $this->getPcbPrefix() . "-" . date("Ymd"));
     // remember the current graph URI so it can be restored afterwards
     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }
     $dataset_description = '';

     $handle = opendir($input_dir);
     if ($handle === false) {
         echo "unable to read directory contents: " . $input_dir . "\n";
         exit;
     }

     while (false !== ($dir = readdir($handle))) {
         if (in_array($dir, $ignore, true)) {
             continue;
         }
         $zip = new ZipArchive();
         if ($zip->open($input_dir . "/" . $dir) !== TRUE) {
             // not a readable zip archive
             continue;
         }
         $zip->extractTo($tmp);
         // archive name up to the first dot; explode() result is stored in a
         // variable because array_shift() takes its argument by reference
         $parts = explode(".", $dir);
         $name = $parts[0];
         $this->CreateDirectory($output_dir . $name);
         $read_dir = $tmp . "/" . $name . "/";

         $files = opendir($read_dir);
         if ($files === false) {
             echo "unable to open directory to read files.\n";
         } else {
             // strict comparison: an entry literally named "0" must not end the loop
             while (false !== ($file = readdir($files))) {
                 if (in_array($file, $ignore, true)) {
                     continue;
                 }
                 echo "Processing file: " . $read_dir . $file . PHP_EOL;
                 $outfile = realpath($this->getParameterValue('outdir')) . "/bioassay/" . $name . "/" . basename($file, ".xml.gz") . "." . $suffix;
                 echo "... into " . $outfile . PHP_EOL;
                 parent::setWriteFile($outfile, $gz);
                 parent::setCheckpoint('file');
                 $this->parse_bioassay_file($read_dir, $file);
                 parent::getWriteFile()->close();
                 // the extracted copy is no longer needed
                 if (is_file($read_dir . $file)) {
                     unlink($read_dir . $file);
                 }
             }
             closedir($files);
             // remove the (now empty) extraction directory of this archive
             rmdir($read_dir);
         }
         $zip->close();
     }
     closedir($handle);

     // "H" (two-digit hour) keeps the timestamp well-formed before 10:00
     $source_file = (new DataResource($this))
         ->setURI("http://www.ncbi.nlm.nih.gov/pcassay")
         ->setTitle("PubChem BioAssay")
         ->setRetrievedDate(date("Y-m-d\\TH:i:s\\Z", filemtime($input_dir)))
         ->setFormat("text/xml")
         ->setFormat("application/zip")
         ->setPublisher("http://ncbi.nlm.nih.gov/")
         ->setHomepage("http://pubchem.ncbi.nlm.nih.gov/")
         ->setRights("use")
         ->setRights("restricted-by-source-license")
         ->setLicense("ftp://ftp.ncbi.nlm.nih.gov/pubchem/README")
         ->setDataset("http://identifiers.org/pubchem.bioassay/");

     $prefix = $this->getPcbPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = date("Y-m-d\\TH:i:s\\Z");
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/pubchem/pubchem.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr($suffix, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description .= $source_file->toRDF() . $output_file->toRDF();

     // set graph URI back to default
     parent::setGraphURI($graph_uri);
     // write the dataset description
     $this->setWriteFile($output_dir . $this->getBio2RDFReleaseFile());
     $this->getWriteFile()->write($dataset_description);
     $this->getWriteFile()->close();
 }
Beispiel #23
0
 /**
  * Convert BioModels BioPAX level 3 files into RDF and write a dataset description.
  *
  * The 'files' parameter selects the models:
  *  - 'all' / 'curated': identifiers come from the BioModels SOAP web service
  *    and are cached as JSON in the input directory;
  *  - 'a-b': the numeric range a..b (inclusive);
  *  - otherwise: a comma separated list of model numbers.
  * Each model is downloaded when missing locally (or when 'download' is
  * 'true'), converted with BioPAX2Bio2RDF and appended to a single
  * "biomodels.<output_format>" file in the output directory.
  *
  * @return bool|null false when the identifier list cannot be retrieved from
  *                   the web service; nothing otherwise
  */
 function Run()
 {
     // directory shortcuts
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');

     // get the work specified
     $list = trim(parent::getParameterValue('files'));
     $entries = array();
     // keyword => (cache file name, web service operation)
     $services = array(
         'all' => array("all_models.json", "getAllModelsId"),
         'curated' => array("curated_models.json", "getAllCuratedModelsId"),
     );
     if (isset($services[$list])) {
         $file = $ldir . $services[$list][0];
         $operation = $services[$list][1];
         if (!file_exists($file)) {
             try {
                 // "@": the constructor may emit warnings in addition to throwing
                 $x = @new SoapClient("http://www.ebi.ac.uk/biomodels-main/services/BioModelsWebServices?wsdl");
                 $entries = $x->{$operation}();
             } catch (Exception $e) {
                 // without a client there is nothing to process; stop instead
                 // of calling a method on an undefined variable
                 trigger_error("Unable to retrieve the BioModels identifier list: " . $e->getMessage(), E_USER_WARNING);
                 return false;
             }
             file_put_contents($file, json_encode($entries));
         } else {
             $entries = json_decode(file_get_contents($file));
         }
     } elseif (($pos = strpos($list, "-")) !== FALSE) {
         // hyphenated range; cast so that the loop is purely numeric
         $start_range = (int) substr($list, 0, $pos);
         $end_range = (int) substr($list, $pos + 1);
         for ($i = $start_range; $i <= $end_range; $i++) {
             $entries[] = "BIOMD" . str_pad($i, 10, "0", STR_PAD_LEFT);
         }
     } else {
         // comma separated list; surrounding blanks and empty items are ignored
         foreach (explode(",", $list) as $e) {
             $e = trim($e);
             if ($e === '') {
                 continue;
             }
             $entries[] = "BIOMD" . str_pad($e, 10, "0", STR_PAD_LEFT);
         }
     }
     // an unreadable cache file decodes to null; make sure we iterate a list
     if (is_object($entries)) {
         $entries = get_object_vars($entries);
     } elseif (!is_array($entries)) {
         $entries = array();
     }

     // remember the current graph URI so it can be restored afterwards
     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }

     // set the write file
     $suffix = parent::getParameterValue('output_format');
     $outfile = 'biomodels' . '.' . $suffix;
     $gz = strstr($suffix, "gz") ? true : false;
     $dataset_description = '';
     parent::setWriteFile($odir . $outfile, $gz);

     // iterate over the entries
     $i = 0;
     $total = count($entries);
     $source_file = null;
     foreach ($entries as $id) {
         echo "processing " . ++$i . " of {$total} - biomodel# " . $id;
         $download_file = $ldir . $id . ".owl.gz";
         $url = parent::getParameterValue('download_url') . "publ/{$id}/{$id}-biopax3.owl";
         // download if the file doesn't exist or we are told to
         if (!file_exists($download_file) || $this->GetParameterValue('download') == 'true') {
             echo " - downloading";
             $ret = utils::downloadsingle($url, 'compress.zlib://' . $download_file, true);
             if ($ret === false) {
                 echo "\nTrying non-curated model";
                 $url = parent::getParametervalue('download_url') . "uncura_publ/{$id}/{$id}-biopax3.owl";
                 $ret = utils::downloadsingle($url, 'compress.zlib://' . $download_file, true);
                 if ($ret === false) {
                     continue;
                 }
             }
             echo " - downloaded";
         }

         // load entry, parse and write to file
         echo " - parsing... ";
         $buf = file_get_contents("compress.zlib://" . $download_file);
         if ($buf === false) {
             echo "unable to read {$download_file}, skipping" . PHP_EOL;
             continue;
         }
         $converter = new BioPAX2Bio2RDF($this);
         $converter->SetBuffer($buf)
             ->SetBioPAXVersion(3)
             ->SetBaseNamespace("http://identifiers.org/biomodels.db/{$id}/")
             ->SetBio2RDFNamespace("http://bio2rdf.org/biomodels:" . $id . "_")
             ->SetDatasetURI($this->GetDatasetURI());
         $rdf = $converter->Parse();
         parent::addRDF($rdf);
         parent::writeRDFBufferToWriteFile();

         // generate dataset description; "H" (two-digit hour) keeps the
         // timestamp well-formed before 10:00
         $source_file = (new DataResource($this))
             ->setURI($url)
             ->setTitle("EBI BioModels Database - BioModel # {$id}")
             ->setRetrievedDate(date("Y-m-d\\TH:i:s\\Z", filemtime($download_file)))
             ->setFormat("rdf/xml")
             ->setPublisher("http://www.ebi.ac.uk/")
             ->setHomepage("http://www.ebi.ac.uk/biomodels-main/")
             ->setRights("use-share-modify")
             ->setLicense("http://www.ebi.ac.uk/biomodels-main/termsofuse")
             ->setDataset("http://identifiers.org/biomodels.db/");
         $dataset_description .= $source_file->toRDF();
         echo "done!" . PHP_EOL;
     }
     parent::getWriteFile()->close();

     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = date("Y-m-d\\TH:i:s\\Z");
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})");
     // the source is the last model that was processed; there is none when
     // the list was empty or every download failed
     if ($source_file !== null) {
         $output_file = $output_file->setSource($source_file->getURI());
     }
     $output_file = $output_file
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/biomodels/biomodels.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr($suffix, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description .= $output_file->toRDF();

     // write dataset description to file
     parent::setGraphURI($graph_uri);
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Beispiel #24
0
 /**
  * Convert the requested CTD files into RDF and write a dataset description.
  *
  * The 'files' parameter is either 'all' (every entry of the parameter list
  * after the first) or a comma separated list. For each file the local copy
  * "<indir><file>.gz" is downloaded when missing, the method "CTD_<file>" is
  * invoked to do the conversion into "<outdir>ctd_<file>.<output_format>",
  * and a source/output resource pair is added to the dataset description,
  * which is written to the release file at the end.
  *
  * A file whose download fails is reported and skipped.
  */
 function process()
 {
     // get the file list
     if (parent::getParameterValue('files') == 'all') {
         $files = explode("|", parent::getParameterList('files'));
         array_shift($files);
     } else {
         $files = explode(",", parent::getParameterValue('files'));
     }
     $dataset_description = '';

     // set directory values
     $ldir = parent::getParameterValue('indir');
     $rdir = parent::getParameterValue('download_url');
     $odir = parent::getParameterValue('outdir');

     // remember the current graph URI so it can be restored afterwards
     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }

     $gz_suffix = ".gz";
     // the output format does not change between files, so resolve it once
     $out_suffix = parent::getParameterValue('output_format');
     $gz = strstr($out_suffix, "gz") ? true : false;
     // remote files that are not offered as ".tsv.gz"
     $remote_suffixes = array(
         'chem_gene_ixn_types' => '.tsv',
         'exposure_ontology' => '.obo',
     );
     // files that correspond to a registered identifiers.org dataset
     $datasets = array(
         "chemicals" => "http://identifiers.org/ctd.chemical/",
         "diseases" => "http://identifiers.org/ctd.disease/",
         "genes" => "http://identifiers.org/ctd.gene/",
     );

     foreach ($files as $file) {
         $suffix = isset($remote_suffixes[$file]) ? $remote_suffixes[$file] : ".tsv.gz";
         $lfile = $ldir . $file . $gz_suffix;
         $rfile = $rdir . 'CTD_' . $file . $suffix;
         if (!file_exists($lfile)) {
             trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
             // uncompressed remote files are gzipped while being stored locally
             $target = $suffix == ".tsv.gz" ? $lfile : "compress.zlib://" . $lfile;
             $ret = Utils::DownloadSingle($rfile, $target);
             if ($ret === false || !file_exists($lfile)) {
                 trigger_error("Unable to download " . $rfile . "; skipping " . $file, E_USER_WARNING);
                 continue;
             }
         }

         $ofile = "ctd_" . $file . "." . $out_suffix;
         echo "Processing " . $file . " ...";
         parent::setWriteFile($odir . $ofile, $gz);
         // set read file
         parent::setReadFile($lfile, TRUE);
         // dispatch to the converter method for this file
         $fnx = "CTD_" . $file;
         $this->{$fnx}();
         // close write file
         parent::getWriteFile()->close();
         parent::clear();
         echo "done!" . PHP_EOL;

         // generate the dataset release file
         echo "Generating dataset description... ";
         $dataset = isset($datasets[$file]) ? $datasets[$file] : null;

         // dataset description; "H" (two-digit hour) keeps the timestamp
         // well-formed before 10:00
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("Comparative Toxicogenomics Database ({$file}{$gz_suffix})")
             ->setRetrievedDate(date("Y-m-d\\TH:i:s\\Z", filemtime($lfile)))
             ->setFormat("text/tab-separated-value")
             ->setFormat("application/gzip")
             ->setPublisher("http://ctdbase.org/")
             ->setHomepage("http://ctdbase.org/")
             ->setRights("use")
             ->setRights("by-attribution")
             ->setRights("no-commercial")
             ->setLicense("http://ctdbase.org/about/legal.jsp")
             ->setDataset($dataset);

         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = date("Y-m-d\\TH:i:s\\Z");
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ctd/ctd.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr($out_suffix, "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
     }

     parent::setGraphURI($graph_uri);
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
 /**
  * Convert the requested Pathway Commons BioPAX archives into RDF and write
  * a dataset description.
  *
  * The 'files' parameter is either 'all' (every entry of the parameter list
  * after the first) or a comma separated list of source names. Each source
  * is downloaded to "<indir><source>.owl.gz" when missing locally (or when
  * 'download' is 'true'), decompressed into memory, converted with
  * BioPAX2Bio2RDF and written to "<outdir><source>.<output_format>".
  *
  * Unknown sources and failed downloads are reported and skipped; the script
  * is terminated when a local archive cannot be opened.
  */
 function Run()
 {
     // get the work
     if ($this->GetParameterValue('files') == 'all') {
         $sources = explode("|", parent::getParameterList('files'));
         array_shift($sources);
     } else {
         // comma separated list
         $sources = explode(",", parent::getParameterValue('files'));
     }

     // source name => remote file name (URL-encoded)
     $download_files = array(
         "homo-sapiens" => "Pathway%20Commons%202%20homo%20sapiens.BIOPAX.owl.gz",
         "hprd" => "Pathway%20Commons%202%20HPRD.BIOPAX.owl.gz",
         "humancyc" => "Pathway%20Commons%202%20HumanCyc.BIOPAX.owl.gz",
         "nci-nature" => "Pathway%20Commons%202%20NCI_Nature.BIOPAX.owl.gz",
         "panther-pathway" => "Pathway%20Commons%202%20PANTHER%20Pathway.BIOPAX.owl.gz",
         "phosphositeplus" => "Pathway%20Commons%202%20PhosphoSitePlus.BIOPAX.owl.gz",
         "reactome" => "Pathway%20Commons%202%20Reactome.BIOPAX.owl.gz",
     );

     // remember the current graph URI so it can be restored afterwards
     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }

     // these do not depend on the source and are also needed after the loop,
     // even when no source was processed
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     $suffix = parent::getParameterValue('output_format');
     $gz = strstr($suffix, "gz") ? true : false;

     $dataset_description = '';
     $source_file = null;

     // iterate over the requested data
     foreach ($sources as $source) {
         echo "processing {$source}... ";
         if (!isset($download_files[$source])) {
             trigger_error("Unknown Pathway Commons source '{$source}'; skipping", E_USER_WARNING);
             continue;
         }

         // set the remote and input files
         $zfile = $source . ".owl.gz";
         $rfile = $rdir . $download_files[$source];
         $lfile = $ldir . $zfile;

         // download if the file doesn't exist locally or we are told to
         if (!file_exists($lfile) || $this->GetParameterValue('download') == 'true') {
             echo "downloading... ";
             // never store a failed download: an empty local file would be
             // mistaken for a valid archive on the next run
             $remote = file_get_contents($rfile);
             if ($remote === false || file_put_contents($lfile, $remote) === false) {
                 trigger_error("Unable to download {$rfile}; skipping {$source}", E_USER_WARNING);
                 continue;
             }
         }

         // decompress the archive into a buffer
         echo 'extracting... ';
         if (($fpin = gzopen($lfile, "r")) === FALSE) {
             trigger_error("Unable to open {$lfile}", E_USER_ERROR);
             exit;
         }
         $data = '';
         while (!gzeof($fpin)) {
             $buffer = gzgets($fpin, 4096);
             if ($buffer === false) {
                 // read error: stop instead of looping forever
                 break;
             }
             $data .= $buffer;
         }
         gzclose($fpin);

         // set the output file
         $outfile = $source . '.' . $suffix;
         parent::setWriteFile($odir . $outfile, $gz);

         // send for parsing
         $p = new BioPAX2Bio2RDF($this);
         $p->SetBuffer($data)
             ->SetBioPAXVersion(3)
             ->SetBaseNamespace("http://purl.org/pc2/3/")
             ->SetBio2RDFNamespace("http://bio2rdf.org/pathwaycommons:")
             ->SetDatasetURI(parent::getDatasetURI());
         $rdf = $p->Parse();
         parent::addRDF($rdf);

         // write to output
         parent::writeRDFBufferToWriteFile();
         parent::getWriteFile()->Close();
         echo "done!" . PHP_EOL;

         // generate dataset description; "H" (two-digit hour) keeps the
         // timestamp well-formed before 10:00
         echo "Generating dataset description for {$zfile}... ";
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("Pathway Commons")
             ->setRetrievedDate(date("Y-m-d\\TH:i:s\\Z", filemtime($lfile)))
             ->setFormat("rdf/xml")
             ->setPublisher("http://www.pathwaycommons.org/")
             ->setHomepage("http://www.pathwaycommons.org/")
             ->setRights("use")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://www.pathwaycommons.org/pc2/home.html#data_sources")
             ->setDataset("http://identifiers.org/pathwaycommons/");
         $dataset_description .= $source_file->toRDF();
         echo "done!" . PHP_EOL;
     }

     echo "Generating dataset description for Bio2RDF Pathways Commons dataset... ";
     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = date("Y-m-d\\TH:i:s\\Z");
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})");
     // the source is the last archive that was processed; there is none when
     // every requested source was skipped
     if ($source_file !== null) {
         $output_file = $output_file->setSource($source_file->getURI());
     }
     $output_file = $output_file
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/pathwaycommons/pathwaycommons.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr($suffix, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description .= $output_file->toRDF();

     // write dataset description to file
     parent::setGraphURI($graph_uri);
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Beispiel #26
0
 /**
  * Convert the InterPro XML dump into RDF and write a dataset description.
  *
  * "interpro.xml.gz" is downloaded into the input directory when it is
  * missing (or when 'download' is 'true'), loaded with CXML, handed to
  * parse() and written to "<outdir>interpro.<output_format>". Afterwards the
  * graph URI is switched to the dataset URI and the source/output resource
  * descriptions are written to the release file.
  *
  * The script is terminated when the download cannot be fetched or stored.
  *
  * @return bool true once both files have been written
  */
 function Run()
 {
     // directory shortcuts
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');

     // remote location and local copy of the dump
     $rfile = trim(parent::getParameterValue('download_url'));
     $file = "interpro.xml.gz";
     $lfile = $ldir . $file;
     if (!file_exists($lfile) || parent::getParameterValue("download") == "true") {
         echo "Downloading {$lfile}" . PHP_EOL;
         $ret = file_get_contents($rfile);
         if ($ret === FALSE) {
             trigger_error("unable to download {$rfile}");
             exit;
         }
         // a dump that could not be stored must not be parsed
         if (file_put_contents($lfile, $ret) === FALSE) {
             trigger_error("unable to save {$lfile}");
             exit;
         }
     }

     echo "Loading XML file...";
     $cxml = new CXML($lfile);
     $cxml->Parse();
     $xml = $cxml->GetXMLRoot();
     echo "Done" . PHP_EOL;

     // set the write file
     $format = parent::getParameterValue('output_format');
     $gz = strstr($format, ".gz") === FALSE ? false : true;
     $outfile = "interpro." . $format;
     parent::setWriteFile($odir . $outfile, $gz);

     echo "Parsing interpro xml file" . PHP_EOL;
     $this->parse($xml);
     parent::writeRDFBufferToWriteFile();
     parent::getWriteFile()->close();
     echo "Done!" . PHP_EOL;

     // the dataset description goes into the dataset graph
     parent::setGraphURI(parent::getDatasetURI());

     // dataset description; "H" (two-digit hour) keeps the timestamp
     // well-formed before 10:00, and "application/gzip" is the registered
     // media type for gzip content
     $source_version = parent::getDatasetVersion();
     $source_file = (new DataResource($this))
         ->setURI($rfile)
         ->setTitle("InterPro v{$source_version}")
         ->setRetrievedDate(date("Y-m-d\\TH:i:s\\Z", filemtime($lfile)))
         ->setFormat("application/xml")
         ->setFormat("application/gzip")
         ->setPublisher("http://www.ebi.ac.uk/")
         ->setHomepage("http://www.ebi.ac.uk/interpro/")
         ->setRights("InterPro - Integrated Resource Of Protein Domains And Functional Sites. Copyright (C) 2001 The InterPro Consortium")
         ->setLicense("http://www.ebi.ac.uk/interpro/faqs.html")
         ->setDataset("http://identifiers.org/interpro/");

     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = date("Y-m-d\\TH:i:s\\Z");
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outfile}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} v{$source_version}")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/interpro/interpro.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr($format, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }

     $dataset_description = $source_file->toRDF() . $output_file->toRDF();
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     return true;
 }
Beispiel #27
0
 function OBO2RDF($abbv)
 {
     $abbv = strtolower($abbv);
     if ($abbv == "doid") {
         $abbv = "do";
     }
     $minimal = parent::getParameterValue('detail') == 'min' ? true : false;
     $minimalp = parent::getParameterValue('detail') == 'min+' ? true : false;
     $version = parent::getParameterValue("bio2rdf_release");
     $tid = '';
     $first = true;
     $is_a = false;
     $is_deprecated = false;
     $min = $buf = '';
     $ouri = "http://bio2rdf.org/lsr:" . $abbv;
     $dataset_uri = $abbv . "_resource:bio2rdf.dataset.{$abbv}.R" . $version;
     parent::setGraphURI($dataset_uri);
     $buf = parent::triplify($ouri, "rdf:type", "owl:Ontology");
     $graph_uri = '<' . parent::getRegistry()->getFQURI(parent::getGraphURI()) . '>';
     $bid = 1;
     while ($l = parent::getReadFile()->read()) {
         $lt = trim($l);
         if (strlen($lt) == 0) {
             continue;
         }
         if ($lt[0] == '!') {
             continue;
         }
         if (strstr($l, "[Term]")) {
             // first node?
             if ($first == true) {
                 // ignore the first case
                 $first = false;
             } else {
                 if ($tid != '' && $is_a == false && $is_deprecated == false) {
                     $t = parent::triplify($tid, "rdfs:subClassOf", "obo_vocabulary:Entity");
                     $buf .= $t;
                     $min .= $t;
                 }
             }
             $is_a = false;
             $is_deprecated = false;
             unset($typedef);
             $term = '';
             $tid = '';
             continue;
         } else {
             if (strstr($l, "[Typedef]")) {
                 $is_a = false;
                 $is_deprecated = false;
                 unset($term);
                 $tid = '';
                 $typedef = '';
                 continue;
             }
         }
         //echo "LINE: $l".PHP_EOL;
         // to fix error in obo generator
         $lt = str_replace("synonym ", "synonym: ", $lt);
         $lt = preg_replace("/\\{.*\\} !/", " !", $lt);
         $a = explode(" !", $lt);
         if (isset($a[1])) {
             $exc = trim($a[1]);
         }
         $a = explode(": ", trim($a[0]), 2);
         // let's go
         if (isset($intersection_of)) {
             if ($a[0] != "intersection_of") {
                 //		$intersection_of .= ")].".PHP_EOL;
                 //$buf .= $intersection_of;
                 if ($minimalp) {
                     $min .= $intersection_of;
                 }
                 unset($intersection_of);
             }
         }
         if (isset($relationship)) {
             if ($a[0] != "relationship") {
                 //	$relationship .= ")].".PHP_EOL;
                 //$buf .= $relationship;
                 if ($minimalp) {
                     $min .= $relationship;
                 }
                 unset($relationship);
             }
         }
         if (isset($typedef)) {
             if ($a[0] == "id") {
                 $c = explode(":", $a[1]);
                 if (count($c) == 1) {
                     $ns = "obo";
                     $id = $c[0];
                 } else {
                     $ns = strtolower($c[0]);
                     $id = $c[1];
                 }
                 $id = str_replace(array("(", ")"), array("_", ""), $id);
                 $tid = $ns . ":" . $id;
             } else {
                 if ($a[0] == "name") {
                     $buf .= parent::describeClass($tid, addslashes(stripslashes($a[1])));
                 } else {
                     if ($a[0] == "is_a") {
                         if (FALSE !== ($pos = strpos($a[1], "!"))) {
                             $a[1] = substr($a[1], 0, $pos - 1);
                         }
                         $buf .= parent::triplify($tid, "rdfs:subPropertyOf", "obo_vocabulary:" . strtolower($a[1]));
                     } else {
                         if ($a[0] == "is_obsolete") {
                             $buf .= parent::triplify($tid, "rdf:type", "owl:DeprecatedClass");
                             $is_deprecated = true;
                         } else {
                             if ($a[0][0] == "!") {
                                 $a[0] = substr($a[0], 1);
                             }
                             $buf .= parent::triplifyString($tid, "obo_vocabulary:{$a['0']}", str_replace('"', '', stripslashes($a[1])));
                         }
                     }
                 }
             }
         } else {
             if (isset($term)) {
                 if ($a[0] == "is_obsolete" && $a[1] == "true") {
                     $t = parent::triplify($tid, "rdf:type", "owl:DeprecatedClass");
                     $t .= parent::triplify($tid, "rdfs:subClassOf", "owl:DeprecatedClass");
                     $min .= $t;
                     $buf .= $t;
                     $is_deprecated = true;
                 } else {
                     if ($a[0] == "id") {
                         parent::getRegistry()->parseQName($a[1], $ns, $id);
                         $tid = "{$ns}:{$id}";
                         //					$buf .= parent::describeClass($tid,null,"owl:Class");
                         //					$buf .= parent::triplify($tid,"rdfs:isDefinedBy",$ouri);
                     } else {
                         if ($a[0] == "name") {
                             //					$t = parent::triplifyString($tid,"rdfs:label",str_replace(array("\"", "'"), array("","\\\'"), stripslashes($a[1]))." [$tid]");
                             $label = str_replace(array("\"", "'"), array("", "\\\\'"), stripslashes($a[1]));
                             $t = parent::describeIndividual($tid, $label, "owl:Class");
                             $t .= parent::triplify($tid, "rdfs:isDefinedBy", $ouri);
                             $min .= $t;
                             $buf .= $t;
                         } else {
                             if ($a[0] == "def") {
                                 $t = str_replace(array("'", "\"", "\\", "\\\\'"), array("\\\\'", "", "", ""), $a[1]);
                                 $min .= parent::triplifyString($tid, "dc:description", $t);
                                 $buf .= parent::triplifyString($tid, "dc:description", $t);
                             } else {
                                 if ($a[0] == "property_value") {
                                     $b = explode(" ", $a[1]);
                                     $buf .= parent::triplifyString($tid, "obo_vocabulary:" . strtolower($b[0]), str_replace("\"", "", strtolower($b[1])));
                                 } else {
                                     if ($a[0] == "xref") {
                                         // http://upload.wikimedia.org/wikipedia/commons/3/34/Anatomical_Directions_and_Axes.JPG
                                         // Medical Dictionary:http\://www.medterms.com/
                                         // KEGG COMPOUND:C02788 "KEGG COMPOUND"
                                         // id-validation-regexp:\"REACT_[0-9\]\{1\,4}\\.[0-9\]\{1\,3}|[0-9\]+\"
                                         //$a[1] = 'id-validation-regexp:\"REACT_[0-9\]\{1\,4}\\.[0-9\]\{1\,3}|[0-9\]+\"';
                                         if (substr($a[1], 0, 4) == "http") {
                                             $buf .= parent::triplify($tid, "rdfs:seeAlso", str_replace(array(" ", '"wiki"', "\\"), array("+", "", ""), $a[1]));
                                         } else {
                                             $b = explode(":", $a[1], 2);
                                             if (substr($b[1], 0, 4) == "http") {
                                                 $buf .= parent::triplify($tid, "rdfs:seeAlso", stripslashes($b[1]));
                                             } else {
                                                 $ns = str_replace(array(" ", "\\"), "", strtolower($b[0]));
                                                 $id = trim($b[1]);
                                                 // there may be a comment to remove
                                                 if (FALSE !== ($pos = strrpos($id, ' "'))) {
                                                     $comment = substr($id, $pos + 1, -1);
                                                     $id = substr($id, 0, $pos);
                                                 }
                                                 $id = stripslashes($id);
                                                 // there may be a source statement to remove
                                                 $id = preg_replace("/{.*\\}/", "", $id);
                                                 if ($ns == "pmid") {
                                                     $ns = "pubmed";
                                                     $y = explode(" ", $id);
                                                     $id = $y[0];
                                                 }
                                                 if ($ns == "xx") {
                                                     continue;
                                                 }
                                                 if ($ns == "icd9cm") {
                                                     $y = explode(" ", $id);
                                                     $id = $y[0];
                                                 }
                                                 if ($ns == "xref; umls_cui") {
                                                     continue;
                                                 }
                                                 if ($ns == "submitter") {
                                                     $ns = "chebi.submitter";
                                                 }
                                                 if ($ns == "wikipedia" || $ns == "mesh") {
                                                     $id = str_replace(" ", "+", $id);
                                                 }
                                                 if ($ns == "id-validation-regexp") {
                                                     $buf .= parent::triplifyString($tid, "obo_vocabulary:{$ns}", addslashes($id));
                                                 } else {
                                                     $buf .= parent::triplify($tid, "obo_vocabulary:x-{$ns}", "{$ns}:" . str_replace(" ", "-", $id));
                                                 }
                                             }
                                         }
                                     } else {
                                         if ($a[0] == "synonym") {
                                             // synonym: "entidades moleculares" RELATED [IUPAC:]
                                             // synonym: "molecular entity" EXACT IUPAC_NAME [IUPAC:]
                                             // synonym: "Chondrococcus macrosporus" RELATED synonym [NCBITaxonRef:Krzemieniewska_and_Krzemieniewski_1926]
                                             //grab string inside double quotes
                                             preg_match('/"(.*)"(.*)/', $a[1], $matches);
                                             if (!empty($matches)) {
                                                 $a[1] = str_replace(array("\\", "\"", "'"), array("", "", "\\\\'"), $matches[1] . $matches[2]);
                                             } else {
                                                 $a[1] = str_replace(array("\"", "'"), array("", "\\\\'"), $a[1]);
                                             }
                                             $rel = "SYNONYM";
                                             $list = array("EXACT", "BROAD", "RELATED", "NARROW");
                                             $found = false;
                                             foreach ($list as $keyword) {
                                                 // get everything after the keyword up until the bracket [
                                                 if (FALSE !== ($k_pos = strpos($a[1], $keyword))) {
                                                     $str_len = strlen($a[1]);
                                                     $keyword_len = strlen($keyword);
                                                     $keyword_end_pos = $k_pos + $keyword_len;
                                                     $b1_pos = strrpos($a[1], "[");
                                                     $b2_pos = strrpos($a[1], "]");
                                                     $b_text = substr($a[1], $b1_pos + 1, $b2_pos - $b1_pos - 1);
                                                     $diff = $b1_pos - $keyword_end_pos - 1;
                                                     if ($diff != 0) {
                                                         // then there is more stuff here
                                                         $k = substr($a[1], $keyword_end_pos + 1, $diff);
                                                         $rel = trim($k);
                                                     } else {
                                                         // create the long predicate
                                                         $rel = $keyword . "_SYNONYM";
                                                     }
                                                     $found = true;
                                                     $str = substr($a[1], 0, $k_pos - 1);
                                                     break;
                                                 }
                                             }
                                             // check to see if we still haven't found anything
                                             if ($found === false) {
                                                 // we didn't find one of the keywords
                                                 // so take from the start to the bracket
                                                 $b1_pos = strrpos($a[1], "[");
                                                 $str = substr($a[1], 0, $b1_pos - 1);
                                             }
                                             $rel = str_replace(" ", "_", $rel);
                                             // $lit = addslashes($str.($b_text?" [".$b_text."]":""));
                                             $l = parent::triplifyString($tid, "obo_vocabulary:" . strtolower($rel), $str);
                                             $buf .= $l;
                                         } else {
                                             if ($a[0] == "alt_id") {
                                                 parent::getRegistry()->parseQname($a[1], $ns, $id);
                                                 if ($id != 'curators') {
                                                     $buf .= parent::triplify("{$ns}:{$id}", "rdfs:seeAlso", stripslashes($tid));
                                                 }
                                             } else {
                                                 if ($a[0] == "is_a") {
                                                     // do subclassing
                                                     parent::getRegistry()->parseQName($a[1], $ns, $id);
                                                     $t = parent::triplify($tid, "rdfs:subClassOf", "{$ns}:{$id}");
                                                     $buf .= $t;
                                                     $min .= $t;
                                                     $is_a = true;
                                                 } else {
                                                     if ($a[0] == "intersection_of") {
                                                         if (!isset($intersection_of)) {
                                                             // $intersection_of = '<'.parent::getRegistry()->getFQURI($tid).'> <'.parent::getRegistry()->getFQURI('owl:equivalentClass').'> [<'.parent::getRegistry()->getFQURI('rdf:type').'> <'.parent::getRegistry()->getFQURI('owl:Class').'>; <'.parent::getRegistry()->getFQURI('owl:intersectionOf').'> (';
                                                             $intersection_of = '<' . parent::getRegistry()->getFQURI($tid) . '> <' . parent::getRegistry()->getFQURI('owl:equivalentClass') . '> _:b' . ++$bid . " {$graph_uri} ." . PHP_EOL;
                                                             $intersection_of .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('rdf:type') . '> <' . parent::getRegistry()->getFQURI('owl:Class') . "> {$graph_uri} ." . PHP_EOL;
                                                             $intersection_of .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('owl:intersectionOf') . '> _:b' . ++$bid . " {$graph_uri} ." . PHP_EOL;
                                                         }
                                                         /*
                                                         intersection_of: ECO:0000206 ! BLAST evidence
                                                         intersection_of: develops_from VAO:0000092 ! chondrogenic condensation
                                                         intersection_of: OBO_REL:has_part VAO:0000040 ! cartilage tissue
                                                         */
                                                         $c = explode(" ", $a[1]);
                                                         if (count($c) == 1) {
                                                             // just a class
                                                             parent::getRegistry()->parseQName($c[0], $ns, $id);
                                                             $intersection_of .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('rdfs:subClassOf') . '> <' . parent::getRegistry()->getFQURI("{$ns}:{$id}") . "> {$graph_uri} ." . PHP_EOL;
                                                             $buf .= parent::triplify($tid, "rdfs:subClassOf", "{$ns}:{$id}");
                                                         } else {
                                                             if (count($c) == 2) {
                                                                 // an expression
                                                                 parent::getRegistry()->parseQName($c[0], $pred_ns, $pred_id);
                                                                 parent::getRegistry()->parseQName($c[1], $obj_ns, $obj_id);
                                                                 $intersection_of .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('owl:onProperty') . '> <' . parent::getRegistry()->getFQURI("obo_vocabulary:" . $pred_id) . "> {$graph_uri} ." . PHP_EOL;
                                                                 $intersection_of .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('owl:someValuesFrom') . '> <' . parent::getRegistry()->getFQURI("{$obj_ns}:{$obj_id}") . ">  {$graph_uri} ." . PHP_EOL;
                                                                 $buf .= parent::triplify($tid, "obo_vocabulary:{$pred_id}", "{$obj_ns}:{$obj_id}");
                                                             }
                                                         }
                                                     } else {
                                                         if ($a[0] == "relationship") {
                                                             if (!isset($relationship)) {
                                                                 $relationship = '<' . parent::getRegistry()->getFQURI($tid) . '> <' . parent::getRegistry()->getFQURI('rdfs:subClassOf') . '> _:b' . ++$bid . " {$graph_uri} ." . PHP_EOL;
                                                                 $relationship .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('rdf:type') . '> <' . parent::getRegistry()->getFQURI('owl:Class') . "> {$graph_uri} ." . PHP_EOL;
                                                                 $relationship .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('owl:intersectionOf') . '> _:b' . ++$bid . " {$graph_uri} ." . PHP_EOL;
                                                             }
                                                             /*
                                                             relationship: develops_from VAO:0000092 ! chondrogenic condensation
                                                             relationship: OBO_REL:has_part VAO:0000040 ! cartilage tissue
                                                             */
                                                             $c = explode(" ", $a[1]);
                                                             if (count($c) == 1) {
                                                                 // just a class
                                                                 parent::getRegistry()->parseQName($c[0], $ns, $id);
                                                                 $relationship .= parent::getRegistry()->getFQURI("{$ns}:{$id}");
                                                                 $buf .= parent::triplify($tid, "rdfs:subClassOf", "{$ns}:{$id}");
                                                             } else {
                                                                 if (count($c) == 2) {
                                                                     // an expression
                                                                     parent::getRegistry()->parseQName($c[0], $pred_ns, $pred_id);
                                                                     parent::getRegistry()->parseQName($c[1], $obj_ns, $obj_id);
                                                                     $relationship .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('owl:onProperty') . '> <' . parent::getRegistry()->getFQURI("obo_vocabulary:" . $pred_id) . ">  {$graph_uri} ." . PHP_EOL;
                                                                     $relationship .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('owl:someValuesFrom') . '> <' . parent::getRegistry()->getFQURI("{$obj_ns}:{$obj_id}") . "> {$graph_uri} ." . PHP_EOL;
                                                                     $buf .= parent::triplify($tid, "obo_vocabulary:{$pred_id}", "{$obj_ns}:{$obj_id}");
                                                                 }
                                                             }
                                                         } else {
                                                             // default handler
                                                             if (isset($a[1])) {
                                                                 $buf .= parent::triplifyString($tid, "obo_vocabulary:{$a['0']}", str_replace(array("\"", "'"), array("", "\\\\'"), stripslashes($a[1])));
                                                             }
                                                         }
                                                     }
                                                 }
                                             }
                                         }
                                     }
                                 }
                             }
                         }
                     }
                 }
             } else {
                 //header
                 //format-version: 1.0
                 $buf .= parent::triplifyString($ouri, "obo_vocabulary:{$a['0']}", str_replace(array('"', '\\:'), array('\\"', ':'), isset($a[1]) ? $a[1] : ""));
             }
         }
         if ($minimal || $minimalp) {
             parent::getWriteFile()->write($min);
         } else {
             parent::getWriteFile()->write($buf);
         }
         $min = '';
         $buf = '';
         $header = '';
     }
     //if(isset($intersection_of))  $buf .= $intersection_of.")].".PHP_EOL;
     //if(isset($relationship))  $buf .= $relationship.")].".PHP_EOL;
     if ($minimal || $minimalp) {
         parent::getWriteFile()->Write($min);
     } else {
         parent::getWriteFile()->write($buf);
     }
 }