/**
 * Converts the selected MGI report files to RDF and writes the dataset release file.
 *
 * For each selected report the local `.rpt` copy is downloaded when it is missing or
 * when the `download` parameter equals 'true', the method named after the report is
 * invoked to do the conversion, and a source/output DataResource pair is appended to
 * the dataset description that is written out after the loop.
 *
 * Reads the parameters: indir, outdir, files, download_url, download, output_format,
 * bio2rdf_release.
 */
function Run()
{
    $idir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $files = parent::getParameterValue('files');
    if ($files == 'all') {
        $list = explode('|', parent::getParameterList('files'));
        // The first entry of the declared list is dropped (presumably the 'all' keyword itself — confirm).
        array_shift($list);
    } else {
        $list = explode(',', $files);
    }

    $dataset_description = '';
    foreach ($list as $item) {
        $lfile = $idir . $item . '.rpt';
        $rfile = parent::getParameterValue('download_url') . $item . '.rpt';
        if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') {
            echo "downloading {$item}...";
            $ret = Utils::DownloadSingle($rfile, $lfile);
            if ($ret != true) {
                // Nothing to parse for this report; move on to the next one.
                continue;
            }
        }
        parent::setReadFile($lfile, true);

        echo "Processing {$item}...";
        $output_format = parent::getParameterValue('output_format');
        // Keep the bare file name separate from the local path: only the name
        // belongs in the public download URI built further down.
        $oname = $item . '.' . $output_format;
        $ofile = $odir . $oname;
        $gz = strstr($output_format, "gz") ? true : false;
        parent::setWriteFile($ofile, $gz);

        // Each report is converted by the method that carries the report's name.
        $this->{$item}();
        parent::getWriteFile()->close();
        parent::getReadFile()->close();
        echo "Done" . PHP_EOL;
        parent::clear();

        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("MGI {$item}")
            ->setRetrievedDate(date("Y-m-d\\TH:i:s", filemtime($lfile)))
            ->setFormat("text")
            ->setPublisher("http://www.informatics.jax.org")
            ->setHomepage("http://www.informatics.jax.org")
            ->setRights("use")
            ->setLicense("http://www.informatics.jax.org/mgihome/other/copyright.shtml")
            ->setDataset("http://identifiers.org/mgi/");

        $prefix = parent::getPrefix();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $date = date("Y-m-d\\TH:i:s");
        $output_file = (new DataResource($this))
            // Fixed: previously interpolated $ofile, which leaked the local outdir into the URI.
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$oname}")
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$item} in {$prefix}")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/mgi/mgi.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());

        if ($gz) {
            $output_file->setFormat("application/gzip");
        }
        if (strstr($output_format, "nt")) {
            $output_file->setFormat("application/n-triples");
        } else {
            $output_file->setFormat("application/n-quads");
        }

        $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
    }

    // generate the dataset release file
    $this->setWriteFile($odir . parent::getBio2RDFReleaseFile());
    $this->getWriteFile()->write($dataset_description);
    $this->getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Converts the HGNC complete set to RDF and writes the dataset release file.
 *
 * The gzipped source is downloaded when the local copy is missing or the `download`
 * parameter is set, process() performs the conversion, and a source/output
 * DataResource pair is serialised into the release file.
 *
 * Reads the parameters: indir, outdir, download_url, download, output_format,
 * bio2rdf_release.
 */
function Run()
{
    $file = "hgnc_complete_set.txt.gz";
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $rdir = parent::getParameterValue('download_url');
    $lfile = $ldir . $file;

    if (!file_exists($lfile) && parent::getParameterValue('download') == false) {
        trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
        parent::setParameterValue('download', true);
    }

    // download the hgnc file; download_url is used as the full remote location
    if (parent::getParameterValue('download') == true) {
        echo "downloading {$file} ... ";
        Utils::DownloadSingle($rdir, $lfile);
    }

    $output_format = parent::getParameterValue('output_format');
    $ofile = $odir . "hgnc." . $output_format;
    $gz = strstr($output_format, "gz") ? true : false;

    parent::setWriteFile($ofile, $gz);
    parent::setReadFile($lfile, true);
    echo "processing {$file}... ";
    $this->process();
    echo "done!" . PHP_EOL;

    // Close the write file exactly once (the original closed it a second time further down).
    parent::getWriteFile()->close();
    echo PHP_EOL;

    // generate the dataset release file
    echo "generating dataset release file... \n";
    $dataset_description = '';

    // Timestamps carry a literal 'Z', so they are produced in UTC with gmdate() and
    // zero-padded hours ('H'); the previous 'G' emitted e.g. "T5:03:02Z".
    $source_file = (new DataResource($this))
        ->setURI($rdir)
        ->setTitle('HUGO Gene Nomenclature Committee (HGNC)')
        ->setRetrievedDate(gmdate("Y-m-d\\TH:i:s\\Z", filemtime($lfile)))
        ->setFormat('text/tab-separated-value')
        // The source is a .gz file, not a zip archive.
        ->setFormat('application/gzip')
        ->setPublisher('http://www.genenames.org/')
        ->setHomepage('http://www.genenames.org/data/gdlw_columndef.html')
        ->setRights('use')
        ->setRights('attribution')
        ->setLicense('http://www.genenames.org/about/overview')
        ->setDataset(parent::getDatasetURI());

    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = gmdate("Y-m-d\\TH:i:s\\Z");
    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
        ->setSource($source_file->getURI())
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/hgnc/hgnc.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("restricted-by-source-license")
        // Fixed: the host was missing ".org".
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());
    echo "done!" . PHP_EOL;

    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strstr($output_format, "nt")) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }

    $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
    $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
    $this->getWriteFile()->write($dataset_description);
    $this->getWriteFile()->close();
}
/**
 * Converts the local hgnc.tab file to RDF and writes the dataset release file.
 *
 * Input and output directories are normalised to end with a slash. The source is
 * downloaded from `download_url` when it is missing locally (and `download` is off)
 * or when `download` is already set. The output is N-Triples, or N-Quads when
 * `graph_uri` is set, optionally gzipped when `gzip` is set.
 */
function Run()
{
    $name = "hgnc.tab";
    $indir = $this->GetParameterValue('indir');
    $outdir = $this->GetParameterValue('outdir');
    $url = $this->GetParameterValue('download_url');

    // make sure directories end with slash
    if (substr($indir, -1) !== "/") {
        $indir .= "/";
    }
    if (substr($outdir, -1) !== "/") {
        $outdir .= "/";
    }

    $local = $indir . $name;
    $missing = !file_exists($local);
    if ($missing && $this->GetParameterValue('download') == false) {
        trigger_error($local . " not found. Will attempt to download.", E_USER_NOTICE);
        $this->SetParameterValue('download', true);
    }

    // download_url is used as the complete remote file location
    if ($this->GetParameterValue('download') == true) {
        echo "downloading {$name} ... ";
        Utils::DownloadSingle($url, $local);
    }

    // pick the serialisation extension, then the optional compression suffix
    $extension = $this->GetParameterValue('graph_uri') ? '.nq' : '.nt';
    $gz = $this->GetParameterValue('gzip') ? true : false;
    $target = $outdir . $name . $extension . ($gz ? '.gz' : '');

    $this->SetWriteFile($target, $gz);
    $this->SetReadFile($local);
    echo "processing {$name}... ";
    $this->process();
    echo "done!";

    // close write file
    $this->GetWriteFile()->Close();
    echo PHP_EOL;

    // generate the dataset release file
    echo "generating dataset release file... ";
    $namespace = $this->GetNamespace();
    $description = $this->GetBio2RDFDatasetDescription(
        $namespace,
        "https://github.com/bio2rdf/bio2rdf-scripts/blob/master/hgnc/hgnc.php",
        $this->GetBio2RDFDownloadURL($this->GetNamespace()),
        "http://www.genenames.org",
        array("use"),
        "http://www.genenames.org/about/overview",
        $this->GetParameterValue('download_url'),
        $this->version
    );
    $release_path = $outdir . $this->GetBio2RDFReleaseFile($this->GetNamespace());
    $this->SetWriteFile($release_path);
    $this->GetWriteFile()->Write($description);
    $this->GetWriteFile()->Close();
    echo "done!" . PHP_EOL;
}
/**
 * Converts the first selected DrugBank file to RDF and writes the dataset release file.
 *
 * Only the first entry of the resolved file list is processed. It is downloaded when
 * missing locally or when `download` is set, handed to Parse(), and written as
 * drugbank.nt (or drugbank.nq when `graph_uri` is set), gzipped when `gzip` is set.
 *
 * @return bool always TRUE
 */
function Run()
{
    // resolve the list of files to work on
    $selection = $this->GetParameterValue('files');
    if ($selection == 'all') {
        $candidates = explode("|", $this->GetParameterList('files'));
        array_shift($candidates);
    } else {
        $candidates = explode("|", $this->GetParameterValue('files'));
    }

    $indir = $this->GetParameterValue('indir');
    $outdir = $this->GetParameterValue('outdir');
    $base_url = $this->GetParameterValue('download_url');

    // only the first file is handled
    $source = $candidates[0];
    $local = $indir . $source;
    if (!file_exists($local)) {
        trigger_error($local . " not found. Will attempt to download.", E_USER_NOTICE);
        $this->SetParameterValue('download', true);
    }

    // fetch the remote copy when requested
    if ($this->GetParameterValue('download') == true) {
        $remote = $base_url . $source;
        trigger_error("Downloading {$source} from {$remote}", E_USER_NOTICE);
        Utils::DownloadSingle($remote, $local);
    }

    // choose the output name: triples by default, quads with a graph, optional gzip
    $target = $this->GetParameterValue('graph_uri') ? 'drugbank.nq' : 'drugbank.nt';
    $gz = $this->GetParameterValue('gzip') ? true : false;
    if ($gz) {
        $target .= '.gz';
    }

    $this->SetWriteFile($outdir . $target, $gz);
    $this->Parse($indir, $source);
    $this->GetWriteFile()->Close();

    // generate the release file
    $namespace = $this->GetNamespace();
    $download_location = $this->GetBio2RDFDownloadURL($this->GetNamespace()) . $target;
    $description = $this->GetBio2RDFDatasetDescription(
        $namespace,
        "https://github.com/bio2rdf/bio2rdf-scripts/blob/master/drugbank/drugbank.php",
        $download_location,
        "http://drugbank.ca",
        array("use", "no-commercial"),
        "http://www.drugbank.ca/about",
        $this->GetParameterValue('download_url'),
        $this->version
    );
    $this->SetWriteFile($outdir . $this->GetBio2RDFReleaseFile($this->GetNamespace()));
    $this->GetWriteFile()->Write($description);
    $this->GetWriteFile()->Close();

    return TRUE;
}
/**
 * Converts each selected MGI report to its own RDF file.
 *
 * A report's `.rpt` file is downloaded when it is missing locally or when `download`
 * equals 'true'. The report is then converted by the method that shares its name and
 * written to "mgi-<report>.nt" (".nq" when `graph_uri` is set, plus ".gz" when `gzip`
 * is set).
 */
function Run()
{
    $indir = $this->GetParameterValue('indir');
    $outdir = $this->GetParameterValue('outdir');
    $selection = $this->GetParameterValue('files');

    if ($selection == 'all') {
        $reports = explode('|', $this->GetParameterList('files'));
        array_shift($reports);
    } else {
        $reports = explode('|', $this->GetParameterValue('files'));
    }

    foreach ($reports as $report) {
        $local = $indir . $report . '.rpt';
        $remote = $this->GetParameterValue('download_url') . $report . '.rpt';

        $needs_download = !file_exists($local) || $this->GetParameterValue('download') == 'true';
        if ($needs_download) {
            echo "downloading {$report}...";
            Utils::DownloadSingle($remote, $local);
        }

        $this->SetReadFile($local, true);
        echo "Processing {$report}...";

        // triples by default, quads when a graph URI is configured, optional gzip
        $extension = $this->GetParameterValue('graph_uri') ? '.nq' : '.nt';
        $target = $outdir . "mgi-" . $report . $extension;
        $gz = $this->GetParameterValue('gzip') ? true : false;
        if ($gz) {
            $target .= '.gz';
        }
        $this->SetWriteFile($target, $gz);

        // dispatch to the converter method named after the report
        $this->{$report}();

        $this->GetWriteFile()->Close();
        $this->GetReadFile()->Close();
        echo "Done" . PHP_EOL;
    }
}
/**
 * Converts the selected WormBase files to RDF and writes the dataset release file.
 *
 * Every selected key is mapped to a remote path and a local file name. The file is
 * downloaded when missing or when `download` is set, converted by the method named
 * after the key, and described by a source/output DataResource pair. Keys without a
 * known mapping are reported and skipped.
 *
 * Reads the parameters: files, release, indir, outdir, download_url, dataset_graph,
 * download, output_format, bio2rdf_release.
 */
public function run()
{
    if (parent::getParameterValue('files') == 'all') {
        $files = explode("|", parent::getParameterList('files'));
        array_shift($files);
    } else {
        $files = explode(",", parent::getParameterValue('files'));
    }

    $release = parent::getParameterValue('release');
    // NOTE(review): the ontology association files are pinned to this release name — confirm it is still current.
    $releaseb = "WS249";

    $remote_files = array(
        "geneIDs" => "species/c_elegans/annotation/geneIDs/c_elegans.PRJNA13758." . $release . ".geneIDs.txt.gz",
        "functional_descriptions" => "species/c_elegans/annotation/functional_descriptions/c_elegans.PRJNA13758." . $release . ".functional_descriptions.txt.gz",
        "gene_interactions" => "species/c_elegans/annotation/gene_interactions/c_elegans.PRJNA13758." . $release . ".gene_interactions.txt.gz",
        "gene_associations" => "releases/current-production-release/ONTOLOGY/gene_association." . $releaseb . ".wb",
        "phenotype_associations" => "releases/current-production-release/ONTOLOGY/phenotype_association." . $releaseb . ".wb",
    );
    $local_files = array(
        "geneIDs" => "wormbase." . $release . ".genes.txt.gz",
        "functional_descriptions" => "wormbase." . $release . ".functional_descriptions.txt.gz",
        "gene_interactions" => "wormbase." . $release . ".gene_interactions.txt.gz",
        "gene_associations" => "wormbase." . $release . ".gene_association.wb",
        "phenotype_associations" => "wormbase." . $release . ".phenotype_associations.wb",
    );

    $idir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $rdir = parent::getParameterValue('download_url');

    $dataset_description = '';

    // Remember the current graph so it can be restored after the loop.
    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    foreach ($files as $file) {
        // An unknown key would otherwise resolve to the bare input directory / base URL.
        if (!isset($local_files[$file], $remote_files[$file])) {
            trigger_error("Unknown WormBase file '{$file}'; skipping.", E_USER_WARNING);
            continue;
        }

        $lfile = $idir . $local_files[$file];
        $rfile = $rdir . $remote_files[$file];
        if (!file_exists($lfile) || parent::getParameterValue('download') == true) {
            trigger_error($lfile . " not found. Will attempt to download." . PHP_EOL, E_USER_WARNING);
            echo "Downloading {$rfile}... ";
            Utils::DownloadSingle($rfile, $lfile);
            echo "done!" . PHP_EOL;
        }

        // Local files whose path contains "gz" are opened as gzip.
        parent::setReadFile($lfile, strstr($lfile, "gz") ? TRUE : FALSE);

        $suffix = parent::getParameterValue('output_format');
        $ofile = "wormbase." . $file . "." . $suffix;
        $gz = strstr($suffix, "gz") ? true : false;
        parent::setWriteFile($odir . $ofile, $gz);

        echo "Processing {$file}... ";
        // Each file key is converted by the method that carries the same name.
        $this->{$file}();
        echo "done!" . PHP_EOL;
        parent::getWriteFile()->close();

        // generate the dataset release file
        echo "Generating dataset description for {$ofile}... ";

        // Timestamps carry a literal 'Z', so they are produced in UTC with gmdate() and
        // zero-padded hours ('H'); the previous 'G' emitted e.g. "T5:03:02Z".
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("WormBase Release " . $release . " subset ({$file})")
            ->setRetrievedDate(gmdate("Y-m-d\\TH:i:s\\Z", filemtime($lfile)))
            ->setFormat("text/tab-separated-value")
            ->setFormat("application/gzip")
            ->setPublisher("http://wormbase.org/")
            ->setHomepage("http://wormbase.org/")
            ->setRights("use")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://www.wormbase.org/about/policies")
            ->setDataset("http://identifiers.org/wormbase/");

        $prefix = parent::getPrefix();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $date = gmdate("Y-m-d\\TH:i:s\\Z");
        $output_file = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - \n{$file}")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/wormbase/wormbase.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());

        if ($gz) {
            $output_file->setFormat("application/gzip");
        }
        if (strstr($suffix, "nt")) {
            $output_file->setFormat("application/n-triples");
        } else {
            $output_file->setFormat("application/n-quads");
        }

        $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
        echo "done!" . PHP_EOL;
    }

    parent::setGraphURI($graph_uri);

    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
}
/**
 * Converts the selected Entrez Gene packages to RDF and writes the dataset release file.
 *
 * The `files` parameter is either 'all' or a comma-separated list of package ids;
 * ids missing from getPackageMap() are silently ignored. Each package is downloaded
 * when its local `.gz` copy is missing or `download` is set, converted by the method
 * named after the package id, and its public download URL is recorded for the
 * dataset description.
 *
 * @return bool always TRUE
 */
function Run()
{
    $ldir = $this->GetParameterValue('indir');
    $odir = $this->GetParameterValue('outdir');
    $rdir = $this->GetParameterValue('download_url');

    // which files are to be converted?
    $selectedPackage = trim($this->GetParameterValue('files'));
    if ($selectedPackage == 'all') {
        $files = $this->getPackageMap();
    } else {
        $sel_arr = explode(",", $selectedPackage);
        $pm = $this->getPackageMap();
        $files = array();
        foreach ($sel_arr as $a) {
            if (array_key_exists($a, $pm)) {
                $files[$a] = $pm[$a];
            }
        }
    }

    // Initialised up front so an empty selection yields an empty list, not an undefined variable.
    $bio2rdf_download_files = array();

    // now iterate over the files array
    foreach ($files as $id => $file) {
        echo "Processing {$id} ...";
        $lfile = $ldir . $id . ".gz";

        // download
        if (!file_exists($lfile) || $this->GetParameterValue('download') == true) {
            echo "downloading ... ";
            // These two packages are fetched through the compress.zlib:// stream wrapper.
            // NOTE(review): the original comment here spoke about the GENE_INFO subdirectory,
            // which does not match this condition — confirm the intent.
            if ($id == "gene2sts" || $id == "gene2unigene") {
                $rfile = "compress.zlib://" . $rdir . $file;
            } else {
                $rfile = $rdir . $file;
            }
            Utils::DownloadSingle($rfile, $lfile);
        }

        // Build the bare output name first: the public download URL must not
        // contain the local output directory.
        $outname = $id . ".nt";
        $gz = false;
        if ($this->GetParameterValue('graph_uri')) {
            $outname = $id . ".nq";
        }
        if ($this->GetParameterValue('gzip')) {
            $outname .= '.gz';
            $gz = true;
        }
        $writefile = $odir . $outname;

        $this->SetReadFile($lfile, true);
        $this->SetWriteFile($writefile, $gz);
        echo 'parsing ...';
        // Each package is converted by the method that carries the package id as its name.
        $this->{$id}();
        echo 'done.' . PHP_EOL;
        $this->GetReadFile()->Close();
        $this->GetWriteFile()->Close();

        // Fixed: previously appended $writefile, which embedded $odir in the URL.
        $bio2rdf_download_files[] = $this->GetBio2RDFDownloadURL($this->GetNamespace()) . $outname;
    }

    // generate the release file
    $desc = $this->GetBio2RDFDatasetDescription(
        $this->GetNamespace(),
        "https://github.com/bio2rdf/bio2rdf-scripts/blob/master/gene/entrez_gene.php",
        $bio2rdf_download_files,
        "http://www.ncbi.nlm.nih.gov/gene",
        array("use-share-modify"),
        "http://www.ncbi.nlm.nih.gov/About/disclaimer.html",
        "ftp://ftp.ncbi.nih.gov/gene/",
        $this->version
    );
    $this->SetWriteFile($odir . $this->GetBio2RDFReleaseFile($this->GetNamespace()));
    $this->GetWriteFile()->Write($desc);
    $this->GetWriteFile()->Close();

    return TRUE;
}
/**
 * Converts the FDA National Drug Code zip archive to a single RDF file and writes
 * the dataset release file.
 *
 * The archive named by `download_url` is downloaded when missing locally or when
 * `download` equals 'true'. Every selected entry ("<name>.txt" inside the archive)
 * is streamed to the method named after it, and all output goes to
 * "ndc.<output_format>".
 *
 * @return bool|null FALSE when an archive entry cannot be opened; otherwise nothing is returned
 */
function Run()
{
    $ldir = $this->GetParameterValue('indir');
    $odir = $this->GetParameterValue('outdir');
    $rfile = $this->GetParameterValue('download_url');
    // local file name = last path segment of the download URL
    $lfile = substr($rfile, strrpos($rfile, "/") + 1);

    // check if exists
    if (!file_exists($ldir . $lfile) || parent::getParameterValue('download') == 'true') {
        echo "dowloading {$rfile} ...";
        trigger_error("Will attempt to download ", E_USER_NOTICE);
        Utils::DownloadSingle($rfile, $ldir . $lfile);
        echo "done" . PHP_EOL;
    }

    // make sure we have the zip archive
    $zin = new ZipArchive();
    // ZipArchive::open() reports most failures as an integer error code, so test for
    // "not TRUE" rather than "=== FALSE".
    if ($zin->open($ldir . $lfile) !== TRUE) {
        trigger_error("Unable to open {$ldir}{$lfile}");
        exit;
    }

    // get the work
    if ($this->GetParameterValue('files') == 'all') {
        $files = explode("|", $this->GetParameterList('files'));
        array_shift($files);
    } else {
        $files = explode("|", $this->GetParameterValue('files'));
    }

    $output_format = parent::getParameterValue('output_format');
    $gz = strstr($output_format, "gz") ? true : false;
    $outfile = "ndc." . $output_format;
    parent::setWriteFile($odir . $outfile, $gz);

    // now go through each item in the zip file and process
    foreach ($files as $file) {
        echo "Processing {$file}... ";
        $fpin = $zin->getStream($file . ".txt");
        if (!$fpin) {
            trigger_error("Unable to get pointer to {$file} in {$ldir}{$lfile}", E_USER_ERROR);
            return FALSE;
        }
        // Each entry is converted by the method that carries the entry's name.
        $this->{$file}($fpin);
        parent::writeRDFBufferToWriteFile();
        echo "done!" . PHP_EOL;
    }
    parent::getWriteFile()->close();
    // Release the archive handle once every entry has been read.
    $zin->close();

    echo "Generating dataset description for {$outfile}... ";

    // Timestamps carry a literal 'Z', so they are produced in UTC with gmdate() and
    // zero-padded hours ('H'); the previous 'G' emitted e.g. "T5:03:02Z".
    $source_file = (new DataResource($this))
        ->setURI($rfile)
        ->setTitle("FDA National Drug Code Directory")
        ->setRetrievedDate(gmdate("Y-m-d\\TH:i:s\\Z", filemtime($ldir . $lfile)))
        ->setFormat("text/tab-separated-value")
        ->setFormat("application/zip")
        ->setPublisher("http://www.fda.gov")
        ->setHomepage("http://www.fda.gov/Drugs/InformationOnDrugs/ucm142438.htm")
        ->setRights("use-share")
        ->setLicense(null)
        ->setDataset("http://identifiers.org/ndc/");

    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = gmdate("Y-m-d\\TH:i:s\\Z");
    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outfile}")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}")
        ->setSource($source_file->getURI())
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ndc/ndc.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());

    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strstr($output_format, "nt")) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }

    $dataset_description = $source_file->toRDF() . $output_file->toRDF();

    // write dataset description to file
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Converts the selected iRefIndex MITAB archives to RDF and writes the dataset release file.
 *
 * For each selected taxon/file key the matching "<Key>.mitab.<version>.txt.zip"
 * archive is downloaded when missing or when `download` is set, its first entry is
 * streamed to Parse(), and a source/output DataResource pair is appended to the
 * dataset description.
 *
 * @return bool TRUE on success, FALSE when a download or archive entry fails
 */
function Run()
{
    // get the file list
    if (parent::getParameterValue('files') == 'all') {
        $files = array('all');
    } else {
        $files = explode(",", parent::getParameterValue('files'));
    }

    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $rdir = parent::getParameterValue('download_url');

    $dataset_description = '';
    foreach ($files as $file) {
        // A per-file flag: a missing archive forces a download for this file only.
        $download = parent::getParameterValue('download');
        $version = parent::getParameterValue("version");
        $output_format = parent::getParameterValue('output_format');

        $zip_file = ucfirst($file) . ".mitab." . $version . ".txt.zip";
        $lfile = $ldir . $zip_file;
        $gz = strstr($output_format, ".gz") === FALSE ? false : true;
        $ofile = "irefindex-" . $file . "." . $output_format;

        if (!file_exists($lfile)) {
            trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
            $download = true;
        }

        $rfile = $rdir . $zip_file;
        if ($download == true) {
            echo "downloading {$rfile}" . PHP_EOL;
            if (FALSE === Utils::DownloadSingle($rfile, $lfile)) {
                trigger_error("Error in Download");
                return FALSE;
            }
        }

        $zin = new ZipArchive();
        // ZipArchive::open() reports most failures as an integer error code, so test
        // for "not TRUE" rather than "=== FALSE".
        if ($zin->open($lfile) !== TRUE) {
            trigger_error("Unable to open {$lfile}");
            exit;
        }
        if ($zin->numFiles != 1) {
            trigger_error("Found more than one file ... using first file");
        }
        $f = $zin->statIndex(0);
        $base_file = $f['name'];
        if (($fp = $zin->getStream($base_file)) === FALSE) {
            trigger_error("Unable to get {$base_file} in ziparchive {$lfile}");
            return FALSE;
        }
        parent::setReadFile($lfile);
        // Read from the archive entry's stream rather than from the zip file itself.
        parent::getReadFile()->setFilePointer($fp);

        echo "Processing " . $file . " ...";
        // Fixed: compression now follows the requested output format instead of always being on.
        parent::setWriteFile($odir . $ofile, $gz);

        if ($this->Parse() === FALSE) {
            trigger_error("Parsing Error");
            exit;
        }

        parent::writeRDFBufferToWriteFile();
        parent::getWriteFile()->close();
        $zin->close();
        echo "Done!" . PHP_EOL;

        // Describe the dataset in the dataset graph when requested; restored below.
        $graph_uri = parent::getGraphURI();
        if (parent::getParameterValue('dataset_graph') == true) {
            parent::setGraphURI(parent::getDatasetURI());
        }

        // dataset description
        // Timestamps carry a literal 'Z', so they are produced in UTC with gmdate() and
        // zero-padded hours ('H'); the previous 'G' emitted e.g. "T5:03:02Z".
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            // Fixed: the closing parenthesis was missing from the title.
            ->setTitle("iRefIndex ({$zip_file})")
            ->setRetrievedDate(gmdate("Y-m-d\\TH:i:s\\Z", filemtime($lfile)))
            ->setFormat("text/tab-separated-value")
            ->setFormat("application/zip")
            ->setPublisher("http://irefindex.uio.no")
            ->setHomepage("http://irefindex.uio.no")
            ->setRights("use")
            ->setRights("by-attribution")
            ->setRights("no-commercial")
            ->setLicense("http://irefindex.uio.no/wiki/README_MITAB2.6_for_iRefIndex#License")
            ->setDataset("http://identifiers.org/irefindex/");

        $prefix = parent::getPrefix();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $date = gmdate("Y-m-d\\TH:i:s\\Z");
        $output_file = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/irefindex/irefindex.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());

        if ($gz) {
            $output_file->setFormat("application/gzip");
        }
        if (strstr($output_format, "nt")) {
            $output_file->setFormat("application/n-triples");
        } else {
            $output_file->setFormat("application/n-quads");
        }

        $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
        parent::setGraphURI($graph_uri);
    }

    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();

    return TRUE;
}
/**
 * Converts each selected entry of the FDA National Drug Code zip archive to its own
 * RDF file and writes the dataset release file.
 *
 * The archive named by `download_url` is downloaded when missing locally or when
 * `download` is set. Every selected entry ("<name>.txt" inside the archive) is
 * streamed to the method named after it and written to "<name>.nt" (".nq" when
 * `graph_uri` is set, plus ".gz" when `gzip` is set).
 */
function Run()
{
    $ldir = $this->GetParameterValue('indir');
    $odir = $this->GetParameterValue('outdir');
    $rfile = $this->GetParameterValue('download_url');
    // local file name = last path segment of the download URL
    $lfile = substr($rfile, strrpos($rfile, "/") + 1);

    // check if exists
    if (!file_exists($ldir . $lfile)) {
        trigger_error($ldir . $lfile . " not found. Will attempt to download. ", E_USER_NOTICE);
        $this->SetParameterValue('download', true);
    }

    // download
    if ($this->GetParameterValue('download') == true) {
        trigger_error("Downloading {$rfile}", E_USER_NOTICE);
        Utils::DownloadSingle($rfile, $ldir . $lfile);
    }

    // make sure we have the zip archive
    $zin = new ZipArchive();
    // ZipArchive::open() reports most failures as an integer error code, so test for
    // "not TRUE" rather than "=== FALSE".
    if ($zin->open($ldir . $lfile) !== TRUE) {
        trigger_error("Unable to open {$ldir}{$lfile}");
        exit;
    }

    // get the work
    if ($this->GetParameterValue('files') == 'all') {
        $files = explode("|", $this->GetParameterList('files'));
        array_shift($files);
    } else {
        $files = explode("|", $this->GetParameterValue('files'));
    }

    // Initialised up front so an empty selection yields an empty list, not an undefined variable.
    $bio2rdf_download_files = array();

    // now go through each item in the zip file and process
    foreach ($files as $file) {
        echo "Processing {$file} ...";

        // the file name in the zip archive is Product not product
        if ($file == "product") {
            $file = ucfirst($file);
        }
        $fpin = $zin->getStream($file . ".txt");
        if (!$fpin) {
            trigger_error("Unable to get pointer to {$file} in {$ldir}{$lfile}", E_USER_ERROR);
            exit("failed\n");
        }

        // set the write file
        $outfile = $file . '.nt';
        $gz = false;
        if ($this->GetParameterValue('graph_uri')) {
            $outfile = $file . '.nq';
        }
        if ($this->GetParameterValue('gzip')) {
            $outfile .= '.gz';
            $gz = true;
        }
        $bio2rdf_download_files[] = $this->GetBio2RDFDownloadURL($this->GetNamespace()) . $outfile;
        $this->SetWriteFile($odir . $outfile, $gz);

        // process: the entry is converted by the method that carries its name
        $this->{$file}($fpin);

        // write to file
        $this->WriteRDFBufferToWriteFile();
        $this->GetWriteFile()->Close();
        echo "done!" . PHP_EOL;
    }
    // Release the archive handle once every entry has been read.
    $zin->close();

    // generate the release file
    $this->DeleteBio2RDFReleaseFiles($odir);
    $desc = $this->GetBio2RDFDatasetDescription(
        $this->GetNamespace(),
        "https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ndc/ndc.php",
        $bio2rdf_download_files,
        "http://www.fda.gov/Drugs/InformationOnDrugs/ucm142438.htm",
        array("use-share"),
        null,
        $this->GetParameterValue('download_url'),
        $this->version
    );
    $this->SetWriteFile($odir . $this->GetBio2RDFReleaseFile($this->GetNamespace()));
    $this->GetWriteFile()->Write($desc);
    $this->GetWriteFile()->Close();
}
/**
 * Converts the selected GenAge archives ("human", "models") to RDF and writes the
 * dataset release file.
 *
 * Each archive is downloaded when missing locally, its CSV entry is streamed to the
 * method named after the file key, and a source/output DataResource pair is appended
 * to the dataset description. Keys without a known archive are reported and skipped.
 *
 * @return bool|null FALSE when a CSV entry cannot be read from its archive; otherwise nothing is returned
 */
function process()
{
    if (parent::getParameterValue('files') == 'all') {
        $files = explode("|", parent::getParameterList('files'));
        array_shift($files);
    } else {
        $files = explode(",", parent::getParameterValue('files'));
    }

    // file key => remote archive name, and file key => CSV entry inside that archive
    $remote_files = array("human" => "human_genes.zip", "models" => "models_genes.zip");
    $zip_entries = array("human" => "genage_human.csv", "models" => "genage_models.csv");

    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $rdir = parent::getParameterValue('download_url');

    $dataset_description = '';

    // Remember the current graph so it can be restored after the loop.
    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    foreach ($files as $file) {
        // Previously an unknown key left $zipentry undefined and read an undefined array index.
        if (!isset($remote_files[$file], $zip_entries[$file])) {
            trigger_error("Unknown GenAge file '{$file}'; skipping.", E_USER_WARNING);
            continue;
        }

        $lfile = $ldir . $remote_files[$file];
        $rfile = $rdir . $remote_files[$file];
        if (!file_exists($lfile)) {
            trigger_error($lfile . " not found. Will attempt to download." . PHP_EOL, E_USER_WARNING);
            echo "Downloading {$rfile}... ";
            Utils::DownloadSingle($rfile, $lfile);
            echo "done!" . PHP_EOL;
        }

        $suffix = parent::getParameterValue('output_format');
        $ofile = "genage_" . $file . '.' . $suffix;
        $gz = strstr($suffix, "gz") ? true : false;

        $zin = new ZipArchive();
        // ZipArchive::open() reports most failures as an integer error code, so test
        // for "not TRUE" rather than "=== FALSE".
        if ($zin->open($lfile) !== TRUE) {
            trigger_error("Unable to open {$lfile}");
            exit;
        }
        $zipentry = $zip_entries[$file];
        if (($fp = $zin->getStream($zipentry)) === FALSE) {
            trigger_error("Unable to get {$zipentry} in ziparchive {$lfile}");
            return FALSE;
        }
        parent::SetReadFile($lfile);
        // Read from the archive entry's stream rather than from the zip file itself.
        parent::GetReadFile()->SetFilePointer($fp);

        // set the write file, parse, write and close
        parent::setWriteFile($odir . $ofile, $gz);
        echo "Processing {$lfile}... ";
        // Each file key is converted by the method that carries the same name.
        $this->{$file}();
        echo "done!" . PHP_EOL;
        parent::getWriteFile()->close();
        // Release the archive handle once the entry has been consumed.
        $zin->close();

        // generate the dataset release file
        echo "Generating dataset description for {$ofile}... ";

        // Timestamps carry a literal 'Z', so they are produced in UTC with gmdate() and
        // zero-padded hours ('H'); the previous 'G' emitted e.g. "T5:03:02Z".
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("Human Ageing Genomic Resources GenAge database (" . $remote_files[$file] . ")")
            ->setRetrievedDate(gmdate("Y-m-d\\TH:i:s\\Z", filemtime($lfile)))
            ->setFormat("text/comma-separated-value")
            // The source archives are .zip files, not gzip streams.
            ->setFormat("application/zip")
            ->setPublisher("http://genomics.senescence.info/")
            ->setHomepage("http://genomics.senescence.info/genes/")
            ->setRights("use")
            ->setLicense("http://genomics.senescence.info/legal.html")
            ->setDataset("http://identifiers.org/genage/");

        $prefix = parent::getPrefix();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $date = gmdate("Y-m-d\\TH:i:s\\Z");
        $output_file = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/genage/genage.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());

        if ($gz) {
            $output_file->setFormat("application/gzip");
        }
        if (strstr($suffix, "nt")) {
            $output_file->setFormat("application/n-triples");
        } else {
            $output_file->setFormat("application/n-quads");
        }

        $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
        echo "done!" . PHP_EOL;
    }

    parent::setGraphURI($graph_uri);

    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
}
/**
 * Converts the selected CTD files to RDF and writes the dataset release file.
 *
 * Each file is expected locally as "<name>.gz". It is downloaded when missing or when
 * `download` is set, converted by the method "CTD_<name>", and written to "<name>.nt"
 * (".nq" when `graph_uri` is set, plus ".gz" when `gzip` is set).
 *
 * @return bool always TRUE (the script exits if a converter method returns FALSE)
 */
function Run()
{
    // get the file list
    if ($this->GetParameterValue('files') == 'all') {
        $files = explode("|", $this->GetParameterList('files'));
        array_shift($files);
    } else {
        $files = explode(",", $this->GetParameterValue('files'));
    }

    $ldir = $this->GetParameterValue('indir');
    $odir = $this->GetParameterValue('outdir');
    $rdir = $this->GetParameterValue('download_url');

    $gz_suffix = ".gz";

    // Initialised up front so an empty selection yields an empty list, not an undefined variable.
    $bio2rdf_download_files = array();

    foreach ($files as $file) {
        $lfile = $ldir . $file . $gz_suffix;

        $ofile = $file . ".nt";
        $gz = false;
        if ($this->GetParameterValue('graph_uri')) {
            $ofile = $file . '.nq';
        }
        if ($this->GetParameterValue('gzip')) {
            $ofile .= '.gz';
            $gz = true;
        }
        $bio2rdf_download_files[] = $this->GetBio2RDFDownloadURL($this->GetNamespace()) . $ofile;

        if (!file_exists($lfile)) {
            trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
            // The flag is stored on the parser, so it stays set for the remaining files too.
            $this->SetParameterValue('download', true);
        }

        if ($this->GetParameterValue('download') == true) {
            // Two files are published uncompressed; every other file is a gzipped TSV.
            if ($file == 'chem_gene_ixn_types') {
                $suffix = '.tsv';
            } elseif ($file == 'exposure_ontology') {
                $suffix = '.obo';
            } else {
                $suffix = ".tsv.gz";
            }
            $rfile = $rdir . 'CTD_' . $file . $suffix;
            if ($suffix == ".tsv.gz") {
                Utils::DownloadSingle($rfile, $lfile);
            } else {
                // Uncompressed sources are written through compress.zlib:// so the
                // local copy is gzipped like the rest.
                Utils::DownloadSingle($rfile, "compress.zlib://" . $lfile);
            }
        }

        echo "Processing " . $file . " ...";
        $this->SetReadFile($lfile, true);
        $this->SetWriteFile($odir . $ofile, $gz);

        // Each file is converted by the method named "CTD_<file>".
        $fnx = "CTD_" . $file;
        if ($this->{$fnx}() === FALSE) {
            trigger_error("Error in {$fnx}");
            exit;
        }

        $this->WriteRDFBufferToWriteFile();
        $this->GetWriteFile()->Close();
        echo "Done!" . PHP_EOL;
    }

    // generate the release file
    $desc = $this->GetBio2RDFDatasetDescription(
        $this->GetNamespace(),
        "https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ctd/ctd.php",
        $bio2rdf_download_files,
        "http://ctdbase.org",
        array("use", "no-commercial"),
        "http://ctdbase.org/about/legal.jsp",
        $this->GetParameterValue('download_url'),
        $this->version
    );
    $this->SetWriteFile($odir . $this->GetBio2RDFReleaseFile($this->GetNamespace()));
    $this->GetWriteFile()->Write($desc);
    $this->GetWriteFile()->Close();

    return TRUE;
}
/**
 * Convert the requested PharmGKB zip archives to RDF and write the Bio2RDF
 * release file.
 *
 * For each requested file the local zip archive is located (and downloaded
 * when missing or when the 'download' parameter is set), the relevant TSV
 * entries are streamed out of the archive, and the method named after the
 * file (or after the annotation type) is invoked on each entry.
 *
 * 'variant_annotations' is read from annotations.zip and is skipped with a
 * message when that archive is absent.
 *
 * The script exits if an archive cannot be opened.
 *
 * @return bool TRUE on completion, FALSE if an expected entry cannot be
 *              streamed from its archive.
 */
function Run()
{
    // get the file list
    if ($this->GetParameterValue('files') == 'all') {
        $files = explode("|", $this->GetParameterList('files'));
        array_shift($files);
    } else {
        $files = explode(",", $this->GetParameterValue('files'));
    }

    $ldir = $this->GetParameterValue('indir');
    $odir = $this->GetParameterValue('outdir');

    // Read the user's download request once; a missing file only triggers a
    // download of that file instead of flipping the shared parameter for
    // every file that follows.
    $force_download = ($this->GetParameterValue('download') == true);

    // Initialised so the release step works even when every file was skipped.
    $bio2rdf_download_files = array();

    foreach ($files as $file) {
        $download = $force_download;

        if ($file == 'variant_annotations') {
            $lfile = $ldir . "annotations.zip";
            if (!file_exists($lfile)) {
                echo "Contact PharmGKB to get access to variants/clinical variants; save file as annotations.zip" . PHP_EOL;
                continue;
            }
        } else {
            // check if exists
            $lfile = $ldir . $file . ".zip";
            if (!file_exists($lfile)) {
                trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
                $download = true;
            }
        }

        // download
        if ($download) {
            echo "downloading {$file}...";
            if ($file == 'offsides') {
                Utils::DownloadSingle('http://www.pharmgkb.org/redirect.jsp?p=ftp%3A%2F%2Fftpuserd%3AGKB4ftp%40ftp.pharmgkb.org%2Fdownload%2Ftatonetti%2F3003377s-offsides.zip', $lfile);
            } elseif ($file == 'twosides') {
                Utils::DownloadSingle('http://www.pharmgkb.org/redirect.jsp?p=ftp%3A%2F%2Fftpuserd%3AGKB4ftp%40ftp.pharmgkb.org%2Fdownload%2Ftatonetti%2F3003377s-twosides.zip', $lfile);
            } elseif ($file == 'pathways') {
                Utils::DownloadSingle('http://www.pharmgkb.org/commonFileDownload.action?filename=' . $file . '-tsv.zip', $lfile);
            } else {
                Utils::DownloadSingle('http://www.pharmgkb.org/commonFileDownload.action?filename=' . $file . '.zip', $lfile);
            }
        }

        // get a pointer to the file in the zip archive
        $zin = new ZipArchive();
        // ZipArchive::open() returns TRUE on success and an error code on
        // failure, so the result must be compared against TRUE.
        if ($zin->open($lfile) !== true) {
            trigger_error("Unable to open {$lfile}");
            exit;
        }

        // entries to read from the archive
        if ($file == "variant_annotations") {
            $zipentries = array('clinical_ann_metadata.tsv', 'var_drug_ann.tsv', 'var_pheno_ann.tsv', 'var_fa_ann.tsv');
        } elseif ($file == "relationships") {
            $zipentries = array("relationships.tsv");
        } elseif ($file == 'offsides') {
            $zipentries = array('3003377s-offsides.tsv');
        } elseif ($file == 'twosides') {
            $zipentries = array('3003377s-twosides.tsv');
        } else {
            $zipentries = array($file . ".tsv");
        }

        // set the write file, parse, write and close
        $ofile = $file . '.nt';
        $gz = false;
        if ($this->GetParameterValue('graph_uri')) {
            $ofile = $file . '.nq';
        }
        if ($this->GetParameterValue('gzip')) {
            $ofile .= '.gz';
            $gz = true;
        }
        $this->SetWriteFile($odir . $ofile, $gz);

        // The public download URL takes the bare file name; the local output
        // directory is not part of it.
        $bio2rdf_download_files[] = $this->GetBio2RDFDownloadURL($this->GetNamespace()) . $ofile;

        foreach ($zipentries as $zipentry) {
            if (($fp = $zin->getStream($zipentry)) === FALSE) {
                trigger_error("Unable to get {$zipentry} in ziparchive {$lfile}");
                return FALSE;
            }
            $this->SetReadFile($lfile);
            $this->GetReadFile()->SetFilePointer($fp);

            if ($file == "variant_annotations") {
                // the metadata entry has its own parser; the other three share one
                if ($zipentry == "clinical_ann_metadata.tsv") {
                    $fnx = "clinical_ann_metadata";
                } else {
                    $fnx = 'variant_annotation';
                }
                echo "processing {$zipentry}..";
            } else {
                $fnx = $file;
                echo "processing {$fnx}..";
            }

            $this->{$fnx}();
            $this->WriteRDFBufferToWriteFile();
            echo PHP_EOL;
        }
        $this->GetWriteFile()->Close();
    } // foreach

    // generate the release file
    $desc = $this->GetBio2RDFDatasetDescription(
        $this->GetNamespace(),
        "https://github.com/bio2rdf/bio2rdf-scripts/blob/master/pharmgkb/pharmgkb.php",
        $bio2rdf_download_files,
        "http://pharmgkb.org",
        array("use", "no-commercial"),
        "http://pharmgkb.org",
        $this->GetParameterValue('download_url'),
        $this->version
    );
    $this->SetWriteFile($odir . $this->GetBio2RDFReleaseFile($this->GetNamespace()));
    $this->GetWriteFile()->Write($desc);
    $this->GetWriteFile()->Close();

    return TRUE;
}
/**
 * Convert the requested CTD files to RDF and write the dataset description.
 *
 * For each requested file the gzipped local copy is downloaded when missing,
 * the matching CTD_<file>() method is invoked, and a source/output
 * DataResource pair is appended to the dataset description, which is written
 * to the Bio2RDF release file at the end.
 *
 * When the 'dataset_graph' parameter is set, the graph URI is switched to the
 * dataset URI for the duration of the run and restored afterwards.
 *
 * @return void
 */
function process()
{
    // get the file list
    if (parent::getParameterValue('files') == 'all') {
        $files = explode("|", parent::getParameterList('files'));
        array_shift($files);
    } else {
        $files = explode(",", parent::getParameterValue('files'));
    }

    $dataset_description = '';

    // set directory values
    $ldir = parent::getParameterValue('indir');
    $rdir = parent::getParameterValue('download_url');
    $odir = parent::getParameterValue('outdir');

    // remember the configured graph URI so it can be restored at the end
    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    $gz_suffix = ".gz";

    // identifiers.org dataset for the files that have one
    $datasets = array(
        'chemicals' => "http://identifiers.org/ctd.chemical/",
        'diseases'  => "http://identifiers.org/ctd.disease/",
        'genes'     => "http://identifiers.org/ctd.gene/",
    );

    // xsd:dateTime layout: zero-padded 24h hour, formatted in UTC to match the trailing "Z"
    $date_format = 'Y-m-d\TH:i:s\Z';

    foreach ($files as $file) {
        // two of the remote files are published uncompressed
        if ($file == 'chem_gene_ixn_types') {
            $suffix = '.tsv';
        } elseif ($file == 'exposure_ontology') {
            $suffix = '.obo';
        } else {
            $suffix = ".tsv.gz";
        }

        $lfile = $ldir . $file . $gz_suffix;
        $rfile = $rdir . 'CTD_' . $file . $suffix;

        if (!file_exists($lfile)) {
            trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
            if ($suffix == ".tsv.gz") {
                Utils::DownloadSingle($rfile, $lfile);
            } else {
                // written through the compress.zlib:// wrapper, presumably so
                // the local copy is gzipped like the other inputs
                Utils::DownloadSingle($rfile, "compress.zlib://" . $lfile);
            }
        }

        $out_suffix = parent::getParameterValue('output_format');
        $ofile = "ctd_" . $file . "." . $out_suffix;
        $gz = false;
        if (strstr(parent::getParameterValue('output_format'), "gz")) {
            $gz = true;
        }

        echo "Processing " . $file . " ...";
        parent::setWriteFile($odir . $ofile, $gz);

        // set read file
        parent::setReadFile($lfile, TRUE);

        // dispatch to the parser method named after the file
        $fnx = "CTD_" . $file;
        $this->{$fnx}();

        // close write file
        parent::getWriteFile()->close();
        parent::clear();
        echo "done!" . PHP_EOL;

        // generate the dataset release file
        echo "Generating dataset description... \n";
        $dataset = isset($datasets[$file]) ? $datasets[$file] : null;

        // dataset description
        // Title fixed: $gz_suffix already starts with a dot, and the closing
        // parenthesis was missing.
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("Comparative Toxicogenomics Database ({$file}{$gz_suffix})")
            ->setRetrievedDate(gmdate($date_format, filemtime($lfile)))
            ->setFormat("text/tab-separated-value")
            ->setFormat("application/gzip")
            ->setPublisher("http://ctdbase.org/")
            ->setHomepage("http://ctdbase.org/")
            ->setRights("use")
            ->setRights("by-attribution")
            ->setRights("no-commercial")
            ->setLicense("http://ctdbase.org/about/legal.jsp")
            ->setDataset($dataset);

        $prefix = parent::getPrefix();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $date = gmdate($date_format);

        $output_file = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ctd/ctd.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());

        if ($gz) {
            $output_file->setFormat("application/gzip");
        }
        if (strstr(parent::getParameterValue('output_format'), "nt")) {
            $output_file->setFormat("application/n-triples");
        } else {
            $output_file->setFormat("application/n-quads");
        }

        $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
    }

    // restore the graph URI that was active before the run
    parent::setGraphURI($graph_uri);

    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Convert locally available Affymetrix probeset annotation archives to RDF.
 *
 * The listing page is fetched (when absent or when 'download' is "true") and
 * scanned for quoted "*.csv.zip" references. The selected archives are read
 * from the input directory and parsed into a single 'affymetrix.<format>'
 * output file; archives that are not present locally are skipped. A dataset
 * description is written to the Bio2RDF release file at the end.
 *
 * The script exits if the listing contains no archives, if none of the
 * requested files match the listing, or if an archive cannot be opened.
 *
 * @return bool TRUE on completion, FALSE if the CSV entry cannot be streamed
 *              from an archive.
 */
function Run()
{
    // directory shortcuts
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');

    // get the listings page
    $url = trim(parent::getParameterValue('download_url'));
    $listing_file = $ldir . "probeset_list.html";
    if (!file_exists($listing_file) || parent::getParameterValue("download") == "true") {
        echo "Downloading {$listing_file}" . PHP_EOL;
        Utils::DownloadSingle($url, $listing_file);
    }
    $listings = file_get_contents($listing_file);

    // make a list of the csv.zip files
    preg_match_all("/\"([^\"]+)\\.csv\\.zip\"/", $listings, $m);
    if (count($m[1]) == 0) {
        trigger_error("could not find any .csv.zip files in {$url}");
        exit;
    }

    if (parent::getParameterValue("files") == 'all') {
        $myfiles = $m[1];
    } else {
        // keep the first listing entry that contains each requested name
        $a = explode(",", parent::getParameterValue("files"));
        foreach ($a as $f) {
            $found = false;
            foreach ($m[1] as $n) {
                if (strstr($n, $f)) {
                    $found = true;
                    $myfiles[] = $n;
                    break;
                }
            }
            if ($found === false) {
                echo "cannot find {$f} in list" . PHP_EOL;
            }
        }
    }
    if (!isset($myfiles)) {
        exit; // nothing to do
    }

    $dataset_description = '';

    // set the write file
    $gz = strstr(parent::getParameterValue('output_format'), ".gz") === FALSE ? false : true;
    $outfile = 'affymetrix.' . parent::getParameterValue('output_format');
    $this->setWriteFile($odir . $outfile, $gz);

    // Stays null when every archive is skipped, so the output description
    // below does not dereference an undefined variable.
    $source_file = null;

    // iterate over the files
    foreach ($myfiles as $rfile) {
        $base_file = substr($rfile, strrpos($rfile, "/") + 1);

        // get and set the dataset version
        if (parent::getDatasetVersion() == null) {
            preg_match("/\\.na([0-9]{2})\\.annot/", $base_file, $version_match);
            if (isset($version_match[1])) {
                $this->setDatasetVersion($version_match[1]);
            }
        }
        // rewrite the file name when a different version was requested
        if (parent::getDatasetVersion() != parent::getParameterValue('version')) {
            $base_file = str_replace(
                "na" . parent::getDatasetVersion(),
                "na" . parent::getParameterValue('version'),
                $base_file
            );
        }

        $csv_file = $base_file . ".csv";
        $zip_file = $csv_file . ".zip";
        $lfile = $ldir . $zip_file;
        if (!file_exists($lfile)) {
            echo "skipping: {$lfile} does not exist" . PHP_EOL;
            continue;
        }
        echo "processing {$lfile}" . PHP_EOL;

        // open the zip file
        $zin = new ZipArchive();
        // ZipArchive::open() returns TRUE on success and an error code on
        // failure, so the result must be compared against TRUE.
        if ($zin->open($lfile) !== true) {
            trigger_error("Unable to open {$lfile}");
            exit;
        }
        if (($fp = $zin->getStream($csv_file)) === FALSE) {
            trigger_error("Unable to get {$csv_file} in ziparchive {$lfile}");
            return FALSE;
        }

        parent::setReadFile($lfile);
        parent::getReadFile()->setFilePointer($fp);
        $this->parse($base_file);
        parent::getReadFile()->close();
        parent::clear();

        // dataset description
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("Affymetrix Probeset: {$base_file}")
            ->setRetrievedDate(parent::getDate(filemtime($lfile)))
            ->setFormat("text/tab-separated-value")
            ->setFormat("application/zip")
            ->setPublisher("http://affymetrix.com")
            ->setHomepage("http://www.affymetrix.com/support/technical/annotationfilesmain.affx")
            ->setRights("use")
            ->setRights("no-commercial")
            ->setRights("registration-required")
            ->setLicense("http://www.affymetrix.com/about_affymetrix/legal/index.affx")
            ->setDataset("http://identifiers.org/affy.probeset/");
        $dataset_description .= $source_file->toRDF();
    }
    $this->getWriteFile()->close();

    // write the dataset description
    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = parent::getDate(filemtime($odir . $outfile));

    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outfile}")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}");
    // As before, only the last processed source is referenced; skipped
    // entirely when no archive was processed.
    if ($source_file !== null) {
        $output_file->setSource($source_file->getURI());
    }
    $output_file
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/affymetrix/affymetrix.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());

    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strstr(parent::getParameterValue('output_format'), "nt")) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }
    $dataset_description .= $output_file->toRDF();

    // write the dataset description
    $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
    $this->getWriteFile()->write($dataset_description);
    $this->getWriteFile()->close();

    return true;
}
/**
 * Download and convert Affymetrix probeset annotation archives to RDF.
 *
 * The listing page is fetched (when absent or when 'download' is "true") and
 * scanned for quoted "*.csv.zip" references. Each selected archive is
 * downloaded when needed, its CSV entry is streamed to Parse(), and one
 * output file per archive is written. A dataset description covering all
 * outputs is written to the Bio2RDF release file at the end.
 *
 * The script exits if the listing contains no archives, if none of the
 * requested files match the listing, or if an archive cannot be opened.
 *
 * @return bool TRUE on completion, FALSE if the CSV entry cannot be streamed
 *              from an archive.
 */
function Run()
{
    // directory shortcuts
    $ldir = $this->GetParameterValue('indir');
    $odir = $this->GetParameterValue('outdir');

    // get the listings page
    $url = trim($this->GetParameterValue('download_url'));
    $listing_file = $ldir . "probeset_list.html";
    $force_download = ($this->GetParameterValue("download") == "true");
    if (!file_exists($listing_file) || $force_download) {
        echo "Downloading {$listing_file}" . PHP_EOL;
        Utils::DownloadSingle($url, $listing_file);
    }
    $listings = file_get_contents($listing_file);

    // get the csv.zip files
    preg_match_all("/\"([^\"]+)\\.csv\\.zip\"/", $listings, $m);
    if (count($m[1]) == 0) {
        trigger_error("could not find any .csv.zip files in {$url}");
        exit;
    }

    if ($this->GetParameterValue("files") == 'all') {
        $myfiles = $m[1];
    } else {
        // keep the first listing entry that contains each requested name
        $a = explode(",", $this->GetParameterValue("files"));
        foreach ($a as $f) {
            $found = false;
            foreach ($m[1] as $n) {
                if (strstr($n, $f)) {
                    $found = true;
                    $myfiles[] = $n;
                    break;
                }
            }
            if ($found === false) {
                echo "cannot find {$f} in list" . PHP_EOL;
            }
        }
    }
    if (!isset($myfiles)) {
        exit;
    }

    // Initialised so the release step works even when every download failed.
    $bio2rdf_download_files = array();

    foreach ($myfiles as $rfile) {
        // download
        $base_file = substr($rfile, strrpos($rfile, "/") + 1);
        $base_url = substr($rfile, 0, strrpos($rfile, "/"));
        echo "processing {$base_file}, from {$base_url}" . PHP_EOL;

        $csv_file = $base_file . ".csv";
        $zip_file = $csv_file . ".zip";
        $lfile = $ldir . $zip_file;

        // Same "true" comparison as the listing download above, so a string
        // value of "false" does not force a download.
        if (!file_exists($lfile) || $force_download) {
            $rfile = $url . $zip_file;
            trigger_error("Downloading {$zip_file} from {$rfile}", E_USER_NOTICE);
            if (Utils::Download($base_url, array($zip_file), $ldir) === FALSE) {
                // report the archive that failed ($file was never defined here)
                trigger_error("Unable to download {$zip_file}. \nskipping", E_USER_WARNING);
                continue;
            }
        }

        // open the zip file
        $zin = new ZipArchive();
        // ZipArchive::open() returns TRUE on success and an error code on
        // failure, so the result must be compared against TRUE.
        if ($zin->open($lfile) !== true) {
            trigger_error("Unable to open {$lfile}");
            exit;
        }
        if (($fp = $zin->getStream($csv_file)) === FALSE) {
            trigger_error("Unable to get {$csv_file} in ziparchive {$lfile}");
            return FALSE;
        }
        $this->SetReadFile($lfile);
        $this->GetReadFile()->SetFilePointer($fp);

        // set the write file
        $outfile = $base_file . '.nt';
        $gz = false;
        if ($this->GetParameterValue('graph_uri')) {
            $outfile = $base_file . '.nq';
        }
        if ($this->GetParameterValue('gzip')) {
            $outfile .= '.gz';
            $gz = true;
        }
        $bio2rdf_download_files[] = $this->GetBio2RDFDownloadURL($this->GetNamespace()) . $outfile;
        $this->SetWriteFile($odir . $outfile, $gz);

        $this->Parse();

        $this->GetWriteFile()->Close();
        $this->GetReadFile()->Close();
    }

    // generate the release file
    // A stray placeholder argument used to sit after the download list and
    // shifted every following argument by one position; it is removed so the
    // argument order matches the other callers of this helper.
    $desc = $this->GetBio2RDFDatasetDescription(
        $this->GetNamespace(),
        "https://github.com/bio2rdf/bio2rdf-scripts/blob/master/affymetrix/affymetrix.php",
        $bio2rdf_download_files,
        "http://affymetrix.com/",
        array("use-share-modify", "no-commercial"),
        null,
        $this->GetParameterValue('download_url'),
        $this->version
    );
    $this->SetWriteFile($odir . $this->GetBio2RDFReleaseFile($this->GetNamespace()));
    $this->GetWriteFile()->Write($desc);
    $this->GetWriteFile()->Close();

    return true;
}
/**
 * Convert the requested PharmGKB zip archives to RDF and write the dataset
 * description.
 *
 * The file list comes from the 'files' parameter plus any names in
 * 'additional' (unless it is 'none'). For each file the local archive is
 * downloaded when needed, the relevant entries are streamed out of it, and
 * the method named after the file (or, for 'annotations', after the entry) is
 * invoked. A source/output DataResource pair is recorded per entry and the
 * collected description is written to the Bio2RDF release file.
 *
 * 'annotations' and 'relationships' are never downloaded; they are skipped
 * with a message when the archive is absent. The script exits if an archive
 * cannot be opened.
 *
 * @return bool|null FALSE if an expected entry cannot be streamed from its
 *                   archive; otherwise no value is returned.
 */
function run()
{
    // get the file list
    if ($this->GetParameterValue('files') == 'all') {
        $files = explode("|", $this->GetParameterList('files'));
        array_shift($files);
    } else {
        $files = explode(",", $this->GetParameterValue('files'));
    }
    if ($this->getParameterValue('additional') != 'none') {
        $f = explode(",", $this->getParameterValue('additional'));
        $files = array_merge($files, $f);
    }

    $ldir = $this->GetParameterValue('indir');
    $odir = $this->GetParameterValue('outdir');
    $rdir = $this->GetParameterValue('download_url');
    $dataset_description = '';

    // xsd:dateTime layout: zero-padded 24h hour, formatted in UTC to match the trailing "Z"
    $date_format = 'Y-m-d\TH:i:s\Z';

    foreach ($files as $file) {
        $lfile = $ldir . $file . ".zip";
        $rfile = $rdir . $file . ".zip";

        if ($file == "offsides" && !file_exists($lfile)) {
            // message previously named the wrong file
            echo "downloading {$file}...";
            $rfile = "http://www.pharmgkb.org/redirect.jsp?p=ftp%3A%2F%2Fftpuserd%3AGKB4ftp%40ftp.pharmgkb.org%2Fdownload%2Ftatonetti%2F3003377s-offsides.zip";
            Utils::DownloadSingle($rfile, $lfile);
            echo "done" . PHP_EOL;
        } elseif ($file == "twosides" && !file_exists($lfile)) {
            echo "downloading {$file} ...";
            $rfile = "http://www.pharmgkb.org/redirect.jsp?p=ftp%3A%2F%2Fftpuserd%3AGKB4ftp%40ftp.pharmgkb.org%2Fdownload%2Ftatonetti%2F3003377s-twosides.zip";
            Utils::DownloadSingle($rfile, $lfile);
            echo "done" . PHP_EOL;
        } elseif ($file == 'annotations' || $file == 'relationships') {
            if (!file_exists($lfile)) {
                echo "Contact PharmGKB to get access to variants/clinical variants; save file as annotations.zip" . PHP_EOL;
                continue;
            }
        } else {
            if (!file_exists($lfile) || parent::getParameterValue('download') == true) {
                echo "Downloading {$lfile} ... ";
                Utils::DownloadSingle('https://www.pharmgkb.org/download.do?objId=' . $file . '.zip&dlCls=common', $lfile);
                echo "done" . PHP_EOL;
            }
        }

        // get a pointer to the file in the zip archive
        if (!file_exists($lfile)) {
            echo "no local copy of {$lfile} . skipping" . PHP_EOL;
            continue;
        }
        $zin = new ZipArchive();
        // ZipArchive::open() returns TRUE on success and an error code on
        // failure, so the result must be compared against TRUE.
        if ($zin->open($lfile) !== true) {
            trigger_error("Unable to open {$lfile}");
            exit;
        }

        // entries to read from the archive
        $zipentries = array();
        if ($file == "annotations") {
            // exclude: 'clinical_ann.tsv','study_parameters.tsv'
            $zipentries = array('clinical_ann_metadata.tsv', 'var_drug_ann.tsv', 'var_pheno_ann.tsv', 'var_fa_ann.tsv');
        } elseif ($file == "pathways") {
            // every entry in the archive except those with a .txt extension
            for ($i = 0; $i < $zin->numFiles; $i++) {
                $stat = $zin->statIndex($i);
                $entry = $stat['name'];
                $ext = pathinfo($entry, PATHINFO_EXTENSION);
                if ($ext != "txt") {
                    $zipentries[] = $entry;
                }
            }
        } elseif ($file == "relationships") {
            $zipentries = array("relationships.tsv");
        } elseif ($file == 'offsides') {
            $zipentries = array('3003377s-offsides.tsv');
        } elseif ($file == 'twosides') {
            $zipentries = array('3003377s-twosides.tsv');
        } else {
            $zipentries = array($file . ".tsv");
        }

        // set the write file, parse, write and close
        $out_format = parent::getParameterValue('output_format');
        $outfile = $file . '.' . $out_format;
        $gz = false;
        if (strstr(parent::getParameterValue('output_format'), "gz")) {
            $gz = true;
        }
        $this->SetWriteFile($odir . $outfile, $gz);

        foreach ($zipentries as $zipentry) {
            if (($fp = $zin->getStream($zipentry)) === FALSE) {
                trigger_error("Unable to get {$zipentry} in ziparchive {$lfile}");
                return FALSE;
            }
            $this->SetReadFile($lfile);
            $this->GetReadFile()->SetFilePointer($fp);

            if ($file == "annotations") {
                // parser method is the entry name without its .tsv extension
                $fnx = substr($zipentry, 0, strpos($zipentry, ".tsv"));
                echo "processing {$zipentry}..";
            } elseif ($file == 'pathways') {
                $fnx = 'pathways';
                echo "processing {$fnx} ({$zipentry})... ";
            } else {
                $fnx = $file;
                echo "processing {$fnx} ... ";
            }

            $this->{$fnx}();
            parent::writeRDFBufferToWriteFile();
            parent::clear();
            echo "done!" . PHP_EOL;

            // generate the dataset release file
            $source_file = (new DataResource($this))
                ->setURI($rfile)
                ->setTitle("Pharmacogenomics Knowledge Base ({$zipentry})")
                ->setRetrievedDate(gmdate($date_format, filemtime($lfile)))
                ->setFormat("text/tab-separated-value")
                ->setFormat("application/zip")
                ->setPublisher("http://www.pharmgkb.org/")
                ->setHomepage("http://www.pharmgkb.org/")
                ->setRights("use")
                ->setRights("no-commercial")
                ->setLicense("http://www.pharmgkb.org/page/policies")
                ->setDataset("http://identifiers.org/pharmgkb/");

            $prefix = parent::getPrefix();
            $bVersion = parent::getParameterValue('bio2rdf_release');
            $date = gmdate($date_format);

            $output_file = (new DataResource($this))
                ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outfile}")
                ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} {$file} (generated at {$date})")
                ->setSource($source_file->getURI())
                ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/pharmgkb/pharmgkb.php")
                ->setCreateDate($date)
                ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
                ->setPublisher("http://bio2rdf.org")
                ->setRights("use-share-modify")
                ->setRights("by-attribution")
                ->setRights("restricted-by-source-license")
                ->setLicense("http://creativecommons.org/licenses/by/3.0/")
                ->setDataset(parent::getDatasetURI());

            if ($gz) {
                $output_file->setFormat("application/gzip");
            }
            if (strstr(parent::getParameterValue('output_format'), "nt")) {
                $output_file->setFormat("application/n-triples");
            } else {
                $output_file->setFormat("application/n-quads");
            }

            $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
        }
        $this->GetWriteFile()->Close();
    } // foreach

    echo "Generating dataset description... ";
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Fetch an ontology from the NCBO BioPortal "virtual download" REST endpoint.
 *
 * Builds the download URL from the ontology id and API key and hands it to
 * Utils::DownloadSingle together with the destination path.
 *
 * @param mixed  $ontology_id     Ontology identifier placed in the URL path.
 * @param string $apikey          API key sent as the 'apikey' query parameter.
 * @param string $target_filepath Destination passed to Utils::DownloadSingle.
 */
function GetLatestNCBOOntology($ontology_id, $apikey, $target_filepath)
{
    $endpoint = 'http://rest.bioontology.org/bioportal/virtual/download/';
    $download_url = "{$endpoint}{$ontology_id}?apikey={$apikey}";

    Utils::DownloadSingle($download_url, $target_filepath);
}
/**
 * Fetch an ontology from the NCBO data.bioontology.org download endpoint.
 *
 * Assembles "<base>/<ontology_id>/download?apikey=<apikey>" and passes it to
 * Utils::DownloadSingle together with the destination path.
 *
 * @param mixed  $ontology_id     Ontology identifier placed in the URL path.
 * @param string $apikey          API key sent as the 'apikey' query parameter.
 * @param string $target_filepath Destination passed to Utils::DownloadSingle.
 */
function GetLatestNCBOOntology($ontology_id, $apikey, $target_filepath)
{
    $url_parts = array(
        'http://data.bioontology.org/ontologies/',
        $ontology_id,
        '/download?apikey=',
        $apikey,
    );

    Utils::DownloadSingle(implode('', $url_parts), $target_filepath);
}