function Run() { $idir = parent::getParameterValue('indir'); $odir = parent::getParameterValue('outdir'); $files = parent::getParameterValue('files'); if ($files == 'all') { $list = explode('|', parent::getParameterList('files')); array_shift($list); } else { $list = explode(',', parent::getParameterValue('files')); } $dataset_description = ''; foreach ($list as $item) { $lfile = $idir . $item . '.rpt'; $rfile = parent::getParameterValue('download_url') . $item . '.rpt'; if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') { echo "downloading {$item}..."; $ret = Utils::DownloadSingle($rfile, $lfile); if ($ret != true) { continue; } } parent::setReadFile($lfile, true); echo "Processing {$item}..."; $ofile = $odir . $item . '.' . parent::getParameterValue('output_format'); $gz = strstr(parent::getParameterValue('output_format'), "gz") ? true : false; parent::setWriteFile($ofile, $gz); $this->{$item}(); parent::getWriteFile()->close(); parent::getReadFile()->close(); echo "Done" . PHP_EOL; parent::clear(); $source_file = (new DataResource($this))->setURI($rfile)->setTitle("MGI {$item}")->setRetrievedDate(date("Y-m-d\\TH:i:s", filemtime($lfile)))->setFormat("text")->setPublisher("http://www.informatics.jax.org")->setHomepage("http://www.informatics.jax.org")->setRights("use")->setLicense("http://www.informatics.jax.org/mgihome/other/copyright.shtml")->setDataset("http://identifiers.org/mgi/"); $prefix = parent::getPrefix(); $bVersion = parent::getParameterValue('bio2rdf_release'); $date = date("Y-m-d\\TH:i:s"); $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$item} in {$prefix}")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/mgi/mgi.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI()); if ($gz) { $output_file->setFormat("application/gzip"); } if (strstr(parent::getParameterValue('output_format'), "nt")) { $output_file->setFormat("application/n-triples"); } else { $output_file->setFormat("application/n-quads"); } $dataset_description .= $source_file->toRDF() . $output_file->toRDF(); } //foreach // generate the dataset release file $this->setWriteFile($odir . parent::getBio2RDFReleaseFile()); $this->getWriteFile()->write($dataset_description); $this->getWriteFile()->close(); echo "done!" . PHP_EOL; }
function run() { $ldir = parent::getParameterValue('indir'); $odir = parent::getParameterValue('outdir'); $dd = ''; $files = parent::getParameterValue('files'); if ($files == 'all') { $files = explode('|', parent::getParameterList('files')); array_shift($files); } else { $files = explode(',', parent::getParameterValue('files')); } foreach ($files as $file) { echo "processing {$file} ..."; $lfile = $ldir . $this->filemap[$file]; $rfile = parent::getParameterValue('download_url') . $this->filemap[$file]; if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') { $ret = utils::downloadSingle($rfile, $lfile); if ($ret === false) { echo "unable to download {$file} ... skipping" . PHP_EOL; continue; } } parent::setReadFile($lfile, true); $suffix = parent::getParameterValue('output_format'); $ofile = "orphanet-" . $file . '.' . $suffix; $gz = strstr(parent::getParameterValue('output_format'), "gz") ? $gz = true : ($gz = false); /* parent::setWriteFile($odir.$ofile, $gz); $this->$file($lfile); parent::getWriteFile()->close(); */ parent::getReadFile()->close(); parent::clear(); echo "done!" . PHP_EOL; // dataset description $source_file = (new DataResource($this))->setURI($rfile)->setTitle("Orphanet: {$file}")->setRetrievedDate(parent::getDate(filemtime($lfile)))->setFormat("application/xml")->setPublisher("http://www.orpha.net")->setHomepage("http://www.orpha.net/")->setRights("use")->setRights("sharing-modified-version-needs-permission")->setLicense("http://creativecommons.org/licenses/by-nd/3.0/")->setDataset("http://identifiers.org/orphanet/"); $prefix = parent::getPrefix(); $bVersion = parent::getParameterValue('bio2rdf_release'); $date = parent::getDate(filemtime($odir . $ofile)); $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/orphanet/orphanet.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI()); $gz = strstr(parent::getParameterValue('output_format'), ".gz") === FALSE ? false : true; if ($gz) { $output_file->setFormat("application/gzip"); } if (strstr(parent::getParameterValue('output_format'), "nt")) { $output_file->setFormat("application/n-triples"); } else { $output_file->setFormat("application/n-quads"); } $dd .= $source_file->toRDF() . $output_file->toRDF(); } //foreach parent::writeToReleaseFile($dd); }
function process() { // get the file list if (parent::getParameterValue('files') == 'all') { $files = explode("|", parent::getParameterList('files')); array_shift($files); } else { $files = explode(",", parent::getParameterValue('files')); } $dataset_description = ''; //set directory values $ldir = parent::getParameterValue('indir'); $rdir = parent::getParameterValue('download_url'); $odir = parent::getParameterValue('outdir'); $graph_uri = parent::getGraphURI(); if (parent::getParameterValue('dataset_graph') == true) { parent::setGraphURI(parent::getDatasetURI()); } $gz_suffix = ".gz"; foreach ($files as $file) { if ($file == 'chem_gene_ixn_types') { $suffix = '.tsv'; } else { if ($file == 'exposure_ontology') { $suffix = '.obo'; } else { $suffix = ".tsv.gz"; } } $lfile = $ldir . $file . $gz_suffix; $rfile = $rdir . 'CTD_' . $file . $suffix; if (!file_exists($lfile)) { trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE); if ($suffix == ".tsv.gz") { Utils::DownloadSingle($rfile, $lfile); } else { Utils::DownloadSingle($rfile, "compress.zlib://" . $lfile); } } $out_suffix = parent::getParameterValue('output_format'); $ofile = "ctd_" . $file . "." . $out_suffix; $gz = false; if (strstr(parent::getParameterValue('output_format'), "gz")) { $gz = true; } echo "Processing " . $file . " ..."; parent::setWriteFile($odir . $ofile, $gz); //set read file parent::setReadFile($lfile, TRUE); $fnx = "CTD_" . $file; $this->{$fnx}(); //close write file parent::getWriteFile()->close(); parent::clear(); echo "done!" . PHP_EOL; // generate the dataset release file echo "Generating dataset description... "; if ($file == "chemicals") { $dataset = "http://identifiers.org/ctd.chemical/"; } else { if ($file == "diseases") { $dataset = "http://identifiers.org/ctd.disease/"; } else { if ($file == "genes") { $dataset = "http://identifiers.org/ctd.gene/"; } else { $dataset = null; } } } // dataset description $source_file = (new DataResource($this))->setURI($rfile)->setTitle("Comparative Toxicogenomics Database ({$file}.{$gz_suffix}")->setRetrievedDate(date("Y-m-d\\TG:i:s\\Z", filemtime($lfile)))->setFormat("text/tab-separated-value")->setFormat("application/gzip")->setPublisher("http://ctdbase.org/")->setHomepage("http://ctdbase.org/")->setRights("use")->setRights("by-attribution")->setRights("no-commercial")->setLicense("http://ctdbase.org/about/legal.jsp")->setDataset($dataset); $prefix = parent::getPrefix(); $bVersion = parent::getParameterValue('bio2rdf_release'); $date = date("Y-m-d\\TG:i:s\\Z"); $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ctd/ctd.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI()); if ($gz) { $output_file->setFormat("application/gzip"); } if (strstr(parent::getParameterValue('output_format'), "nt")) { $output_file->setFormat("application/n-triples"); } else { $output_file->setFormat("application/n-quads"); } $dataset_description .= $source_file->toRDF() . $output_file->toRDF(); } parent::setGraphURI($graph_uri); parent::setWriteFile($odir . parent::getBio2RDFReleaseFile()); parent::getWriteFile()->write($dataset_description); parent::getWriteFile()->close(); echo "done!" . PHP_EOL; }
public function run() { if (parent::getParameterValue('files') == 'all') { $files = explode("|", parent::getParameterList('files')); array_shift($files); } else { $files = explode(",", parent::getParameterValue('files')); } $release = parent::getParameterValue('release'); $releaseb = "WS249"; $remote_files = array("geneIDs" => "species/c_elegans/annotation/geneIDs/c_elegans.PRJNA13758." . $release . ".geneIDs.txt.gz", "functional_descriptions" => "species/c_elegans/annotation/functional_descriptions/c_elegans.PRJNA13758." . $release . ".functional_descriptions.txt.gz", "gene_interactions" => "species/c_elegans/annotation/gene_interactions/c_elegans.PRJNA13758." . $release . ".gene_interactions.txt.gz", "gene_associations" => "releases/current-production-release/ONTOLOGY/gene_association." . $releaseb . ".wb", "phenotype_associations" => "releases/current-production-release/ONTOLOGY/phenotype_association." . $releaseb . ".wb"); $local_files = array("geneIDs" => "wormbase." . parent::getParameterValue('release') . ".genes.txt.gz", "functional_descriptions" => "wormbase." . parent::getParameterValue('release') . ".functional_descriptions.txt.gz", "gene_interactions" => "wormbase." . parent::getParameterValue('release') . ".gene_interactions.txt.gz", "gene_associations" => "wormbase." . parent::getParameterValue('release') . ".gene_association.wb", "phenotype_associations" => "wormbase." . parent::getParameterValue('release') . ".phenotype_associations.wb"); $idir = parent::getParameterValue('indir'); $odir = parent::getParameterValue('outdir'); $rdir = parent::getParameterValue('download_url'); $dataset_description = ''; $graph_uri = parent::getGraphURI(); if (parent::getParameterValue('dataset_graph') == true) { parent::setGraphURI(parent::getDatasetURI()); } foreach ($files as $file) { $lfile = $idir . $local_files[$file]; $rfile = $rdir . $remote_files[$file]; if (!file_exists($lfile) or parent::getParameterValue('download') == true) { trigger_error($lfile . " not found. Will attempt to download." . PHP_EOL, E_USER_WARNING); echo "Downloading {$rfile}... "; Utils::DownloadSingle($rfile, $lfile); echo "done!" . PHP_EOL; } if (strstr($lfile, "gz")) { parent::setReadFile($lfile, TRUE); } else { parent::setReadFile($lfile, FALSE); } $suffix = parent::getParameterValue('output_format'); $ofile = "wormbase." . $file . "." . $suffix; $gz = strstr(parent::getParameterValue('output_format'), "gz") ? true : false; parent::setWriteFile($odir . $ofile, $gz); echo "Processing {$file}... "; $fnx = $file; $this->{$fnx}(); echo "done!" . PHP_EOL; parent::getWriteFile()->close(); // generate the dataset release file echo "Generating dataset description for {$ofile}... "; // dataset description $source_file = (new DataResource($this))->setURI($rfile)->setTitle("WormBase Release " . parent::getParameterValue('release') . " subset ({$file})")->setRetrievedDate(date("Y-m-d\\TG:i:s\\Z", filemtime($lfile)))->setFormat("text/tab-separated-value")->setFormat("application/gzip")->setPublisher("http://wormbase.org/")->setHomepage("http://wormbase.org/")->setRights("use")->setRights("restricted-by-source-license")->setLicense("http://www.wormbase.org/about/policies")->setDataset("http://identifiers.org/wormbase/"); $prefix = parent::getPrefix(); $bVersion = parent::getParameterValue('bio2rdf_release'); $date = date("Y-m-d\\TG:i:s\\Z"); $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/wormbase/wormbase.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI()); if ($gz) { $output_file->setFormat("application/gzip"); } if (strstr(parent::getParameterValue('output_format'), "nt")) { $output_file->setFormat("application/n-triples"); } else { $output_file->setFormat("application/n-quads"); } $dataset_description .= $source_file->toRDF() . $output_file->toRDF(); echo "done!" . PHP_EOL; } parent::setGraphURI($graph_uri); parent::setWriteFile($odir . parent::getBio2RDFReleaseFile()); parent::getWriteFile()->write($dataset_description); parent::getWriteFile()->close(); }
/** * create the directories we need to sync the files **/ function sync_files() { $this->setup_ftp(); $files = parent::getParameterValue('files'); if ($files == 'all') { $files = explode('|', parent::getParameterList('files')); array_shift($files); } else { $files = explode(',', parent::getParameterValue('files')); } foreach ($files as $file) { switch ($file) { case "substances": $this->sync_substances(); break; case "compounds": $this->sync_compounds(); break; case "bioassay": $this->sync_bioassay(); break; case "all": $this->sync_substances(); $this->sync_compounds(); $this->sync_bioassay(); break; } } $this->close_ftp(); }
function process() { if (parent::getParameterValue('files') == 'all') { $files = explode("|", parent::getParameterList('files')); array_shift($files); } else { $files = explode(",", parent::getParameterValue('files')); } $remote_files = array("human" => "human_genes.zip", "models" => "models_genes.zip"); $ldir = parent::getParameterValue('indir'); $odir = parent::getParameterValue('outdir'); $rdir = parent::getParameterValue('download_url'); $dataset_description = ''; $graph_uri = parent::getGraphURI(); if (parent::getParameterValue('dataset_graph') == true) { parent::setGraphURI(parent::getDatasetURI()); } foreach ($files as $file) { $lfile = $ldir . $remote_files[$file]; $rfile = $rdir . $remote_files[$file]; if (!file_exists($lfile)) { trigger_error($lfile . " not found. Will attempt to download." . PHP_EOL, E_USER_WARNING); echo "Downloading {$rfile}... "; Utils::DownloadSingle($rfile, $lfile); echo "done!" . PHP_EOL; } $suffix = parent::getParameterValue('output_format'); $ofile = "genage_" . $file . '.' . $suffix; if (strstr(parent::getParameterValue('output_format'), "gz")) { $gz = true; } $zin = new ZipArchive(); if ($zin->open($lfile) === FALSE) { trigger_error("Unable to open {$lfile}"); exit; } if ($file == "human") { $zipentry = "genage_human.csv"; } else { if ($file == "models") { $zipentry = "genage_models.csv"; } } if (($fp = $zin->getStream($zipentry)) === FALSE) { trigger_error("Unable to get {$zipentry} in ziparchive {$lfile}"); return FALSE; } parent::SetReadFile($lfile); parent::GetReadFile()->SetFilePointer($fp); // set the write file, parse, write and close $suffix = parent::getParameterValue('output_format'); $outfile = "genage_" . $file . '.' . $suffix; $gz = false; if (strstr($suffix, "gz")) { $gz = true; } parent::setWriteFile($odir . $ofile, $gz); echo "Processing {$lfile}... "; $fnx = $file; $this->{$fnx}(); echo "done!" . PHP_EOL; parent::getWriteFile()->close(); // generate the dataset release file echo "Generating dataset description for {$ofile}... "; // dataset description $source_file = (new DataResource($this))->setURI($rfile)->setTitle("Human Ageing Genomic Resources GenAge database (" . $remote_files[$file] . ")")->setRetrievedDate(date("Y-m-d\\TG:i:s\\Z", filemtime($lfile)))->setFormat("text/comma-separated-value")->setFormat("application/gzip")->setPublisher("http://genomics.senescence.info/")->setHomepage("http://genomics.senescence.info/genes/")->setRights("use")->setLicense("http://genomics.senescence.info/legal.html")->setDataset("http://identifiers.org/genage/"); $prefix = parent::getPrefix(); $bVersion = parent::getParameterValue('bio2rdf_release'); $date = date("Y-m-d\\TG:i:s\\Z"); $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/genage/genage.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI()); if ($gz) { $output_file->setFormat("application/gzip"); } if (strstr(parent::getParameterValue('output_format'), "nt")) { $output_file->setFormat("application/n-triples"); } else { $output_file->setFormat("application/n-quads"); } $dataset_description .= $source_file->toRDF() . $output_file->toRDF(); echo "done!" . PHP_EOL; } parent::setGraphURI($graph_uri); parent::setWriteFile($odir . parent::getBio2RDFReleaseFile()); parent::getWriteFile()->write($dataset_description); parent::getWriteFile()->close(); }
function Run() { // get the work if ($this->GetParameterValue('files') == 'all') { $sources = explode("|", parent::getParameterList('files')); array_shift($sources); } else { // comma separated list $sources = explode(",", parent::getParameterValue('files')); } $download_files = array("h**o-sapiens" => "Pathway%20Commons%202%20homo%20sapiens.BIOPAX.owl.gz", "hprd" => "Pathway%20Commons%202%20HPRD.BIOPAX.owl.gz", "humancyc" => "Pathway%20Commons%202%20HumanCyc.BIOPAX.owl.gz", "nci-nature" => "Pathway%20Commons%202%20NCI_Nature.BIOPAX.owl.gz", "panther-pathway" => "Pathway%20Commons%202%20PANTHER%20Pathway.BIOPAX.owl.gz", "phosphositeplus" => "Pathway%20Commons%202%20PhosphoSitePlus.BIOPAX.owl.gz", "reactome" => "Pathway%20Commons%202%20Reactome.BIOPAX.owl.gz"); $graph_uri = parent::getGraphURI(); if (parent::getParameterValue('dataset_graph') == true) { parent::setGraphURI(parent::getDatasetURI()); } $dataset_description = ''; // iterate over the requested data foreach ($sources as $source) { echo "processing {$source}... "; $ldir = parent::getParameterValue('indir'); $odir = parent::getParameterValue('outdir'); $rdir = parent::getParameterValue('download_url'); // set the remote and input files $file = $source . ".owl"; $zfile = $source . ".owl.gz"; $rfile = $rdir . $download_files[$source]; $lfile = $ldir . $zfile; // download if if the file doesn't exist locally or we are told to if (!file_exists($lfile) || $this->GetParameterValue('download') == 'true') { // download echo "downloading... "; file_put_contents($lfile, file_get_contents($rfile)); } // extract the file out of the ziparchive // and load into a buffer echo 'extracting... '; if (($fpin = gzopen($lfile, "r")) === FALSE) { trigger_error("Unable to open {$lfile}", E_USER_ERROR); exit; } $data = ''; while (!gzeof($fpin)) { $buffer = gzgets($fpin, 4096); $data .= $buffer; } gzclose($fpin); // set the output file $suffix = parent::getParameterValue('output_format'); $outfile = $source . '.' . $suffix; $gz = false; if (strstr(parent::getParameterValue('output_format'), "gz")) { $gz = true; } parent::setWriteFile($odir . $outfile, $gz); // send for parsing $p = new BioPAX2Bio2RDF($this); $p->SetBuffer($data)->SetBioPAXVersion(3)->SetBaseNamespace("http://purl.org/pc2/3/")->SetBio2RDFNamespace("http://bio2rdf.org/pathwaycommons:")->SetDatasetURI(parent::getDatasetURI()); $rdf = $p->Parse(); parent::addRDF($rdf); // write to output parent::writeRDFBufferToWriteFile(); parent::getWriteFile()->Close(); echo "done!" . PHP_EOL; //generate dataset description echo "Generating dataset description for {$zfile}... "; $source_file = (new DataResource($this))->setURI($rfile)->setTitle("Pathway Commons")->setRetrievedDate(date("Y-m-d\\TG:i:s\\Z", filemtime($lfile)))->setFormat("rdf/xml")->setPublisher("http://www.pathwaycommons.org/")->setHomepage("http://www.pathwaycommons.org/")->setRights("use")->setRights("restricted-by-source-license")->setLicense("http://www.pathwaycommons.org/pc2/home.html#data_sources")->setDataset("http://identifiers.org/pathwaycommons/"); $dataset_description .= $source_file->toRDF(); echo "done!" . PHP_EOL; } echo "Generating dataset description for Bio2RDF Pathways Commons dataset... "; $prefix = parent::getPrefix(); $bVersion = parent::getParameterValue('bio2rdf_release'); $date = date("Y-m-d\\TG:i:s\\Z"); $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/pathwaycommons/pathwaycommons.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI()); if ($gz) { $output_file->setFormat("application/gzip"); } if (strstr(parent::getParameterValue('output_format'), "nt")) { $output_file->setFormat("application/n-triples"); } else { $output_file->setFormat("application/n-quads"); } $dataset_description .= $output_file->toRDF(); //write dataset description to file parent::setGraphURI($graph_uri); parent::setWriteFile($odir . parent::getBio2RDFReleaseFile()); parent::getWriteFile()->write($dataset_description); parent::getWriteFile()->close(); echo "done!" . PHP_EOL; }
function run() { $ldir = parent::getParameterValue('indir'); $odir = parent::getParameterValue('outdir'); $dataset_description = ''; $files = parent::getParameterValue('files'); if ($files == 'all') { $files = explode('|', parent::getParameterList('files')); array_shift($files); } else { $files = explode(',', parent::getParameterValue('files')); } if (parent::getParameterValue('id_list') != '') { $this->idlist = explode(",", parent::getParameterValue("id_list")); } // handle genes separately if (in_array("genes", $files)) { $orgs = array("hsa"); //,"mmu","eco","dre","dme","ath","sce","ddi"); echo "processing genes" . PHP_EOL; $ofile = "kegg-genes." . parent::getParameterValue('output_format'); $gz = strstr(parent::getParameterValue('output_format'), "gz") ? true : false; parent::setWriteFile($odir . $ofile, $gz); // get the list of genomes $lfile = $ldir . "genome.txt"; $rfile = parent::getParameterValue("download_url") . "list/genome"; if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') { $ret = utils::downloadSingle($rfile, $lfile); } $fp = fopen($lfile, "r"); while ($l = fgets($fp)) { $a = explode("\t", $l); $b = explode(", ", $a[1]); $org = $b[0]; if (!in_array($org, $orgs)) { continue; } // get the list of genes for this organims echo "processing {$org}" . PHP_EOL; $this->org = $org; // local variable $lfile = $ldir . $org . ".txt"; $rfile = parent::getParameterValue("download_url") . "list/{$org}"; if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') { $ret = utils::downloadSingle($rfile, $lfile); } parent::setReadFile($lfile, false); $this->process("gene"); parent::getReadFile()->close(); parent::clear(); $this->org = null; // add dataset description $source_file = (new DataResource($this))->setURI($rfile)->setTitle("KEGG: Gene")->setRetrievedDate(parent::getDate(filemtime($lfile)))->setFormat("text/plain")->setPublisher("http://www.kegg.jp/")->setHomepage("http://www.kegg.jp/")->setRights("use")->setRights("no-commercial")->setLicense("http://www.kegg.jp/kegg/legal.html")->setDataset("http://identifiers.org/kegg/"); $dataset_description .= $source_file->toRDF(); } fclose($fp); parent::getWriteFile()->close(); echo "done" . PHP_EOL; $prefix = parent::getPrefix(); $bVersion = parent::getParameterValue('bio2rdf_release'); $date = parent::getDate(filemtime($odir . $ofile)); $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - Gene ")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/kegg/kegg.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI()); if ($gz) { $output_file->setFormat("application/gzip"); } if (strstr(parent::getParameterValue('output_format'), "nt")) { $output_file->setFormat("application/n-triples"); } else { $output_file->setFormat("application/n-quads"); } $dataset_description .= $output_file->toRDF(); } // all other files foreach ($files as $db) { if ($db == "genes") { continue; } echo "processing {$db}" . PHP_EOL; $lfile = $ldir . $db . ".txt"; $rfile = parent::getParameterValue("download_url") . "list/{$db}"; if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') { echo "Downloading {$rfile} "; $ret = utils::downloadSingle($rfile, $lfile); if ($ret === false) { echo "unable to download {$file} ... skipping" . PHP_EOL; continue; } echo "done." . PHP_EOL; } // now for each list, get the individual entries $ofile = "kegg-{$db}." . parent::getParameterValue('output_format'); $gz = strstr(parent::getParameterValue('output_format'), "gz") ? true : false; parent::setReadFile($lfile, false); parent::setWriteFile($odir . $ofile, $gz); $this->process($db); parent::getWriteFile()->close(); parent::getReadFile()->close(); parent::clear(); echo "done!" . PHP_EOL; // add dataset description $source_file = (new DataResource($this))->setURI($rfile)->setTitle("KEGG: {$db}")->setRetrievedDate(parent::getDate(filemtime($lfile)))->setFormat("text/plain")->setPublisher("http://www.kegg.jp/")->setHomepage("http://www.kegg.jp/")->setRights("use")->setRights("no-commercial")->setLicense("http://www.kegg.jp/kegg/legal.html")->setDataset("http://identifiers.org/kegg/"); $prefix = parent::getPrefix(); $bVersion = parent::getParameterValue('bio2rdf_release'); $date = parent::getDate(filemtime($odir . $ofile)); $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$db} ")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/kegg/kegg.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI()); if ($gz) { $output_file->setFormat("application/gzip"); } if (strstr(parent::getParameterValue('output_format'), "nt")) { $output_file->setFormat("application/n-triples"); } else { $output_file->setFormat("application/n-quads"); } $dataset_description .= $source_file->toRDF() . $output_file->toRDF(); } // write the dataset description $this->setWriteFile($odir . $this->getBio2RDFReleaseFile()); $this->getWriteFile()->write($dataset_description); $this->getWriteFile()->close(); }
function process() { if (parent::getParameterValue('files') == 'all') { $files = explode("|", parent::getParameterList('files')); array_shift($files); } else { $files = explode(",", parent::getParameterValue('files')); } $ldir = parent::getParameterValue('indir'); $rdir = parent::getParameterValue('download_url'); $odir = parent::getParameterValue('outdir'); $rfiles = array("dbxref" => "curation/chromosomal_feature/dbxref.tab", "features" => "curation/chromosomal_feature/SGD_features.tab", "domains" => "curation/calculated_protein_info/domains/domains.tab", "protein" => "curation/calculated_protein_info/protein_properties.tab", "goa" => "curation/literature/gene_association.sgd.gz", "goslim" => "curation/literature/go_slim_mapping.tab", "complex" => "curation/literature/go_protein_complex_slim.tab", "interaction" => "curation/literature/interaction_data.tab", "phenotype" => "curation/literature/phenotype_data.tab", "pathways" => "curation/literature/biochemical_pathways.tab", "mapping" => "mapping"); $graph_uri = parent::getGraphURI(); if (parent::getParameterValue('dataset_graph') == true) { parent::setGraphURI(parent::getDatasetURI()); } $gz = false; if (strstr(parent::getParameterValue('output_format'), "gz")) { $gz = true; } if (parent::getParameterValue('one_file') == true) { $ofile = "sgd." . parent::getParameterValue('output_format'); parent::setWriteFile($odir . $ofile, $gz); } $dataset_description = ''; foreach ($files as $file) { $ext = substr(strrchr($rfiles[$file], '.'), 1); if ($ext == "tab") { $lfile = "sgd_" . $file . ".tab"; } elseif ($ext = "gz") { $lfile = "sgd_" . $file . ".tab.gz"; } $rfile = $rdir . $rfiles[$file]; if (!file_exists($ldir . $lfile) && parent::getParameterValue('download') == false && $file != 'mapping') { trigger_error($ldir . $lfile . " not found. Will attempt to download.", E_USER_NOTICE); Utils::DownloadSingle($rfile, $ldir . $lfile); } if (parent::getParameterValue('one_file') == false) { $ofile = "sgd_" . $file . '.' . parent::getParameterValue('output_format'); parent::setWriteFile($odir . $ofile, $gz); } //parse file parent::setReadFile($ldir . $lfile, $gz); $fnx = $file; echo "Processing {$file}... "; $this->{$fnx}(); echo PHP_EOL . "done!"; //write RDF to file parent::writeRDFBufferToWriteFile(); //close write file if (parent::getParameterValue('one_file') == false) { parent::getWriteFile()->close(); } echo PHP_EOL; // generate the dataset release file // dataset description $source_file = (new DataResource($this))->setURI($rfile)->setTitle("Saccharomyces Genome Database ({$file})")->setRetrievedDate(date("Y-m-d\\TG:i:s\\Z", filemtime($ldir . $lfile)))->setFormat("text/tab-separated-value")->setFormat("application/gzip")->setPublisher("http://www.yeastgenome.org/")->setHomepage("http://www.yeastgenome.org/")->setRights("use")->setLicense("http://www.stanford.edu/site/terms.html")->setDataset("http://identifiers.org/sgd/"); $dataset_description .= $source_file->toRDF(); if (parent::getParameterValue('one_file') == false) { $prefix = parent::getPrefix(); $bVersion = parent::getParameterValue('bio2rdf_release'); $date = date("Y-m-d\\TG:i:s\\Z"); $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sgd/sgd.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI()); if ($gz) { $output_file->setFormat("application/gzip"); } if (strstr(parent::getParameterValue('output_format'), "nt")) { $output_file->setFormat("application/n-triples"); } else { $output_file->setFormat("application/n-quads"); } $dataset_description .= $output_file->toRDF(); } } //foreach //set graph URI back to default parent::setGraphURI($graph_uri); if (parent::getParameterValue('one_file') == true) { $prefix = parent::getPrefix(); $bVersion = parent::getParameterValue('bio2rdf_release'); $date = date("Y-m-d\\TG:i:s\\Z"); $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sgd/sgd.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI()); if ($gz) { $output_file->setFormat("application/gzip"); } if (strstr(parent::getParameterValue('output_format'), "nt")) { $output_file->setFormat("application/n-triples"); } else { $output_file->setFormat("application/n-quads"); } $dataset_description .= $source_file->toRDF() . $output_file->toRDF(); } //write dataset description to file echo "Generating dataset description... " . PHP_EOL; parent::setWriteFile($odir . parent::getBio2RDFReleaseFile()); parent::getWriteFile()->write($dataset_description); parent::getWriteFile()->close(); echo "done!" . PHP_EOL; }
function Run() { $indir = parent::getParameterValue('indir'); $outdir = parent::getParameterValue('outdir'); $download_url = parent::getParameterValue('download_url'); if (parent::getParameterValue('files') == 'all') { $files = explode("|", parent::getParameterList('files')); array_shift($files); } else { $files = explode("|", parent::getParameterValue('files')); } if (parent::getParameterValue("id_list")) { $this->id_list = array_flip(explode(",", parent::getParameterValue('id_list'))); } $dataset_description = ''; foreach ($files as $f) { if ($f == 'drugbank') { $file = 'drugbank.xml.zip'; $lname = 'drugbank'; } $fnx = 'parse_' . $f; $rfile = parent::getParameterValue('download_url') . $file; $lfile = parent::getParameterValue('indir') . $file; $cfile = $lname . "." . parent::getParameterValue('output_format'); // download if (!file_exists($lfile) || parent::getParameterValue('download') == true) { utils::downloadSingle($rfile, $lfile); } // setup the write $gz = strstr(parent::getParameterValue('output_format'), ".gz") === FALSE ? false : true; parent::setWriteFile($outdir . $cfile, $gz); echo $outdir . $cfile; if (file_exists($indir . $file)) { // call the parser echo "processing {$file} ..." . PHP_EOL; $this->{$fnx}($indir, $file); echo "done" . PHP_EOL; parent::clear(); } parent::getWriteFile()->close(); // dataset description $ouri = parent::getGraphURI(); parent::setGraphURI(parent::getDatasetURI()); $source_version = parent::getDatasetVersion(); $bVersion = parent::getParameterValue('bio2rdf_release'); $prefix = parent::getPrefix(); $date = date("Y-m-d\\TH:i:sP"); // dataset description $source_file = (new DataResource($this))->setURI($rfile)->setTitle("DrugBank ({$file})")->setRetrievedDate(date("Y-m-d\\TH:i:sP", filemtime($indir . $file)))->setFormat("application/xml")->setFormat("application/zip")->setPublisher("http://drugbank.ca")->setHomepage("http://drugbank.ca")->setRights("use")->setRights("by-attribution")->setRights("no-commercial")->setLicense("http://www.drugbank.ca/about")->setDataset("http://identifiers.org/drugbank/"); $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$cfile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} v{$source_version}")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/drugbank/drugbank.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI()); $gz = strstr(parent::getParameterValue('output_format'), ".gz") === FALSE ? false : true; if ($gz) { $output_file->setFormat("application/gzip"); } if (strstr(parent::getParameterValue('output_format'), "nt")) { $output_file->setFormat("application/n-triples"); } else { $output_file->setFormat("application/n-quads"); } parent::writeToReleaseFile($source_file->toRDF() . $output_file->toRDF()); parent::setGraphURI($ouri); } parent::closeReleaseFile(); }
function run() { $idir = parent::getParameterValue('indir'); $odir = parent::getParameterValue('outdir'); $files = parent::getParameterValue('files'); $dataset_description = ''; if ($files == 'all') { $files = explode('|', parent::getParameterList('files')); array_shift($files); } else { $files = explode(',', parent::getParameterValue('files')); } foreach ($files as $file) { $f = $file; if ($file != "freq") { $f = "all_" . $file; } $f = "meddra_" . $f . ".tsv.gz"; $lfile = $idir . $f; $rfile = parent::getParameterValue('download_url') . $f; if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') { echo "downloading {$file}... "; $ret = file_get_contents($rfile); if ($ret === FALSE) { trigger_error("Unable to get {$rfile}", E_USER_WARNING); continue; } $ret = file_put_contents($lfile, $ret); if ($ret === FALSE) { trigger_error("Unable to write {$lfile}", E_USER_ERROR); exit; } echo "done!" . PHP_EOL; } echo "Processing {$f}... "; parent::setReadFile($lfile, true); $suffix = parent::getParameterValue('output_format'); $ofile = "sider-" . $file . '.' . $suffix; $gz = false; if (strstr(parent::getParameterValue('output_format'), "gz")) { $gz = true; } parent::setWriteFile($odir . $ofile, $gz); $this->{$file}(); parent::getWriteFile()->Close(); parent::getReadFile()->Close(); echo "done!" . PHP_EOL; echo "Generating dataset description... "; $source_file = (new DataResource($this))->setURI($rfile)->setTitle("SIDER Side Effect resource ({$file}.tsv.gz")->setRetrievedDate(parent::getDate(filemtime($lfile)))->setFormat("text/tab-separated-value")->setFormat("application/gzip")->setPublisher("http://sideeffects.embl.de/")->setHomepage("http://sideeffects.embl.de/")->setRights("use-share-modify")->setLicense("http://creativecommons.org/licenses/by-nc-sa/3.0/")->setDataset("http://identifiers.org/sider.effect/"); $prefix = parent::getPrefix(); $bVersion = parent::getParameterValue('bio2rdf_release'); $date = parent::getDate(filemtime($odir . $ofile)); $output_file = (new DataResource($this))->setURI("http://download.bio2df.org/release/{$bVersion}/{$prefix}/{$ofile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sider/sider.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI()); if ($gz) { $output_file->setFormat("application/gzip"); } if (strstr(parent::getParameterValue('output_format'), "nt")) { $output_file->setFormat("application/n-triples"); } else { $output_file->setFormat("application/n-quads"); } $dataset_description .= $source_file->toRDF() . $output_file->toRDF(); } //foreach parent::setWriteFile($odir . parent::getBio2RDFReleaseFile()); parent::getWriteFile()->write($dataset_description); parent::getWriteFile()->close(); echo "done!" . PHP_EOL; }