/**
 * Fetch the selected Orphanet source files and append a dataset description
 * (source resource + Bio2RDF output resource) for each one to the release file.
 *
 * The 'files' parameter is either 'all' (every option returned by
 * getParameterList('files') except the first entry) or a comma-separated
 * list of keys into $this->filemap. Keys that are not in the file map are
 * reported and skipped instead of producing an undefined-index lookup.
 */
function run()
{
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $format = parent::getParameterValue('output_format');
    $dd = '';

    $files = parent::getParameterValue('files');
    if ($files == 'all') {
        $files = explode('|', parent::getParameterList('files'));
        // the first option in the list is dropped (presumably the 'all' keyword itself)
        array_shift($files);
    } else {
        $files = explode(',', $files);
    }

    foreach ($files as $file) {
        echo "processing {$file} ...";

        // guard against file names that have no entry in the file map
        if (!isset($this->filemap[$file])) {
            echo "unknown file {$file} ... skipping" . PHP_EOL;
            continue;
        }

        $lfile = $ldir . $this->filemap[$file];
        $rfile = parent::getParameterValue('download_url') . $this->filemap[$file];
        if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') {
            $ret = utils::downloadSingle($rfile, $lfile);
            if ($ret === false) {
                echo "unable to download {$file} ... skipping" . PHP_EOL;
                continue;
            }
        }

        parent::setReadFile($lfile, true);

        $ofile = "orphanet-" . $file . '.' . $format;
        $opath = $odir . $ofile;
        // computed once and reused for the dataset description below
        $gz = strpos($format, '.gz') !== false;

        // NOTE(review): the conversion step is disabled in this revision, so this
        // method does not itself produce $opath; confirm whether that is intended.
        /*
        parent::setWriteFile($odir.$ofile, $gz);
        $this->$file($lfile);
        parent::getWriteFile()->close();
        */
        parent::getReadFile()->close();
        parent::clear();
        echo "done!" . PHP_EOL;

        // dataset description
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("Orphanet: {$file}")
            ->setRetrievedDate(parent::getDate(filemtime($lfile)))
            ->setFormat("application/xml")
            ->setPublisher("http://www.orpha.net")
            ->setHomepage("http://www.orpha.net/")
            ->setRights("use")
            ->setRights("sharing-modified-version-needs-permission")
            ->setLicense("http://creativecommons.org/licenses/by-nd/3.0/")
            ->setDataset("http://identifiers.org/orphanet/");

        $prefix = parent::getPrefix();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        // the output file may not exist (see note above); fall back to the current
        // time instead of passing filemtime()'s false result on to getDate()
        $date = parent::getDate(file_exists($opath) ? filemtime($opath) : time());

        $output_file = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/orphanet/orphanet.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());

        if ($gz) {
            $output_file->setFormat("application/gzip");
        }
        if (strstr($format, "nt")) {
            $output_file->setFormat("application/n-triples");
        } else {
            $output_file->setFormat("application/n-quads");
        }

        $dd .= $source_file->toRDF() . $output_file->toRDF();
    } //foreach

    parent::writeToReleaseFile($dd);
}
/**
 * Download (when requested or when the local copy is missing) the HomoloGene
 * data file, convert it via process(), and write the dataset description to
 * the Bio2RDF release file in the output directory.
 */
function Run()
{
    $file = "homologene.data";
    $ldir = $this->GetParameterValue('indir');
    $odir = $this->GetParameterValue('outdir');
    $rdir = $this->GetParameterValue('download_url');
    $format = parent::getParameterValue('output_format');

    $lfile = $ldir . $file;
    $rfile = $rdir . $file;

    // Compare against the string 'true': a loose "== true" is also satisfied by
    // the string 'false', which would force a download on every run.
    $download = $this->GetParameterValue('download') == 'true';
    if (!file_exists($lfile)) {
        trigger_error($file . " not found. Will attempt to download.", E_USER_NOTICE);
        parent::setParameterValue('download', true);
        $download = true;
    }

    //download
    if ($download) {
        echo "downloading {$file} ... ";
        utils::downloadSingle($rfile, $lfile);
    }

    $ofile = 'homologene.' . $format;
    $gz = strpos($format, "gz") !== false;

    parent::setReadFile($lfile);
    parent::setWriteFile($odir . $ofile, $gz);
    echo "processing {$file}... ";
    $this->process();
    echo "done!" . PHP_EOL;
    parent::getWriteFile()->close();

    // generate the dataset release file
    // 'H' gives a zero-padded hour and gmdate() makes the literal 'Z' (UTC) truthful;
    // the previous 'G' + date() produced e.g. "T9:05:00Z" in local time.
    $source_file = (new DataResource($this))
        ->setURI($rfile)
        ->setTitle("NCBI Homologene")
        ->setRetrievedDate(gmdate("Y-m-d\\TH:i:s\\Z", filemtime($lfile)))
        ->setFormat("text/tab-separated-value")
        ->setPublisher("http://www.ncbi.nlm.nih.gov")
        ->setHomepage("http://www.ncbi.nlm.nih.gov/homologene")
        ->setRights("use-share-modify")
        ->setLicense("http://www.ncbi.nlm.nih.gov/About/disclaimer.html")
        ->setDataset("http://identifiers.org/homologene/");

    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = gmdate("Y-m-d\\TH:i:s\\Z");

    // title kept on one line so no line break ends up inside the literal
    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}")
        ->setSource($source_file->getURI())
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/homologene/homologene.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());

    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strstr($format, "nt")) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }

    $dataset_description = $source_file->toRDF() . $output_file->toRDF();
    echo "Generating dataset description... ";
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Convert the MIRIAM registry dump: fetch miriam.xml when it is missing or a
 * download was requested, parse it, and flush the RDF buffer to
 * "miriam.<output_format>" in the output directory.
 *
 * @return bool always true once the write file has been closed
 */
function Run()
{
    echo "processing miriam database";

    // locate the input file, downloading it when needed
    $inputDir = $this->getParameterValue('indir');
    $sourceUrl = $this->getParameterValue("download_url");
    $localPath = $inputDir . 'miriam.xml';

    $mustFetch = !file_exists($localPath) || $this->getParameterValue("download") == "true";
    if ($mustFetch) {
        utils::downloadSingle($sourceUrl, $localPath);
    }
    parent::setReadFile($localPath);

    // open the (optionally gzip-compressed) output file
    $format = parent::getParameterValue('output_format');
    $compress = (strstr($format, ".gz") !== false);
    $targetPath = parent::getParameterValue("outdir") . "miriam." . $format;
    parent::setWriteFile($targetPath, $compress);

    // convert and flush
    $this->parse();
    parent::WriteRDFBufferToWriteFile();
    $this->getWriteFile()->Close();

    return true;
}
/**
 * Convert the selected MeSH packages.
 *
 * The 'files' parameter is 'all' or a comma-separated list of package keys;
 * keys absent from getPackageMap() are ignored. Each package file name has
 * its "YEAR" placeholder replaced by the 'year' parameter, is downloaded
 * when missing or when 'download' is "true", and is converted by calling the
 * method named after the package key. The accumulated dataset description is
 * written to the release file at the end.
 */
function Run()
{
    // resolve which packages to convert
    $requested = trim(parent::getParameterValue('files'));
    $packageMap = $this->getPackageMap();
    if ($requested == 'all') {
        $files = $packageMap;
    } else {
        $files = array();
        foreach (explode(",", $requested) as $name) {
            if (!array_key_exists($name, $packageMap)) {
                continue;
            }
            $files[$name] = $packageMap[$name];
        }
    }

    $inDir = parent::getParameterValue('indir');
    $outDir = parent::getParameterValue('outdir');
    $year = parent::getParameterValue('year');
    $description = '';

    foreach ($files as $package => $pattern) {
        $fileName = str_replace("YEAR", $year, $pattern);
        $localPath = $inDir . $fileName;
        $remoteUrl = parent::getParameterValue("download_url") . $fileName;

        // download if necessary
        $haveLocalCopy = file_exists($localPath) && parent::getParameterValue('download') != "true";
        if (!$haveLocalCopy) {
            echo "Downloading {$fileName} ... ";
            if (utils::downloadSingle($remoteUrl, $localPath) === false) {
                trigger_error("Unable to get {$fileName}", E_USER_ERROR);
                continue;
            }
            echo "done!" . PHP_EOL;
        }

        // set up the output file
        $format = parent::getParameterValue('output_format');
        $outName = "mesh_" . $package . "." . $format;
        $compress = (bool) strstr($format, "gz");

        echo "processing {$package} ...";
        parent::setReadFile($localPath, false);
        parent::setWriteFile($outDir . $outName, $compress);
        // the package key doubles as the name of the parser method
        $this->{$package}();
        parent::writeRDFBufferToWriteFile();
        parent::getWriteFile()->close();
        echo "done!" . PHP_EOL;

        // describe the source file
        $sourceResource = (new DataResource($this))
            ->setURI($remoteUrl)
            ->setTitle("MeSH")
            ->setRetrievedDate(parent::getDate(filemtime($localPath)))
            ->setFormat("text/x-mesh-record")
            ->setPublisher("http://www.nlm.nih.gov")
            ->setHomepage("http://www.nlm.nih.gov/mesh/")
            ->setRights("use")
            ->setLicense("http://www.nlm.nih.gov/databases/download.html")
            ->setDataset("http://identifiers.org/mesh/");

        // describe the generated file
        $prefix = parent::getPrefix();
        $release = parent::getParameterValue('bio2rdf_release');
        $created = parent::getDate(filemtime($outDir . $outName));

        $outputResource = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/{$release}/{$prefix}/{$outName}")
            ->setTitle("Bio2RDF v{$release} RDF version of {$prefix}")
            ->setSource($sourceResource->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/mesh/mesh.php")
            ->setCreateDate($created)
            ->setHomepage("http://download.bio2rdf.org/release/{$release}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());

        if ($compress) {
            $outputResource->setFormat("application/gzip");
        }
        $isTriples = strstr(parent::getParameterValue('output_format'), "nt");
        $outputResource->setFormat($isTriples ? "application/n-triples" : "application/n-quads");

        $description .= $sourceResource->toRDF() . $outputResource->toRDF();
    }

    // write the accumulated dataset description
    parent::setWriteFile($outDir . $this->getBio2RDFReleaseFile($this->getNamespace()));
    parent::getWriteFile()->write($description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Walk the listing in the current read file (one "ns:id<TAB>name" record per
 * line), emit the basic description of every entry, then fetch and parse the
 * full entry. For the "pathway" database the KGML file is fetched and parsed
 * as well.
 *
 * Entries are skipped when $this->idlist is set and does not contain the id,
 * or when a required download fails.
 *
 * @param string $db KEGG database name; used for the type URI and class label
 */
function process($db)
{
    $inputDir = parent::getParameterValue('indir');

    while ($line = parent::getReadFile()->read()) {
        list($entryId, $label) = explode("\t", $line);
        list($entryNs, $localId) = explode(":", $entryId);

        // honour the optional id filter
        $excluded = isset($this->idlist) && !in_array($localId, $this->idlist);
        if ($excluded) {
            continue;
        }

        // when an organism is set the id is prefixed with the listing namespace
        if (isset($this->org)) {
            $localId = $entryNs . "_" . $localId;
        }

        $uri = $this->getNamespace() . $localId;
        $typeUri = parent::getVoc() . ucfirst($db);
        parent::addRDF(
            parent::describeIndividual($uri, $label, $typeUri)
            . parent::describeClass($typeUri, "KEGG {$db}")
            . parent::triplifyString($uri, parent::getVoc() . "internal-id", $entryId)
        );

        // now get the full record for this entry
        $entryFile = $inputDir . $localId . ".txt";
        $entryUrl = parent::getParameterValue("download_url") . "get/{$entryId}";
        $haveEntry = file_exists($entryFile) && parent::getParameterValue('download') != 'true';
        if (!$haveEntry) {
            echo "downloading {$entryId} ";
            if (utils::downloadSingle($entryUrl, $entryFile) === false) {
                echo "unable to download {$entryId} ... skipping" . PHP_EOL;
                continue;
            }
            echo "done. ";
        }

        echo "parsing {$entryId} ... ";
        $this->parseEntry($entryFile);
        parent::writeRDFBufferToWriteFile();

        if ($db === "pathway") {
            // the KGML download is requested under the "ko" form of the map id
            $koId = str_replace("map", "ko", $localId);
            $kgmlFile = $inputDir . $localId . ".kgml";
            $kgmlUrl = "http://www.kegg.jp/kegg-bin/download?entry=" . $koId . "&format=kgml";
            $haveKgml = file_exists($kgmlFile) && parent::getParameterValue('download') != 'true';
            if (!$haveKgml) {
                echo "downloading KGML for {$entryId} ";
                if (utils::downloadSingle($kgmlUrl, $kgmlFile) === false) {
                    echo "unable to download {$entryId} ... skipping" . PHP_EOL;
                    continue;
                }
                echo "done. ";
            }
            $this->parseKGML($kgmlFile);
            parent::writeRDFBufferToWriteFile();
        }

        echo "done!" . PHP_EOL;
    }
}
/**
 * Convert the selected NCBI Taxonomy packages.
 *
 * The 'files' parameter is 'all' or a comma-separated list of package keys;
 * keys absent from getPackageMap() are ignored. Each package archive is
 * downloaded when requested (or when missing), and for the zip-based
 * packages every supported member is streamed out of the archive and
 * converted by the method named after the member key. The dataset
 * description accumulated so far is (re)written to the release file after
 * each package.
 */
public function Run()
{
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $format = parent::getParameterValue('output_format');

    // which files are to be converted?
    $selectedPackage = trim(parent::getParameterValue('files'));
    if ($selectedPackage == 'all') {
        $files = $this->getPackageMap();
    } else {
        $sel_arr = explode(",", $selectedPackage);
        $pm = $this->getPackageMap();
        $files = array();
        foreach ($sel_arr as $a) {
            if (array_key_exists($a, $pm)) {
                $files[$a] = $pm[$a];
            }
        }
    }

    // archive members that have a converter method
    $supported = array("names", "nodes", "citations", "gencode", "division", "gi_taxid_prot", "gi_taxid_nucl");

    $dataset_description = '';
    foreach ($files as $key => $value) {
        $lfile = $ldir . $value['filename'];

        // Compare against the string 'true': loose "== true" / "== false" checks
        // treat the string 'false' as true, which forced a download every time.
        $download = parent::getParameterValue('download') == 'true';
        if (!file_exists($lfile) && !$download) {
            trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
            $this->SetParameterValue('download', true);
            $download = true;
        }

        //download all files [except mapping file]
        if ($download) {
            $rfile = $value["file_url"];
            // var_dump() prints directly and returns null, so it must not be concatenated
            echo "downloading {$rfile} ... ";
            utils::downloadSingle($rfile, $lfile);
        }

        if ($key == "taxdmp" || $key == "gi2taxid_protein" || $key == "gi2taxid_nucleotide") {
            //get the name of the zip archive
            $lfile = $value["filename"];
            // make sure we have the zip archive
            $zinfile = $ldir . $lfile;
            $zin = new ZipArchive();
            // open() returns true on success and an integer error code otherwise,
            // so a comparison with false can never detect a failure
            if ($zin->open($zinfile) !== true) {
                trigger_error("Unable to open {$zinfile}");
                exit;
            }

            $source_file = (new DataResource($this))
                ->setURI($value['file_url'])
                ->setTitle('NCBI Taxonomy - ' . $key)
                ->setRetrievedDate(date("Y-m-d\\TH:i:sP", filemtime($ldir . $lfile)))
                ->setFormat('text/tab-separated-value')
                ->setFormat('application/zip')
                ->setPublisher('http://www.ncbi.nlm.nih.gov')
                ->setHomepage('http://www.ncbi.nlm.nih.gov/taxonomy')
                ->setRights('use')
                ->setRights('attribution')
                ->setLicense('https://www.nlm.nih.gov/copyright.html')
                ->setDataset(parent::getDatasetURI());

            $prefix = parent::getPrefix();
            $bVersion = parent::getParameterValue('bio2rdf_release');
            $date = date("Y-m-d\\TH:i:sP");

            // license URL corrected: the host was missing its ".org" suffix
            $output_file = (new DataResource($this))
                ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")
                ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$key}")
                ->setSource($source_file->getURI())
                ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/taxonomy/taxonomy.php")
                ->setCreateDate($date)
                ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
                ->setPublisher("http://bio2rdf.org")
                ->setRights("use-share-modify")
                ->setRights("restricted-by-source-license")
                ->setLicense("http://creativecommons.org/licenses/by/3.0/")
                ->setDataset(parent::getDatasetURI());

            $dataset_description .= $output_file->toRDF() . $source_file->toRDF();

            //now iterate over the files in the ziparchive
            foreach ($value["contents"] as $k => $fn) {
                if (!in_array($k, $supported, true)) {
                    continue;
                }

                $fpin = $zin->getStream($fn);
                if (!$fpin) {
                    trigger_error("Unable to get pointer to {$fn} in {$zinfile}");
                    exit("failed\n");
                }

                //set the write file
                $gzoutfile = $odir . "taxonomy-{$k}" . "." . $format;
                $gz = strpos($format, 'gz') !== false;

                // the read file is opened on the archive and then pointed at the member stream
                parent::setReadFile($ldir . $lfile);
                parent::getReadFile()->SetFilePointer($fpin);
                parent::setWriteFile($gzoutfile, $gz);

                echo "processing {$fn}...\n";
                // the member key doubles as the name of the converter method
                $this->{$k}();
                $this->GetWriteFile()->Close();
                echo "done!" . PHP_EOL;
                parent::clear();
            } //foreach
        } //if key taxdmp

        // rewritten after every package with everything accumulated so far
        $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
        $this->getWriteFile()->write($dataset_description);
        $this->getWriteFile()->close();
    }
}
/**
 * Convert the selected DrugBank files and write a dataset description for
 * each to the release file.
 *
 * The 'files' parameter is 'all' (every option returned by
 * getParameterList('files') except the first entry) or a '|'-separated list.
 * Only 'drugbank' has a known source archive; any other entry is reported
 * and skipped. When 'id_list' is set, $this->id_list is populated with the
 * ids as array keys.
 */
function Run()
{
    $indir = parent::getParameterValue('indir');
    $outdir = parent::getParameterValue('outdir');
    $download_url = parent::getParameterValue('download_url');
    $format = parent::getParameterValue('output_format');

    if (parent::getParameterValue('files') == 'all') {
        $files = explode("|", parent::getParameterList('files'));
        array_shift($files);
    } else {
        $files = explode("|", parent::getParameterValue('files'));
    }

    if (parent::getParameterValue("id_list")) {
        // flipped so that ids become keys
        $this->id_list = array_flip(explode(",", parent::getParameterValue('id_list')));
    }

    foreach ($files as $f) {
        if ($f == 'drugbank') {
            $file = 'drugbank.xml.zip';
            $lname = 'drugbank';
        } else {
            // previously $file/$lname were undefined (or stale from an earlier
            // iteration) for any other entry
            trigger_error("No source file is known for '{$f}' ... skipping", E_USER_WARNING);
            continue;
        }

        // parser method is selected by name
        $fnx = 'parse_' . $f;
        $rfile = $download_url . $file;
        $lfile = $indir . $file;
        $cfile = $lname . "." . $format;

        // download
        // compare against the string 'true': "== true" is also satisfied by 'false'
        if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') {
            utils::downloadSingle($rfile, $lfile);
        }

        // setup the write
        $gz = strpos($format, ".gz") !== false;
        parent::setWriteFile($outdir . $cfile, $gz);
        echo $outdir . $cfile;

        if (file_exists($lfile)) {
            // call the parser
            echo "processing {$file} ..." . PHP_EOL;
            $this->{$fnx}($indir, $file);
            echo "done" . PHP_EOL;
            parent::clear();
        }
        parent::getWriteFile()->close();

        // dataset description: emitted with the graph URI temporarily switched
        // to the dataset URI, then restored below
        $ouri = parent::getGraphURI();
        parent::setGraphURI(parent::getDatasetURI());

        $source_version = parent::getDatasetVersion();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $prefix = parent::getPrefix();
        $date = date("Y-m-d\\TH:i:sP");

        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("DrugBank ({$file})")
            ->setRetrievedDate(date("Y-m-d\\TH:i:sP", filemtime($lfile)))
            ->setFormat("application/xml")
            ->setFormat("application/zip")
            ->setPublisher("http://drugbank.ca")
            ->setHomepage("http://drugbank.ca")
            ->setRights("use")
            ->setRights("by-attribution")
            ->setRights("no-commercial")
            ->setLicense("http://www.drugbank.ca/about")
            ->setDataset("http://identifiers.org/drugbank/");

        $output_file = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$cfile}")
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} v{$source_version}")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/drugbank/drugbank.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());

        if ($gz) {
            $output_file->setFormat("application/gzip");
        }
        if (strstr($format, "nt")) {
            $output_file->setFormat("application/n-triples");
        } else {
            $output_file->setFormat("application/n-quads");
        }

        parent::writeToReleaseFile($source_file->toRDF() . $output_file->toRDF());
        parent::setGraphURI($ouri);
    }

    parent::closeReleaseFile();
}
/**
 * Convert dbSNP records for a list of SNP identifiers into a single
 * "dbsnp.<output_format>" file and write the dataset description to the
 * release file.
 *
 * The first entry of the comma-separated 'files' parameter selects the SNP
 * source: 'all', 'clinical', 'omim' or 'pharmgkb'. Any other value is taken
 * as the explicit list of SNP identifiers.
 */
function run()
{
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $format = parent::getParameterValue('output_format');

    // Compare against the string 'true': a loose "== true" is also satisfied by
    // the string 'false', which forced every file to be downloaded again.
    $forceDownload = parent::getParameterValue('download') == 'true';

    // work out which snps to process
    $snps = explode(",", parent::getParameterValue('files'));
    if ($snps[0] == 'all') {
        $snps = $this->getSNPs();
    } elseif ($snps[0] == 'clinical') {
        $snps = $this->getSNPs(true);
    } elseif ($snps[0] == 'omim') {
        $lfile = $ldir . 'snp_omimvar.txt';
        if (!file_exists($lfile) || $forceDownload) {
            utils::DownloadSingle('ftp://ftp.ncbi.nlm.nih.gov/snp/Entrez/snp_omimvar.txt', $lfile);
        }
        $snps = $this->processOMIMVar($lfile);
    } elseif ($snps[0] == 'pharmgkb') {
        $lfile = $ldir . 'pharmgkb.snp.zip';
        if (!file_exists($lfile) || $forceDownload) {
            utils::DownloadSingle('http://www.pharmgkb.org/download.do?objId=rsid.zip&dlCls=common', $lfile);
        }
        $snps = $this->processPharmGKBSnps($lfile);
    }

    // file name kept separate from the local path so the release URI below
    // does not contain the output directory
    $ofile = "dbsnp." . $format;
    $outfile = $odir . $ofile;
    $gz = strpos($format, ".gz") !== false;
    parent::setWriteFile($outfile, $gz);

    $n = count($snps);
    $z = 0;
    // default so the description below has a URI even when no SNP is processed
    $rfile = parent::getParameterValue('download_url');
    foreach ($snps as $i => $snp) {
        $file = $snp . '.xml.gz';
        $infile = $ldir . $file;
        $rfile = parent::getParameterValue('download_url') . $snp;

        // Decide per file. Setting the global 'download' parameter here made one
        // missing file trigger a re-download of every subsequent SNP.
        $download = $forceDownload || !file_exists($infile);
        if ($download) {
            trigger_error("Downloading {$file}", E_USER_NOTICE);
            // written through the zlib wrapper so the local copy is gzip-compressed
            $ret = utils::downloadSingle($rfile, "compress.zlib://" . $infile, true);
            if ($ret === false) {
                continue;
            }
        }

        // process
        echo "Processing {$snp} (" . ($i + 1) . "/{$n})" . PHP_EOL;
        $this->parse($infile);
        parent::writeRDFBufferToWriteFile();

        // periodic clear(), on the first processed SNP and every 10000th thereafter
        if ($z++ % 10000 == 0) {
            parent::clear();
        }
    }
    parent::getWriteFile()->close();

    // generate the dataset description file
    // 'H' gives a zero-padded hour and gmdate() makes the literal 'Z' (UTC) truthful;
    // the previous 'G' + date() produced e.g. "T9:05:00Z" in local time.
    $date = gmdate("Y-m-d\\TH:i:s\\Z");

    // NOTE(review): $rfile is the URL of the last SNP in the list; confirm that
    // this is the intended source URI for the whole dataset.
    $source_file = (new DataResource($this))
        ->setURI($rfile)
        ->setTitle("dbSNP " . parent::getDatasetVersion())
        ->setRetrievedDate(gmdate("Y-m-d\\TH:i:s\\Z"))
        ->setFormat("application/xml")
        ->setPublisher("http://www.ncbi.nlm.nih.gov")
        ->setHomepage("http://www.ncbi.nlm.nih.gov/SNP/")
        ->setRights("use-share-modify")
        ->setLicense("http://www.ncbi.nlm.nih.gov/About/disclaimer.html")
        ->setDataset("http://identifiers.org/dbsnp/");

    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');

    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
        ->setSource($source_file->getURI())
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/dbsnp/dbsnp.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());

    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strstr($format, "nt")) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }

    $dataset_description = $source_file->toRDF() . $output_file->toRDF();
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
}