/**
 * Convert the selected IPI packages to RDF and record a dataset description.
 *
 * The 'files' parameter is either 'all' or a comma-separated list of keys of
 * getPackageMap(); unknown keys are ignored. When 'download' is set, every
 * file of the selected packages is fetched from 'download_url' into 'indir'.
 * Each selected file that getFilePaths() lists for 'indir' is then converted
 * by calling the method named after its package key.
 *
 * @return void
 */
public function Run()
{
    $dataset_description = '';
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');

    // first get the files that are to be processed
    $selectedPackage = trim(parent::getParameterValue('files'));
    $packageMap = $this->getPackageMap();
    if ($selectedPackage == 'all') {
        $files = $packageMap;
    } else {
        $files = array();
        foreach (explode(",", $selectedPackage) as $a) {
            if (array_key_exists($a, $packageMap)) {
                $files[$a] = $packageMap[$a];
            }
        }
    }

    // download
    if ($this->getParameterValue('download')) {
        $download_url = parent::getParameterValue('download_url');
        foreach ($files as $aP => $fn) {
            foreach ($fn as $aFn) {
                echo "downloading file {$aFn} :" . $download_url . $aFn . "..." . PHP_EOL;
                $contents = file_get_contents($download_url . $aFn);
                if ($contents === false) {
                    // Do not overwrite an existing local copy with an empty file.
                    trigger_error("Unable to download " . $download_url . $aFn, E_USER_WARNING);
                    continue;
                }
                file_put_contents($ldir . $aFn, $contents);
            }
        }
    }

    // 'H' gives a zero-padded hour and gmdate() makes the trailing 'Z' (UTC) true.
    $timestamp_format = 'Y-m-d\TH:i:s\Z';
    $output_format = parent::getParameterValue('output_format');
    $gz = strstr($output_format, "gz") ? true : false;

    // iterate over the files
    $paths = $this->getFilePaths($ldir, 'gz');
    foreach ($files as $k => $val) {
        foreach ($val as $fn) {
            if (!in_array($fn, $paths)) {
                continue;
            }
            $lfile = $fn;
            $ofile = $odir . basename($fn, ".gz") . "." . $output_format;
            parent::setWriteFile($ofile, $gz);
            parent::setReadFile($ldir . $lfile, true);

            $source_file = (new DataResource($this))
                ->setURI(parent::getParameterValue('download_url') . basename($fn))
                ->setTitle('International Protein Index filename: ' . basename($fn))
                ->setRetrievedDate(gmdate($timestamp_format, filemtime($ldir . $lfile)))
                ->setFormat('text/ipi-format')
                ->setFormat('application/zip')
                ->setPublisher('https://www.ebi.ac.uk')
                ->setHomepage('https://www.ebi.ac.uk/IPI')
                ->setRights('use')
                ->setRights('attribution')
                ->setLicense('https://www.ebi.ac.uk')
                ->setDataset(parent::getDatasetURI());

            $prefix = parent::getPrefix();
            $bVersion = parent::getParameterValue('bio2rdf_release');
            $date = gmdate($timestamp_format);
            $output_file = (new DataResource($this))
                ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")
                ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
                ->setSource($source_file->getURI())
                ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ipi/ipi.php")
                ->setCreateDate($date)
                ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
                ->setPublisher("http://bio2rdf.org")
                ->setRights("use-share-modify")
                ->setRights("restricted-by-source-license")
                ->setLicense("http://creativecommons.org/licenses/by/3.0/")
                ->setDataset(parent::getDatasetURI());
            $dataset_description .= $output_file->toRDF() . $source_file->toRDF();

            echo "processing {$fn} ...";
            // The package key doubles as the name of the parsing method.
            $this->{$k}();
            echo "done!" . PHP_EOL;

            // The release file is rewritten after every processed file with
            // the description accumulated so far.
            $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
            $this->getWriteFile()->write($dataset_description);
            $this->getWriteFile()->close();
        }
    }
}
/**
 * Convert the selected MGI report files to RDF and write the release file.
 *
 * The 'files' parameter is either 'all' or a comma-separated list of report
 * names. Each report "<name>.rpt" is downloaded from 'download_url' when it is
 * missing locally or when 'download' is 'true', then parsed by the method
 * named after the report. Reports whose download fails are skipped.
 *
 * @return void
 */
function Run()
{
    $idir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $files = parent::getParameterValue('files');

    if ($files == 'all') {
        $list = explode('|', parent::getParameterList('files'));
        // Drops the first list entry (presumably the 'all' keyword itself - confirm).
        array_shift($list);
    } else {
        $list = explode(',', $files);
    }

    $output_format = parent::getParameterValue('output_format');
    $gz = strstr($output_format, "gz") ? true : false;

    $dataset_description = '';
    foreach ($list as $item) {
        $lfile = $idir . $item . '.rpt';
        $rfile = parent::getParameterValue('download_url') . $item . '.rpt';
        if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') {
            echo "downloading {$item}...";
            $ret = Utils::DownloadSingle($rfile, $lfile);
            if ($ret != true) {
                continue;
            }
        }
        parent::setReadFile($lfile, true);

        echo "Processing {$item}...";
        // Keep the bare file name apart from the local path: the name is also
        // used to build the public download URI below.
        $ofile_name = $item . '.' . $output_format;
        parent::setWriteFile($odir . $ofile_name, $gz);
        $this->{$item}();
        parent::getWriteFile()->close();
        parent::getReadFile()->close();
        echo "Done" . PHP_EOL;
        parent::clear();

        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("MGI {$item}")
            ->setRetrievedDate(date("Y-m-d\\TH:i:s", filemtime($lfile)))
            ->setFormat("text")
            ->setPublisher("http://www.informatics.jax.org")
            ->setHomepage("http://www.informatics.jax.org")
            ->setRights("use")
            ->setLicense("http://www.informatics.jax.org/mgihome/other/copyright.shtml")
            ->setDataset("http://identifiers.org/mgi/");

        $prefix = parent::getPrefix();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $date = date("Y-m-d\\TH:i:s");
        $output_file = (new DataResource($this))
            // Use the file name only; the local 'outdir' path is not part of the URI.
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile_name}")
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$item} in {$prefix}")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/mgi/mgi.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());

        if ($gz) {
            $output_file->setFormat("application/gzip");
        }
        if (strstr($output_format, "nt")) {
            $output_file->setFormat("application/n-triples");
        } else {
            $output_file->setFormat("application/n-quads");
        }

        $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
    }

    // generate the dataset release file
    $this->setWriteFile($odir . parent::getBio2RDFReleaseFile());
    $this->getWriteFile()->write($dataset_description);
    $this->getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Build the dataset description for the selected Orphanet files.
 *
 * For every selected file the local copy is downloaded when it is missing or
 * when 'download' is 'true', opened as the read file and closed again, and a
 * source/output resource pair is appended to the release description.
 *
 * NOTE(review): the conversion step (setWriteFile / $this->$file($lfile) /
 * close of the write file) is disabled in this version, so this method does
 * not itself create the output file whose mtime is read below - confirm that
 * this is intended.
 *
 * @return void
 */
function run()
{
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $dd = '';

    $selection = parent::getParameterValue('files');
    if ($selection == 'all') {
        $files = explode('|', parent::getParameterList('files'));
        array_shift($files);
    } else {
        $files = explode(',', parent::getParameterValue('files'));
    }

    foreach ($files as $file) {
        echo "processing {$file} ...";
        $lfile = $ldir . $this->filemap[$file];
        $rfile = parent::getParameterValue('download_url') . $this->filemap[$file];

        if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') {
            if (utils::downloadSingle($rfile, $lfile) === false) {
                echo "unable to download {$file} ... skipping" . PHP_EOL;
                continue;
            }
        }

        parent::setReadFile($lfile, true);
        $suffix = parent::getParameterValue('output_format');
        $ofile = "orphanet-" . $file . '.' . $suffix;

        /*
         * Disabled conversion step, kept for reference:
         *   parent::setWriteFile($odir.$ofile, $gz);
         *   $this->$file($lfile);
         *   parent::getWriteFile()->close();
         */
        parent::getReadFile()->close();
        parent::clear();
        echo "done!" . PHP_EOL;

        // dataset description
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("Orphanet: {$file}")
            ->setRetrievedDate(parent::getDate(filemtime($lfile)))
            ->setFormat("application/xml")
            ->setPublisher("http://www.orpha.net")
            ->setHomepage("http://www.orpha.net/")
            ->setRights("use")
            ->setRights("sharing-modified-version-needs-permission")
            ->setLicense("http://creativecommons.org/licenses/by-nd/3.0/")
            ->setDataset("http://identifiers.org/orphanet/");

        $prefix = parent::getPrefix();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $date = parent::getDate(filemtime($odir . $ofile));

        $output_file = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/orphanet/orphanet.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());

        // Only a ".gz" suffix in the output format marks the output as gzip.
        $gz = strstr(parent::getParameterValue('output_format'), ".gz") !== false;
        if ($gz) {
            $output_file->setFormat("application/gzip");
        }

        $is_ntriples = strstr(parent::getParameterValue('output_format'), "nt");
        $output_file->setFormat($is_ntriples ? "application/n-triples" : "application/n-quads");

        $dd .= $source_file->toRDF() . $output_file->toRDF();
    }

    parent::writeToReleaseFile($dd);
}
/**
 * Convert the NCBI Homologene data file to RDF and write the release file.
 *
 * "homologene.data" is read from 'indir'; when it is missing, 'download' is
 * switched on and the file is fetched from 'download_url'. The file is parsed
 * by process() and the dataset description is written to 'outdir'.
 *
 * @return void|false false when no local copy of the data file is available
 */
function Run()
{
    $file = "homologene.data";
    $ldir = $this->GetParameterValue('indir');
    $odir = $this->GetParameterValue('outdir');
    $rdir = $this->GetParameterValue('download_url');
    $lfile = $ldir . $file;

    if (!file_exists($lfile)) {
        trigger_error($file . " not found. Will attempt to download.", E_USER_NOTICE);
        parent::setParameterValue('download', true);
    }

    // download
    $rfile = $rdir . $file;
    if ($this->GetParameterValue('download') == true) {
        echo "downloading {$file} ... ";
        utils::downloadSingle($rfile, $lfile);
    }

    // Without a local copy there is nothing to parse and filemtime() below
    // would fail, so stop here instead of producing a bogus description.
    if (!file_exists($lfile)) {
        trigger_error("Unable to obtain {$lfile}", E_USER_WARNING);
        return false;
    }

    $output_format = parent::getParameterValue('output_format');
    $ofile = 'homologene.' . $output_format;
    $gz = strstr($output_format, "gz") ? true : false;

    parent::setReadFile($lfile);
    parent::setWriteFile($odir . $ofile, $gz);
    echo "processing {$file}... ";
    $this->process();
    echo "done!" . PHP_EOL;
    parent::getWriteFile()->close();

    // 'H' gives a zero-padded hour and gmdate() makes the trailing 'Z' (UTC) true.
    $timestamp_format = 'Y-m-d\TH:i:s\Z';

    // generate the dataset release file
    $source_file = (new DataResource($this))
        ->setURI($rfile)
        ->setTitle("NCBI Homologene")
        ->setRetrievedDate(gmdate($timestamp_format, filemtime($lfile)))
        ->setFormat("text/tab-separated-value")
        ->setPublisher("http://www.ncbi.nlm.nih.gov")
        ->setHomepage("http://www.ncbi.nlm.nih.gov/homologene")
        ->setRights("use-share-modify")
        ->setLicense("http://www.ncbi.nlm.nih.gov/About/disclaimer.html")
        ->setDataset("http://identifiers.org/homologene/");

    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = gmdate($timestamp_format);
    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}")
        ->setSource($source_file->getURI())
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/homologene/homologene.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());

    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strstr($output_format, "nt")) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }

    $dataset_description = $source_file->toRDF() . $output_file->toRDF();

    echo "Generating dataset description... ";
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Convert every gzipped UniSTS file found in 'indir' to RDF.
 *
 * When 'download' is set, the files listed by getFtpFileList() are fetched
 * from 'download_url' into 'indir' first. Every path returned by
 * getFilePaths() is then parsed by process(), and the dataset description
 * accumulated so far is written to the release file in 'outdir'.
 *
 * @return void
 */
function run()
{
    $dataset_description = '';
    $ldir = parent::GetParameterValue('indir');
    $odir = parent::GetParameterValue('outdir');

    // download
    if ($this->GetParameterValue('download') == true) {
        $list = $this->getFtpFileList('ftp.ncbi.nih.gov');
        $total = count($list);
        $counter = 1;
        foreach ($list as $f) {
            $url = parent::GetParameterValue('download_url') . $f;
            echo "downloading file {$counter} out of {$total} :" . $url . "... " . PHP_EOL;
            $counter++;
            $contents = file_get_contents($url);
            if ($contents === false) {
                // Do not overwrite an existing local copy with an empty file.
                trigger_error("Unable to download " . $url, E_USER_WARNING);
                continue;
            }
            file_put_contents($ldir . $f, $contents);
        }
    }

    // 'H' gives a zero-padded hour and gmdate() makes the trailing 'Z' (UTC) true.
    $timestamp_format = 'Y-m-d\TH:i:s\Z';
    $output_format = parent::getParameterValue('output_format');
    $gz = strstr($output_format, "gz") ? true : false;

    // iterate over the files
    $paths = $this->getFilePaths($ldir, 'gz');
    foreach ($paths as $aPath) {
        $lfile = $aPath;
        $ofile = $odir . basename($aPath, ".gz") . "." . $output_format;
        parent::setWriteFile($ofile, $gz);
        parent::setReadFile($ldir . $lfile, true);

        $source_file = (new DataResource($this))
            ->setURI(parent::getParameterValue('download_url') . basename($aPath))
            ->setTitle('NCBI UniSTS filename: ' . basename($aPath))
            ->setRetrievedDate(gmdate($timestamp_format, filemtime($ldir . $lfile)))
            ->setFormat('xml/unists-format')
            ->setFormat('application/zip')
            ->setPublisher('https://www.ncbi.nlm.nih.gov')
            ->setHomepage('https://www.ncbi.nlm.nih.gov/unists')
            ->setRights('use')
            ->setRights('attribution')
            ->setLicense('https://www.nlm.nih.gov/copyright.html')
            ->setDataset(parent::getDatasetURI());

        $prefix = parent::getPrefix();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $date = gmdate($timestamp_format);
        $output_file = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/unists/unists.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());
        $dataset_description .= $output_file->toRDF() . $source_file->toRDF();

        echo "processing {$aPath} ...";
        $this->process();
        echo "done!" . PHP_EOL;

        // The release file is rewritten after every processed file with the
        // description accumulated so far.
        $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
        $this->getWriteFile()->write($dataset_description);
        $this->getWriteFile()->close();
    }
}
/**
 * Build pdb2rdf with Maven and run it over the input directory.
 *
 * Steps: check dependencies, build the project next to this file, optionally
 * download the input files, extract the pdb2rdf command line tool and run it
 * from 'indir' to 'outdir'. The script exits when the dependencies are not
 * met or when pdb2rdf cannot be run.
 *
 * @return void
 */
function Run()
{
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');

    // check dependencies
    if (!$this->checkDependencies()) {
        trigger_error("Dependencies not met!", E_USER_ERROR);
        exit;
    }

    // build pdb2rdf
    echo "building pdb2rdf..." . PHP_EOL;
    // Quote the path so an install directory containing spaces or shell
    // metacharacters does not break the command.
    $cmd = "mvn clean install -DskipTests -f " . escapeshellarg(__DIR__ . "/pom.xml");
    $build_out = shell_exec($cmd);
    if (!$this->verifyMavenBuildOutput($build_out)) {
        trigger_error("Could not build pdb2rdf. Please try manually!", E_USER_ERROR);
        return;
    }

    // now check if download is desired
    if ($this->getParameterValue('download') && !$this->downloadFiles($ldir)) {
        trigger_error("Not all files downloaded!", E_USER_WARNING);
    }

    // extract pdb2rdf-cli from the target directory
    if (!$this->extractCli()) {
        trigger_error("Could not extract pdb2rdf!", E_USER_ERROR);
    }

    // now get ready to run pdb2rdf.sh
    if (!$this->runPdb2Rdf($ldir, $odir)) {
        trigger_error("Could not run Pdb2RDF correctly!", E_USER_ERROR);
        exit;
    }
    echo "done!\n";
}
/**
 * Convert the MIRIAM registry XML file to RDF.
 *
 * "miriam.xml" is fetched from 'download_url' into 'indir' when it is missing
 * locally or when 'download' is "true", parsed by parse(), and the RDF buffer
 * is flushed to "miriam.<output_format>" in 'outdir'.
 *
 * @return bool always true
 */
function Run()
{
    echo "processing miriam database";

    // directory shortcuts
    $ldir = $this->getParameterValue('indir');
    $odir = $this->getParameterValue('outdir');

    // download and set the read file
    $rfile = $this->getParameterValue("download_url");
    $lfile = $ldir . 'miriam.xml';
    $use_local_copy = file_exists($lfile) && $this->getParameterValue("download") != "true";
    if (!$use_local_copy) {
        utils::downloadSingle($rfile, $lfile);
    }
    parent::setReadFile($lfile);

    // set the write file
    $outfile = "miriam." . parent::getParameterValue('output_format');
    $gz = strstr(parent::getParameterValue('output_format'), ".gz") !== false;
    parent::setWriteFile(parent::getParameterValue("outdir") . $outfile, $gz);

    // parse, flush the buffered RDF and release the output file
    $this->parse();
    parent::WriteRDFBufferToWriteFile();
    $this->getWriteFile()->Close();

    return true;
}
/**
 * Convert SABIO-RK reactions (BioPAX) to Bio2RDF and write the release file.
 *
 * The 'files' parameter selects reactions: 'all', a comma-separated list of
 * reaction ids, a "from-to" range, or a single id. The list of reaction ids
 * and each reaction's BioPAX document are cached in 'indir' and re-fetched
 * when 'download' is 'true'. Reactions whose download fails are skipped.
 *
 * @return void|false false when the reaction list cannot be parsed
 */
function Run()
{
    $idir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $files = parent::getParameterValue('files');

    // set the work: $myfiles stays unset (null) when every reaction is wanted
    $myfiles = null;
    if ($files != 'all') {
        // check if comma-separated, or hyphen-range
        $list = explode(",", $files);
        if (count($list) == 1) {
            // try hyphen separated
            $range = explode("-", $files);
            if (count($range) == 2) {
                for ($i = $range[0]; $i <= $range[1]; $i++) {
                    $myfiles[] = $i;
                }
            } else {
                // must be a single entry
                $myfiles[] = $files;
            }
        } else {
            $myfiles = $list;
        }
    }

    $rest_uri = 'http://sabiork.h-its.org/sabioRestWebServices/';
    $getReactionIds_url = $rest_uri . "suggestions/SABIOReactionIDs";
    $reaction_list_file = $idir . "reactions.xml";
    if (!file_exists($reaction_list_file) || parent::getParameterValue('download') == 'true') {
        $xml = file_get_contents($getReactionIds_url);
        // Test the downloaded content, not the target path: a failed download
        // must not be cached as an empty reaction list.
        if (FALSE === $xml) {
            trigger_error("Unable to download {$getReactionIds_url}", E_USER_WARNING);
            exit;
        }
        $f = new FileFactory($reaction_list_file);
        $f->Write($xml);
        $f->Close();
    }

    $xml = simplexml_load_file($reaction_list_file);
    if ($xml === false) {
        trigger_error("Unable to parse {$reaction_list_file}", E_USER_WARNING);
        return false;
    }
    $total = count($xml->SABIOReactionID);
    if (isset($myfiles)) {
        $total = count($myfiles);
    }

    $i = 0;
    parent::setCheckpoint('dataset');
    // Remember the graph URI so it can be restored before the description is written.
    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    $suffix = parent::getParameterValue('output_format');
    $ofile = "sabiork." . $suffix;
    $gz = strstr($suffix, "gz") ? true : false;
    parent::setWriteFile($odir . $ofile, $gz);

    foreach ($xml->SABIOReactionID as $reaction) {
        parent::setCheckpoint('file');
        // Work with the element's text so comparisons and concatenations
        // operate on a plain string rather than a SimpleXMLElement.
        $rid = (string) $reaction;
        if (isset($myfiles) && !in_array($rid, $myfiles)) {
            continue;
        }
        $i++;
        echo "{$i} / {$total} : reaction {$rid}" . PHP_EOL;

        $reaction_file = $idir . "reaction_" . $rid . ".owl.gz";
        if (!file_exists($reaction_file) || $this->GetParameterValue('download') == 'true') {
            $url = $rest_uri . 'searchKineticLaws/biopax?q=SabioReactionID:' . $rid;
            $data = file_get_contents($url);
            if ($data === FALSE) {
                continue;
            }
            $f = new FileFactory($reaction_file, true);
            $f->Write($data);
            $f->Close();
        }
        $buf = file_get_contents("compress.zlib://" . $reaction_file);

        // send for parsing
        $p = new BioPAX2Bio2RDF($this);
        $p->SetBuffer($buf)
            ->SetBioPAXVersion(3)
            ->SetBaseNamespace("http://sabio.h-its.org/biopax#")
            ->SetBio2RDFNamespace("http://bio2rdf.org/sabiork:")
            ->SetDatasetURI($this->GetDatasetURI());
        $rdf = $p->Parse();
        parent::getWriteFile()->Write($rdf);
    }
    parent::getWriteFile()->Close();

    // 'H' gives a zero-padded hour and gmdate() makes the trailing 'Z' (UTC) true.
    $timestamp_format = 'Y-m-d\TH:i:s\Z';

    // generate dataset description
    echo "Generating dataset description... ";
    $source_file = (new DataResource($this))
        ->setURI("http://sabiork.h-its.org/sabioRestWebServices/searchKineticLaws/biopax")
        ->setTitle("SABIO-RK Biochemical Reaction Kinetics Database")
        ->setRetrievedDate(gmdate($timestamp_format, filemtime($odir . $ofile)))
        ->setFormat("text/xml")
        ->setPublisher("http://sabio.villa-bosch.de/")
        ->setHomepage("http://sabio.villa-bosch.de/")
        ->setRights("use-share-modify")
        ->setRights("no-commercial")
        ->setLicense("http://sabio.villa-bosch.de/layouts/content/termscondition.gsp")
        ->setDataset("http://identifiers.org/sabiork.reaction/");

    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = gmdate($timestamp_format);
    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
        ->setSource($source_file->getURI())
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sabiork/sabiork.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());

    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strstr($suffix, "nt")) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }

    $dataset_description = $source_file->toRDF() . $output_file->toRDF();

    // write dataset description to file
    parent::setGraphURI($graph_uri);
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Convert the FDA National Drug Code zip archive to RDF.
 *
 * The archive named by the last path segment of 'download_url' is downloaded
 * into 'indir' when it is missing or when 'download' is 'true'. Every selected
 * entry "<file>.txt" of the archive is handed, as a stream, to the method
 * named after the entry, and all output goes to "ndc.<output_format>".
 *
 * @return void|false false when an entry cannot be read from the archive
 */
function Run()
{
    $ldir = $this->GetParameterValue('indir');
    $odir = $this->GetParameterValue('outdir');
    $rfile = $this->GetParameterValue('download_url');
    $lfile = substr($rfile, strrpos($rfile, "/") + 1);

    // check if exists
    if (!file_exists($ldir . $lfile) or parent::getParameterValue('download') == 'true') {
        echo "dowloading {$rfile} ...";
        trigger_error("Will attempt to download ", E_USER_NOTICE);
        Utils::DownloadSingle($rfile, $ldir . $lfile);
        echo "done" . PHP_EOL;
    }

    // make sure we have the zip archive
    $zin = new ZipArchive();
    // open() reports failure as an integer error code, so only a strict
    // comparison with true detects an unusable archive.
    if ($zin->open($ldir . $lfile) !== true) {
        trigger_error("Unable to open {$ldir}{$lfile}");
        exit;
    }

    // get the work
    if ($this->GetParameterValue('files') == 'all') {
        $files = explode("|", $this->GetParameterList('files'));
        array_shift($files);
    } else {
        $files = explode("|", $this->GetParameterValue('files'));
    }

    $output_format = parent::getParameterValue('output_format');
    $gz = strstr($output_format, "gz") ? true : false;
    $outfile = "ndc." . $output_format;
    parent::setWriteFile($odir . $outfile, $gz);

    // now go through each item in the zip file and process
    foreach ($files as $file) {
        echo "Processing {$file}... ";
        $fpin = $zin->getStream($file . ".txt");
        if (!$fpin) {
            trigger_error("Unable to get pointer to {$file} in {$ldir}{$lfile}", E_USER_ERROR);
            return FALSE;
        }
        $this->{$file}($fpin);
        parent::writeRDFBufferToWriteFile();
        echo "done!" . PHP_EOL;
    }
    parent::getWriteFile()->close();

    // 'H' gives a zero-padded hour and gmdate() makes the trailing 'Z' (UTC) true.
    $timestamp_format = 'Y-m-d\TH:i:s\Z';

    echo "Generating dataset description for {$outfile}... ";
    // start generating dataset description file
    $source_file = (new DataResource($this))
        ->setURI($rfile)
        ->setTitle("FDA National Drug Code Directory")
        ->setRetrievedDate(gmdate($timestamp_format, filemtime($ldir . $lfile)))
        ->setFormat("text/tab-separated-value")
        ->setFormat("application/zip")
        ->setPublisher("http://www.fda.gov")
        ->setHomepage("http://www.fda.gov/Drugs/InformationOnDrugs/ucm142438.htm")
        ->setRights("use-share")
        ->setLicense(null)
        ->setDataset("http://identifiers.org/ndc/");

    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = gmdate($timestamp_format);
    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outfile}")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}")
        ->setSource($source_file->getURI())
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ndc/ndc.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());

    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strstr($output_format, "nt")) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }

    $dataset_description = $source_file->toRDF() . $output_file->toRDF();

    // write dataset description to file
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Convert the selected PharmGKB zip archives to RDF.
 *
 * The work list is the 'files' parameter ('all' or comma-separated) plus the
 * comma-separated 'additional' parameter unless that is 'none'. For each
 * archive the relevant zip entries are streamed to the parsing method named
 * after the file (or after the entry, for "annotations"), and one
 * source/output description pair is recorded per zip entry.
 *
 * @return void|false false when a zip entry cannot be opened
 */
function run()
{
    // get the file list
    if ($this->GetParameterValue('files') == 'all') {
        $files = explode("|", $this->GetParameterList('files'));
        array_shift($files);
    } else {
        $files = explode(",", $this->GetParameterValue('files'));
    }
    if ($this->getParameterValue('additional') != 'none') {
        $f = explode(",", $this->getParameterValue('additional'));
        $files = array_merge($files, $f);
    }

    $ldir = $this->GetParameterValue('indir');
    $odir = $this->GetParameterValue('outdir');
    $rdir = $this->GetParameterValue('download_url');

    // 'H' gives a zero-padded hour and gmdate() makes the trailing 'Z' (UTC) true.
    $timestamp_format = 'Y-m-d\TH:i:s\Z';

    $dataset_description = '';
    foreach ($files as $file) {
        $suffix = ".zip";
        $lfile = $ldir . $file . $suffix;
        $rfile = $rdir . $file . $suffix;

        if ($file == "offsides" && !file_exists($lfile)) {
            echo "downloading {$file} ...";
            $rfile = "http://www.pharmgkb.org/redirect.jsp?p=ftp%3A%2F%2Fftpuserd%3AGKB4ftp%40ftp.pharmgkb.org%2Fdownload%2Ftatonetti%2F3003377s-offsides.zip";
            utils::DownloadSingle($rfile, $lfile);
            echo "done" . PHP_EOL;
        } elseif ($file == "twosides" && !file_exists($lfile)) {
            echo "downloading {$file} ...";
            $rfile = "http://www.pharmgkb.org/redirect.jsp?p=ftp%3A%2F%2Fftpuserd%3AGKB4ftp%40ftp.pharmgkb.org%2Fdownload%2Ftatonetti%2F3003377s-twosides.zip";
            utils::DownloadSingle($rfile, $lfile);
            echo "done" . PHP_EOL;
        } elseif ($file == 'annotations' || $file == 'relationships') {
            // These archives are never downloaded here; they must be supplied locally.
            if (!file_exists($lfile)) {
                echo "Contact PharmGKB to get access to variants/clinical variants; save file as annotations.zip" . PHP_EOL;
                continue;
            }
        } else {
            if (!file_exists($lfile) || parent::getParameterValue('download') == true) {
                echo "Downloading {$lfile} ... ";
                Utils::DownloadSingle('https://www.pharmgkb.org/download.do?objId=' . $file . '.zip&dlCls=common', $lfile);
                echo "done" . PHP_EOL;
            }
        }

        // get a pointer to the file in the zip archive
        if (!file_exists($lfile)) {
            echo "no local copy of {$lfile} . skipping" . PHP_EOL;
            continue;
        }
        $zin = new ZipArchive();
        // open() reports failure as an integer error code, so only a strict
        // comparison with true detects an unusable archive.
        if ($zin->open($lfile) !== true) {
            trigger_error("Unable to open {$lfile}");
            exit;
        }

        // choose the archive entries to parse
        $zipentries = array();
        switch ($file) {
            case "annotations":
                // exclude: 'clinical_ann.tsv','study_parameters.tsv'
                $zipentries = array('clinical_ann_metadata.tsv', 'var_drug_ann.tsv', 'var_pheno_ann.tsv', 'var_fa_ann.tsv');
                break;
            case "pathways":
                // every entry except those with a "txt" extension
                for ($i = 0; $i < $zin->numFiles; $i++) {
                    $stat = $zin->statIndex($i);
                    $entry = $stat['name'];
                    if (pathinfo($entry, PATHINFO_EXTENSION) != "txt") {
                        $zipentries[] = $entry;
                    }
                }
                break;
            case "relationships":
                $zipentries = array("relationships.tsv");
                break;
            case 'offsides':
                $zipentries = array('3003377s-offsides.tsv');
                break;
            case 'twosides':
                $zipentries = array('3003377s-twosides.tsv');
                break;
            default:
                $zipentries = array($file . ".tsv");
        }

        // set the write file, parse, write and close
        $suffix = parent::getParameterValue('output_format');
        $outfile = $file . '.' . $suffix;
        $gz = strstr($suffix, "gz") ? true : false;
        $this->SetWriteFile($odir . $outfile, $gz);

        foreach ($zipentries as $zipentry) {
            if (($fp = $zin->getStream($zipentry)) === FALSE) {
                trigger_error("Unable to get {$zipentry} in ziparchive {$lfile}");
                return FALSE;
            }
            // Read from the zip entry's stream rather than from the archive file itself.
            $this->SetReadFile($lfile);
            $this->GetReadFile()->SetFilePointer($fp);

            if ($file == "annotations") {
                // each annotation entry has its own parsing method, named after the entry
                $fnx = substr($zipentry, 0, strpos($zipentry, ".tsv"));
                echo "processing {$zipentry}..";
            } elseif ($file == 'pathways') {
                $fnx = 'pathways';
                echo "processing {$fnx} ({$zipentry})... ";
            } else {
                $fnx = $file;
                echo "processing {$fnx} ... ";
            }
            $this->{$fnx}();
            parent::writeRDFBufferToWriteFile();
            parent::clear();
            echo "done!" . PHP_EOL;

            // generate the dataset release file
            $source_file = (new DataResource($this))
                ->setURI($rfile)
                ->setTitle("Pharmacogenomics Knowledge Base ({$zipentry})")
                ->setRetrievedDate(gmdate($timestamp_format, filemtime($lfile)))
                ->setFormat("text/tab-separated-value")
                ->setFormat("application/zip")
                ->setPublisher("http://www.pharmgkb.org/")
                ->setHomepage("http://www.pharmgkb.org/")
                ->setRights("use")
                ->setRights("no-commercial")
                ->setLicense("http://www.pharmgkb.org/page/policies")
                ->setDataset("http://identifiers.org/pharmgkb/");

            $prefix = parent::getPrefix();
            $bVersion = parent::getParameterValue('bio2rdf_release');
            $date = gmdate($timestamp_format);
            $output_file = (new DataResource($this))
                ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outfile}")
                ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} {$file} (generated at {$date})")
                ->setSource($source_file->getURI())
                ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/pharmgkb/pharmgkb.php")
                ->setCreateDate($date)
                ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
                ->setPublisher("http://bio2rdf.org")
                ->setRights("use-share-modify")
                ->setRights("by-attribution")
                ->setRights("restricted-by-source-license")
                ->setLicense("http://creativecommons.org/licenses/by/3.0/")
                ->setDataset(parent::getDatasetURI());

            if ($gz) {
                $output_file->setFormat("application/gzip");
            }
            if (strstr($suffix, "nt")) {
                $output_file->setFormat("application/n-triples");
            } else {
                $output_file->setFormat("application/n-quads");
            }

            $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
        }
        $this->GetWriteFile()->Close();
    }

    echo "Generating dataset description... ";
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Convert the HGNC complete set to RDF and write the release file.
 *
 * "hgnc_complete_set.txt.gz" is read from 'indir'; when it is missing and
 * 'download' is off, 'download' is switched on. The 'download_url' parameter
 * is used as the full URL of the file. The file is parsed by process() into
 * "hgnc.<output_format>" in 'outdir'.
 *
 * @return void
 */
function Run()
{
    $file = "hgnc_complete_set.txt.gz";
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $rdir = parent::getParameterValue('download_url');
    $lfile = $ldir . $file;

    if (!file_exists($lfile) && parent::getParameterValue('download') == false) {
        trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
        parent::setParameterValue('download', true);
    }

    // download the hgnc file
    if (parent::getParameterValue('download') == true) {
        echo "downloading {$file} ... ";
        Utils::DownloadSingle($rdir, $lfile);
    }

    $output_format = parent::getParameterValue('output_format');
    $ofile = $odir . "hgnc." . $output_format;
    $gz = strstr($output_format, "gz") ? true : false;

    parent::setWriteFile($ofile, $gz);
    parent::setReadFile($lfile, true);
    echo "processing {$file}... ";
    $this->process();
    echo "done!" . PHP_EOL;

    // close write file (exactly once; it is not touched again below)
    parent::getWriteFile()->close();
    echo PHP_EOL;

    // 'H' gives a zero-padded hour and gmdate() makes the trailing 'Z' (UTC) true.
    $timestamp_format = 'Y-m-d\TH:i:s\Z';

    // generate the dataset release file
    echo "generating dataset release file... ";
    $source_file = (new DataResource($this))
        ->setURI($rdir)
        ->setTitle('HUGO Gene Nomenclature Committee (HGNC)')
        ->setRetrievedDate(gmdate($timestamp_format, filemtime($lfile)))
        ->setFormat('text/tab-separated-value')
        ->setFormat('application/zip')
        ->setPublisher('http://www.genenames.org/')
        ->setHomepage('http://www.genenames.org/data/gdlw_columndef.html')
        ->setRights('use')
        ->setRights('attribution')
        ->setLicense('http://www.genenames.org/about/overview')
        ->setDataset(parent::getDatasetURI());

    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = gmdate($timestamp_format);
    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
        ->setSource($source_file->getURI())
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/hgnc/hgnc.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());

    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strstr($output_format, "nt")) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }

    $dataset_description = $source_file->toRDF() . $output_file->toRDF();
    $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
    $this->getWriteFile()->write($dataset_description);
    $this->getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Convert the selected WormBase files into RDF and write the dataset release file.
 *
 * The 'files' parameter is either 'all' (every entry of the parameter list
 * after the first one) or a comma-separated list of file keys. Each key must
 * exist in the local/remote file maps below and is also the name of the
 * method on this object that parses the file. Unknown keys are skipped with
 * a warning.
 */
public function run()
{
    if (parent::getParameterValue('files') == 'all') {
        $files = explode("|", parent::getParameterList('files'));
        // the first entry of the parameter list is dropped; the rest are the file keys
        array_shift($files);
    } else {
        $files = explode(",", parent::getParameterValue('files'));
    }

    $release = parent::getParameterValue('release');
    $releaseb = "WS249";

    $remote_files = array(
        "geneIDs" => "species/c_elegans/annotation/geneIDs/c_elegans.PRJNA13758." . $release . ".geneIDs.txt.gz",
        "functional_descriptions" => "species/c_elegans/annotation/functional_descriptions/c_elegans.PRJNA13758." . $release . ".functional_descriptions.txt.gz",
        "gene_interactions" => "species/c_elegans/annotation/gene_interactions/c_elegans.PRJNA13758." . $release . ".gene_interactions.txt.gz",
        "gene_associations" => "releases/current-production-release/ONTOLOGY/gene_association." . $releaseb . ".wb",
        "phenotype_associations" => "releases/current-production-release/ONTOLOGY/phenotype_association." . $releaseb . ".wb",
    );
    $local_files = array(
        "geneIDs" => "wormbase." . $release . ".genes.txt.gz",
        "functional_descriptions" => "wormbase." . $release . ".functional_descriptions.txt.gz",
        "gene_interactions" => "wormbase." . $release . ".gene_interactions.txt.gz",
        "gene_associations" => "wormbase." . $release . ".gene_association.wb",
        "phenotype_associations" => "wormbase." . $release . ".phenotype_associations.wb",
    );

    $idir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $rdir = parent::getParameterValue('download_url');
    $format = parent::getParameterValue('output_format');
    $gz = strstr($format, "gz") ? true : false;

    $dataset_description = '';

    // remember the current graph so it can be restored once all files are done
    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    foreach ($files as $file) {
        // the key selects both the file names and the parser method, so an
        // unknown key can neither be located nor processed
        if (!isset($local_files[$file], $remote_files[$file])) {
            trigger_error("Unknown file '{$file}' requested. Skipping.", E_USER_WARNING);
            continue;
        }

        $lfile = $idir . $local_files[$file];
        $rfile = $rdir . $remote_files[$file];

        $missing = !file_exists($lfile);
        if ($missing || parent::getParameterValue('download') == true) {
            // only report the file as missing when it really is; a forced
            // download of an existing file is not a warning condition
            if ($missing) {
                trigger_error($lfile . " not found. Will attempt to download." . PHP_EOL, E_USER_WARNING);
            }
            echo "Downloading {$rfile}... ";
            Utils::DownloadSingle($rfile, $lfile);
            echo "done!" . PHP_EOL;
        }

        // decide on compression from the file suffix, not from "gz" appearing
        // anywhere in the path (e.g. in the input directory name)
        parent::setReadFile($lfile, substr($lfile, -3) === ".gz");

        $ofile = "wormbase." . $file . "." . $format;
        parent::setWriteFile($odir . $ofile, $gz);

        echo "Processing {$file}... ";
        $this->{$file}();
        echo "done!" . PHP_EOL;
        parent::getWriteFile()->close();

        // generate the dataset release file
        echo "Generating dataset description for {$ofile}... ";

        // dataset description
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("WormBase Release " . $release . " subset ({$file})")
            ->setRetrievedDate(date("Y-m-d\\TG:i:s\\Z", filemtime($lfile)))
            ->setFormat("text/tab-separated-value")
            ->setFormat("application/gzip")
            ->setPublisher("http://wormbase.org/")
            ->setHomepage("http://wormbase.org/")
            ->setRights("use")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://www.wormbase.org/about/policies")
            ->setDataset("http://identifiers.org/wormbase/");

        $prefix = parent::getPrefix();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $date = date("Y-m-d\\TG:i:s\\Z");

        $output_file = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
            // single-line title: a raw line break does not belong in the title literal
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/wormbase/wormbase.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());

        if ($gz) {
            $output_file->setFormat("application/gzip");
        }
        if (strstr($format, "nt")) {
            $output_file->setFormat("application/n-triples");
        } else {
            $output_file->setFormat("application/n-quads");
        }

        $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
        echo "done!" . PHP_EOL;
    }

    parent::setGraphURI($graph_uri);

    // write the accumulated dataset description to the release file
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
}
/**
 * Convert the selected iRefIndex MITAB archives into RDF and write the
 * dataset release file.
 *
 * The 'files' parameter is either 'all' or a comma-separated list of names;
 * each name is mapped to "<Name>.mitab.<version>.txt.zip" under 'indir'
 * (downloaded from 'download_url' when missing or when 'download' is set).
 * The first entry of each archive is streamed to Parse().
 *
 * @return bool TRUE when every file was processed; FALSE when a download
 *              fails or an archive entry cannot be read. Failure to open an
 *              archive or a parsing error terminates the script.
 */
function Run()
{
    // get the file list
    if (parent::getParameterValue('files') == 'all') {
        $files = array('all');
    } else {
        $files = explode(",", parent::getParameterValue('files'));
    }

    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $rdir = parent::getParameterValue('download_url');
    $version = parent::getParameterValue("version");
    $format = parent::getParameterValue('output_format');
    $gz = (strstr($format, ".gz") !== FALSE);

    $dataset_description = '';

    foreach ($files as $file) {
        $download = parent::getParameterValue('download');
        $zip_file = ucfirst($file) . ".mitab." . $version . ".txt.zip";
        $lfile = $ldir . $zip_file;
        $rfile = $rdir . $zip_file;
        $ofile = "irefindex-" . $file . "." . $format;

        if (!file_exists($lfile)) {
            trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
            $download = true;
        }

        if ($download == true) {
            echo "downloading {$rfile}" . PHP_EOL;
            if (FALSE === Utils::DownloadSingle($rfile, $lfile)) {
                trigger_error("Error in Download");
                return FALSE;
            }
        }

        $zin = new ZipArchive();
        // ZipArchive::open() returns TRUE on success and an integer error
        // code otherwise, so anything other than TRUE is a failure
        if ($zin->open($lfile) !== TRUE) {
            trigger_error("Unable to open {$lfile}");
            exit;
        }

        // an empty archive has no first entry to fall back on
        if ($zin->numFiles < 1) {
            trigger_error("No files found in ziparchive {$lfile}");
            $zin->close();
            return FALSE;
        }
        if ($zin->numFiles != 1) {
            trigger_error("Found more than one file ... using first file");
        }

        $f = $zin->statIndex(0);
        $base_file = $f['name'];
        if (($fp = $zin->getStream($base_file)) === FALSE) {
            trigger_error("Unable to get {$base_file} in ziparchive {$lfile}");
            $zin->close();
            return FALSE;
        }

        // read from the stream of the archive entry rather than from the zip file itself
        parent::setReadFile($lfile);
        parent::getReadFile()->setFilePointer($fp);

        echo "Processing " . $file . " ...";
        // compress the output only when the requested format asks for it, so
        // that the file content matches its name and its dataset description
        parent::setWriteFile($odir . $ofile, $gz);

        if ($this->Parse() === FALSE) {
            trigger_error("Parsing Error");
            exit;
        }

        parent::writeRDFBufferToWriteFile();
        parent::getWriteFile()->close();
        $zin->close();
        echo "Done!" . PHP_EOL;

        // temporarily switch to the dataset graph (when requested) while the
        // description is generated; restored at the end of the iteration
        $graph_uri = parent::getGraphURI();
        if (parent::getParameterValue('dataset_graph') == true) {
            parent::setGraphURI(parent::getDatasetURI());
        }

        // dataset description
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("iRefIndex ({$zip_file})")
            ->setRetrievedDate(date("Y-m-d\\TG:i:s\\Z", filemtime($lfile)))
            ->setFormat("text/tab-separated-value")
            ->setFormat("application/zip")
            ->setPublisher("http://irefindex.uio.no")
            ->setHomepage("http://irefindex.uio.no")
            ->setRights("use")
            ->setRights("by-attribution")
            ->setRights("no-commercial")
            ->setLicense("http://irefindex.uio.no/wiki/README_MITAB2.6_for_iRefIndex#License")
            ->setDataset("http://identifiers.org/irefindex/");

        $prefix = parent::getPrefix();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $date = date("Y-m-d\\TG:i:s\\Z");

        $output_file = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/irefindex/irefindex.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());

        if ($gz) {
            $output_file->setFormat("application/gzip");
        }
        if (strstr($format, "nt")) {
            $output_file->setFormat("application/n-triples");
        } else {
            $output_file->setFormat("application/n-quads");
        }

        $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
        parent::setGraphURI($graph_uri);
    }

    // write the accumulated dataset description to the release file
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();

    return TRUE;
}
/** * process a results xml file from the download directory **/ function process_file($infile) { $indir = parent::getParameterValue('indir'); $xml = new CXML($infile); $this->setCheckPoint('file'); while ($xml->Parse("clinical_study") == TRUE) { $this->setCheckPoint('record'); $this->root = $root = $xml->GetXMLRoot(); $this->nct_id = $nct_id = $this->getString("//id_info/nct_id"); $this->study_id = $study_id = parent::getNamespace() . "{$nct_id}"; ### declare $label = $this->getString("//brief_title"); if (!$label) { $label = $this->getString("//official_title"); } if (!$label) { $label = "Clinical trial #" . $nct_id; } parent::addRDF(parent::describeIndividual($study_id, $label, parent::getVoc() . "Clinical-Study") . parent::describeClass(parent::getVoc() . "Clinical-Study", "Clinical Study")); ########################################################################################## #required header ########################################################################################## parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "download-date", $this->getString('//required_header/download_date')) . parent::triplify($study_id, parent::getVoc() . "url", $this->getString('//required_header/url'))); ########################################################################################## #identifiers ########################################################################################## parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "nct-id", $this->getString('//id_info/nct_id'), "xsd:string") . parent::triplifyString($study_id, parent::getVoc() . "org-study-id", $this->getString('//id_info/org_study_id'), "xsd:string")); $sids = $root->xpath('//id_info/secondary_id'); if (isset($sids)) { foreach ($sids as $id) { parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . 
"secondary-id", (string) $id, "xsd:string")); } } $nctaliases = $root->xpath('//id_info/nct-alias'); if (isset($nctaliases)) { foreach ($nctaliases as $id) { parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "nct-alias", (string) $id, "xsd:string")); } } ########################################################################################## #titles ########################################################################################## parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "brief-title", $this->getString("//brief_title")) . parent::triplifyString($study_id, parent::getVoc() . "official-title", $this->getString("//official_title"))); ################################################################################### #brief summary ################################################################################### $brief_summary = str_replace(array("\r", "\n", "\t"), array("
", "
", "	"), $this->getString('//brief_summary/textblock')); parent::addRDF(parent::triplifyString($study_id, $this->getVoc() . "brief-summary", $brief_summary)); #################################################################################### # detailed description #################################################################################### $d = str_replace(array("\r", "\n", "\t"), array("
", "
", "	"), $this->getString('//detailed_description/textblock')); parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "detailed-description", $d)); ######################################################################################### #acronym ######################################################################################### parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "acronym", $this->getString("//acronym"))); ######################################################################################## #sponsors ######################################################################################## try { $sponsors = array("lead_sponsor", "collaborator"); foreach ($sponsors as $sponsor) { $a = @array_shift($root->xpath('//sponsors/' . $sponsor)); if ($a == null) { break; } $agency = $this->getString("//agency", $a); $agency_id = parent::getRes() . md5($agency); $agency_class = $this->getString("//agency_class", $a); $agency_class_id = parent::getRes() . md5($agency_class); parent::addRDF(parent::describeIndividual($agency_id, $agency, parent::getVoc() . "Organization") . parent::describeClass(parent::getVoc() . "Organization", "Organization") . parent::triplify($study_id, parent::getVoc() . str_replace("_", "-", $sponsor), $agency_id) . parent::describeIndividual($agency_class_id, $agency_class, parent::getVoc() . "Organization") . parent::describeClass(parent::getVoc() . "Organization", "Organization") . parent::triplify($agency_id, parent::getVoc() . "organization", $agency_class_id)); } } catch (Exception $e) { echo "There was an error in the lead sponsor element: {$e}\n"; } ################################################################################# # source ################################################################################# $source = $this->getString('//source'); if ($source) { $source_id = parent::getRes() . 
md5($source); parent::addRDF(parent::describeIndividual($source_id, $source, parent::getVoc() . "Organization") . parent::triplify($study_id, parent::getVoc() . "source", $source_id)); } ###################################################################################### # oversight ###################################################################################### try { $oversight = @array_shift($root->xpath('//oversight_info')); $oversight_id = parent::getRes() . md5($oversight->asXML()); $authority = $this->getString('//authority', $oversight); $authority_id = parent::getRes() . md5($authority); parent::addRDF(parent::describeIndividual($oversight_id, $authority, parent::getVoc() . "Organization") . parent::triplify($study_id, $this->getVoc() . "oversight", $oversight_id) . parent::triplify($study_id, $this->getVoc() . "authority", $authority_id) . parent::triplifyString($oversight_id, parent::getVoc() . "has-dmc", $this->getString('//has_dmc', $oversight))); } catch (Exception $e) { echo "There was an error in the oversight info element: {$e}\n"; } ################################################################################# # overall status ################################################################################# $overall_status = $this->getString('//overall_status'); if ($overall_status) { $status_id = parent::getRes() . md5($overall_status); parent::addRDF(parent::describeIndividual($status_id, $overall_status, parent::getVoc() . "Status") . parent::describeClass(parent::getVoc() . "Status", "Status") . parent::triplify($study_id, parent::getVoc() . "overall-status", $status_id)); } ######################################################################################### #why stopped ######################################################################################### parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . 
"why-stopped", $this->getString("//why_stopped"))); ################################################################################## # dates ################################################################################## $dates = array("start_date", "end_date", "completion_date", "primary_completion_date", "verification_date", "lastchanged_date", "firstreceived_date", "firstreceived_results_date"); foreach ($dates as $date) { $d = $this->getString('//' . $date); if ($d) { $datetime = $this->getDatetimeFromDate($d); if (isset($datetime)) { parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . str_replace("_", "-", $date), $datetime)); } else { trigger_error("unable to parse date: {$d}", E_USER_ERROR); } } } #################################################################################### # phase #################################################################################### $phase = $this->getString('//phase'); if ($phase && $phase != "N/A") { $phase_id = $this->getRes() . md5($phase); parent::addRDF(parent::describeIndividual($phase_id, $phase, parent::getVoc() . "Phase", $phase) . parent::describeClass(parent::getVoc() . "Phase", $phase) . parent::triplify($study_id, parent::getVoc() . "phase", $phase_id)); } ################################################################################### # study type #################################################################################### $study_type = $this->getString('//study_type'); if ($study_type) { $study_type_id = $this->getRes() . md5($study_type); parent::addRDF(parent::describeClass($study_type_id, $study_type, parent::getVoc() . "Study-Type") . parent::describeClass(parent::getVoc() . "Study-Type", "Study Type") . parent::triplify($study_id, parent::getVoc() . 
"study-type", $study_type_id)); } ############################################################################### # study design ############################################################################### $study_design = $this->getString('//study_design'); if ($study_design) { $study_design_id = parent::getRes() . md5($study_id . $study_design); parent::addRDF(parent::describeIndividual($study_design_id, "{$study_id} study design", parent::getVoc() . "Study-Design") . parent::describeClass(parent::getVoc() . "Study-Design", "Study Design") . parent::triplify($study_id, parent::getVoc() . "study-design", $study_design_id)); // Intervention Model: Parallel Assignment, Masking: Double-Blind, Primary Purpose: Treatment foreach (explode(", ", $study_design) as $i => $b) { $c = explode(": ", $b); if (isset($c[1])) { $sdp = $study_design_id . "-" . ($i + 1); $key = parent::getRes() . md5($c[0]); $value = parent::getRes() . md5($c[1]); parent::addRDF(parent::describeIndividual($sdp, $b, parent::getVoc() . "Study-Design-Parameter") . parent::describeClass(parent::getVoc() . "Study-Design-Parameter", "Study Design Parameter") . parent::triplify($sdp, parent::getVoc() . "key", $key) . parent::describeClass($key, $c[0]) . parent::triplify($sdp, parent::getVoc() . "value", $value) . parent::describeClass($value, $c[1]) . parent::triplify($study_design_id, parent::getVoc() . "study-design-parameter", $sdp)); } } } #################################################################################### # target duration #################################################################################### parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . 
"target-duration", $this->getString('//target_duration'))); ################################################################################ # outcomes ############################################################################### $outcomes = array("primary_outcome", "secondary_outcome", "other_outcome"); foreach ($outcomes as $outcome) { $o = $root->xpath('//' . $outcome); if ($o) { $os = $o; if (!is_array($o)) { $os = array($o); } foreach ($os as $o) { try { $po_id = parent::getRes() . md5($nct_id . $o->asXML()); $po_type = parent::getVoc() . str_replace("_", "-", $outcome); $measure = $this->getString('//measure', $o); $time_frame = $this->getString('//time_frame', $o); $safety_issue = $this->getString('//saftey_issue', $o); $description = $this->getString('//description', $o); parent::addRDF(parent::describeIndividual($po_id, $measure . " " . $time_frame, ucfirst($po_type)) . parent::describeClass(ucfirst($po_type), str_replace("_", " ", ucfirst($outcome))) . parent::triplifyString($po_id, "dc:description", $description) . parent::triplifyString($po_id, parent::getVoc() . "measure", $measure) . parent::triplifyString($po_id, parent::getVoc() . "time-frame", $time_frame) . parent::triplifyString($po_id, parent::getVoc() . "safety-issue", $safety_issue) . parent::triplify($study_id, parent::getVoc() . $po_type, $po_id)); } catch (Exception $e) { echo "There was an error parsing the primary outcome element: {$e} \n"; } } } } ############################################################################## #number of arms ############################################################################## try { parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . 
"number-of-arms", $this->getString('//number_of_arms'))); } catch (Exception $e) { echo "There was an exception parsing the number of arms element: {$e}\n"; } ############################################################################## #number of groups ############################################################################## try { parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "number-of-arms", $this->getString('//number_of_groups'))); } catch (Exception $e) { echo "There was an exception parsing the number of groups: {$e}\n"; } ############################################################################## #enrollment ############################################################################## try { $e = $root->xpath('//enrollment'); if ($e) { $type = strtolower((string) $e[0]->attributes()->type); $value = $this->getString('//enrollment'); parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . ($type ? $type . "-" : "") . "enrollment", $value)); } } catch (Exception $e) { echo "There was an exception parsing the enrollment element: {$e}\n"; } ############################################################################### #condition ############################################################################### try { $conditions = $root->xpath('//condition'); foreach ($conditions as $condition) { $mesh_label_id = parent::getRes() . md5($condition); parent::addRDF(parent::triplify($study_id, parent::getVoc() . "condition", $mesh_label_id) . parent::describeClass($mesh_label_id, $condition, parent::getVoc() . "Condition") . parent::describeClass(parent::getVoc() . 
"Condition", "Condition")); } } catch (Exception $e) { echo "There was an exception parsing condition element: {$e}\n"; } ################################################################################ # arm_group ################################################################################ try { $arm_groups = $root->xpath('//arm_group'); foreach ($arm_groups as $arm_group) { $arm_group_id = $this->getString('./arm_group_label', $arm_group); $arm_group_id = md5($arm_group_id); $arm_group_uri = parent::getRes() . $this->nct_id . "/arm-group/" . $arm_group_id; $arm_group_label = $this->nct_id . " arm group " . $arm_group_id; $arm_group_type = ucfirst(str_replace(" ", "_", $this->getString('./arm_group_type', $arm_group))); if (!$arm_group_type) { $arm_group_type = "Clinical-Arm"; } $description = $this->getString('./description', $arm_group); parent::addRDF(parent::describeIndividual($arm_group_uri, $arm_group_label, parent::getVoc() . $arm_group_type) . parent::describeClass(parent::getVoc() . $arm_group_type, ucfirst(str_replace("_", " ", $arm_group_type))) . parent::triplifyString($arm_group_uri, parent::getVoc() . "description", $description) . parent::describeIndividual($arm_group_uri, $arm_group, parent::getVoc() . "Arm-Group") . parent::describeClass(parent::getVoc() . "Arm-Group", "Arm Group") . parent::triplify($study_id, parent::getVoc() . "arm-group", $arm_group_uri)); } } catch (Exception $e) { echo "There was an exception in arm groups: {$e}\n"; } ############################################################################## #intervention ############################################################################## try { $interventions = $root->xpath('//intervention'); foreach ($interventions as $intervention) { $intervention_id = parent::getRes() . 
md5($intervention->asXML()); $intervention_name = $this->getString('./intervention_name', $intervention); $intervention_type = $this->getString('./intervention_type', $intervention); $intervention_type_uri = parent::getVoc() . ucfirst(str_replace(" ", "_", $intervention_type)); $intervention_desc = $this->getString('./description', $intervention); $intervention_on = $this->getString('./other_name', $intervention); parent::addRDF(parent::describeIndividual($intervention_id, $intervention_name, $intervention_type_uri) . parent::describeClass($intervention_type_uri, $intervention_type) . parent::triplifyString($intervention_id, parent::getVoc() . "intervention-name", $intervention_name) . parent::triplifyString($intervention_id, parent::getVoc() . "intervention-desc", $intervention_desc) . parent::triplifyString($intervention_id, parent::getVoc() . "other-name", $intervention_on) . parent::triplify($study_id, parent::getvoc() . "intervention", $intervention_id)); $agl = $intervention->xpath("./arm_group_label"); foreach ($agl as $a) { $arm_group_id = md5($a); $ag = parent::getRes() . $this->nct_id . "/arm-group/" . $arm_group_id; parent::addRDF(parent::describeIndividual($ag, $a, parent::getVoc() . "Arm-Group") . parent::describeClass(parent::getVoc() . "Arm-Group", "Arm Group") . parent::triplify($intervention_id, parent::getVoc() . "arm-group", $ag)); } } } catch (Exception $e) { echo "There was an error in interventions {$e}\n"; } ############################################################################### #eligibility ################################################################################ try { $eligibility = @array_shift($root->xpath('//eligibility')); if ($eligibility !== null) { $eligibility_label = "eligibility for " . $study_id; $eligibility_id = parent::getRes() . md5($eligibility->asXML()); parent::addRDF(parent::describeIndividual($eligibility_id, $eligibility_label, parent::getVoc() . "Eligibility") . parent::describeClass(parent::getVoc() . 
"Eligibility", "Eligibility") . parent::triplify($study_id, parent::getVoc() . "eligibility", $eligibility_id)); if ($criteria = @array_shift($eligibility->xpath('./criteria'))) { $text = @array_shift($criteria->xpath('./textblock')); parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . "text", $text)); $c = preg_split("/(Inclusion Criteria\\:|Exclusion Criteria\\:)/", $text); //inclusion if (isset($c[1])) { $d = explode(" - ", $c[1]); // the lists are separated by a hyphen foreach ($d as $inclusion) { $inc = trim($inclusion); if ($inc != '') { $inc_id = parent::getRes() . md5($inc); parent::addRDF(parent::describeIndividual($inc_id, $inc, parent::getVoc() . "Inclusion-Criteria") . parent::describeClass(parent::getVoc() . "Inclusion-Criteria", "Inclusion Criteria") . parent::triplify($eligibility_id, parent::getVoc() . "inclusion-criteria", $inc_id)); } } } //exclusion if (isset($c[2])) { $d = explode(" - ", $c[1]); foreach ($d as $exclusion) { $exc = trim($exclusion); if ($exc != '') { $exc_id = parent::getRes() . md5($exc); parent::addRDF(parent::describeIndividual($exc_id, $exc, parent::getVoc() . "Exclusion-Criteria") . parent::describeClass(parent::getVoc() . "Exclusion-Criteria", "Exclusion Criteria") . parent::triplify($eligibility_id, parent::getVoc() . "exclusion-criteria", $exc_id)); } } } } parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . "gender", $this->getString('./gender', $eligibility))); parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . "healthy-volunteers", $this->getString('./healthy_volunteers', $eligibility))); $attributes = array('minimum_age', 'maximum_age'); foreach ($attributes as $a) { $s = $this->getString('./' . $a, $eligibility); if ($s != 'N/A') { $age = trim(str_replace("Years", "", $s)); parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . 
str_replace("_", "-", $a), $age)); } } $attributes = array("study_pop" => "study-population", "sampling_method" => "sampling-method"); foreach ($attributes as $a => $r) { $e = @array_shift($eligibility->xpath('./' . $a)); if ($s = $this->getString('./' . $a, $eligibility)) { parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . $r, $this->getString('./textblock', $e))); } } } } catch (Exception $e) { echo "There was an error in eligibility: {$e}\n"; } ###################################################################################### #biospec ##################################################################################### parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "biospec-retention", $this->getString('//biospec_retention'))); try { $b = @array_shift($root->xpath('//biospec_descr')); if ($b) { parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "biospec_descr", $this->getString('./textblock', $b))); } } catch (Exception $e) { echo "There was an error in biospec_descr: {$e}\n"; } ################################################################### # contacts ################################################################### $contacts = array("overall_official", "overall_contact", "overall_contact_backup"); try { foreach ($contacts as $c) { $d = @array_shift($root->xpath('//' . $c)); if ($d) { parent::addRDF(parent::triplify($study_id, parent::getVoc() . str_replace("_", "-", $c), $this->makeContact($d))); } } } catch (Exception $e) { echo "There was an error parsing overall contact: {$e}" . "\n"; } ############################################################## # location of facility doing the testing ############################################################## try { $location = @array_shift($root->xpath('//location')); if ($location) { $location_uri = parent::getRes() . 
md5($location->asXML()); $name = $this->getString('//facility/name', $location); $address = @array_shift($location->xpath('//facility/address')); $contact = @array_shift($location->xpath('//contact')); $backups = @array_shift($location->xpath('//contact_backup')); $investigators = @array_shift($location->xpath('//investigator')); parent::addRDF(parent::describeIndividual($location_uri, $name, parent::getVoc() . "Location") . parent::describeClass(parent::getVoc() . "Location", "Location") . parent::triplifyString($location_uri, parent::getVoc() . "status", $this->getString('//status', $location)) . parent::triplify($study_id, parent::getVoc() . "location", $location_uri) . parent::triplify($location_uri, parent::getVoc() . "address", $this->makeAddress($address)) . ($contact != null ? parent::triplify($location_uri, parent::getVoc() . "contact", $this->makeContact($contact)) : "")); if ($backups) { foreach ($backups as $backup) { parent::addRDF(parent::triplify($location_uri, parent::getVoc() . "contact-backup", $this->makeContact($backup))); } } if ($investigators) { foreach ($investigators as $investigator) { parent::addRDF(parent::triplify($location_uri, parent::getVoc() . "investigator", $this->makeContact($investigator))); } } } } catch (Exception $e) { echo "There was an error parsing location: {$e}" . "\n"; } ###################################################################### #countries ###################################################################### try { $a = array("location_countries", "removed_countries"); foreach ($a as $country) { $lc = @array_shift($root->xpath('//' . $country)); if ($lc) { $label = $this->getString('//country', $lc); $cid = parent::getRes() . md5($label); parent::addRDF(parent::describeIndividual($cid, $label, parent::getVoc() . "Country") . parent::describeClass(parent::getVoc() . "Country", "Country") . parent::triplify($study_id, parent::getVoc() . 
"country", $cid)); } } } catch (Exception $e) { echo "There was an error parsing country: {$e}" . "\n"; } ###################################################################### #reference ###################################################################### try { $a = array("reference", "result_reference"); foreach ($a as $ref_type) { $references = $root->xpath('//' . $ref_type); foreach ($references as $reference) { $p = $this->getString('./PMID', $reference); if ($p) { $pmid = "pubmed:{$p}"; parent::addRDF(parent::describeIndividual($pmid, $p, parent::getVoc() . "Reference") . parent::describeClass(parent::getVoc() . "Reference", "Reference") . parent::triplifyString($pmid, parent::getVoc() . "citation", $this->getString('./citation', $reference)) . parent::triplify($study_id, parent::getVoc() . str_replace("_", "-", $ref_type), $pmid)); } } } } catch (Exception $e) { echo "There was an error parsing references element: {$e}\n"; } ####################################################################### #link ####################################################################### try { $links = $root->xpath('//link'); foreach ($links as $i => $link) { $url = $this->getString('./url', $link); $url = preg_replace("/>.*\$/", "", $url); $lid = parent::getRes() . md5($url); parent::addRDF(parent::describeIndividual($lid, $this->getString('./description', $link), parent::getVoc() . "Link") . parent::describeClass(parent::getVoc() . "Link", "Link") . parent::triplify($lid, parent::getVoc() . "url", $url) . parent::triplify($study_id, parent::getVoc() . "link", $lid)); } } catch (Exception $e) { echo "There was an error parsing link element: {$e}\n"; } ############################################################################ #responsible party ############################################################################ try { $rp = @array_shift($root->xpath('//responsible_party')); if ($rp) { $rp_id = parent::getRes() . 
md5($rp->asXML()); $label = $this->getString('./name_title', $rp); if (!$label) { $label = $this->getString('./organization', $rp); } else { $label .= ", " . $this->getString('./organization', $rp); } if (!$label) { $label = $this->getString('./party_type', $rp); } $org_id = parent::getRes() . md5($this->getString('./organization', $rp)); parent::addRDF(parent::describeIndividual($rp_id, $label, parent::getVoc() . "Responsible-Party") . parent::describeClass(parent::getVoc() . "Responsible-Party", "Responsible Party") . parent::triplify($study_id, parent::getVoc() . "responsible-party", $rp_id) . parent::triplify($rp_id, parent::getVoc() . "organization", $org_id) . parent::describeIndividual($org_id, $this->getString('./organization', $rp), parent::getVoc() . "Organization") . parent::describeClass(parent::getVoc() . "Organization", "Organization") . parent::triplifyString($rp_id, parent::getVoc() . "name-title", $this->getString('./name_title', $rp)) . parent::triplifyString($rp_id, parent::getVoc() . "party-type", $this->getString('./party_type', $rp)) . parent::triplifyString($rp_id, parent::getVoc() . "investigator-affiliation", $this->getString('./investigator_affiliation', $rp)) . parent::triplifyString($rp_id, parent::getVoc() . "investigator-full-name", $this->getString('./investigator_full_name', $rp)) . parent::triplifyString($rp_id, parent::getVoc() . "investigator-title", $this->getString('./investigator_title', $rp))); } } catch (Exception $e) { echo "There was an error parsing the responsible_party element: {$e}\n"; } ############################################################################## # keywords ############################################################################## try { $keywords = $root->xpath('//keyword'); foreach ($keywords as $keyword) { parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . 
"keyword", (string) $keyword)); } } catch (Exception $e) { echo "There was an error parsing the keywords element: {$e}"; } # mesh terms # note: mesh terms are assigned using an imperfect algorithm try { $mesh_terms = $root->xpath('//condition_browse/mesh_term'); foreach ($mesh_terms as $mesh_term) { $term = (string) $mesh_term; $mesh_id = parent::getRes() . md5($term); parent::addRDF(parent::triplify($study_id, parent::getVoc() . "condition-mesh", $mesh_id)); parent::addRDF(parent::triplifyString($mesh_id, "rdfs:label", $term)); } } catch (Exception $e) { echo "There was an error in mesh_terms: {$e}\n"; } ################################################################################ # regulated by fda? is section 801? has expanded access? ################################################################################ try { parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "is-fda-regulated", $this->getString('is_fda_regulated')) . parent::triplifyString($study_id, parent::getVoc() . "is-section-801", $this->getString('is_section_801')) . parent::triplifyString($study_id, parent::getVoc() . "has-expanded-access", $this->getString('has_expanded_access'))); } catch (Exception $e) { echo "There was an error parsing the is_fda_regulated element: {$e}\n"; } ############################################################################### # mesh terms for the intervention browse ############################################################################### try { $a = array("condition_browse", "intervention_browse"); foreach ($a as $browse_type) { $terms = $root->xpath("//{$browse_type}/mesh_term"); foreach ($terms as $term) { $term_label = (string) $term; $term_id = parent::getRes() . md5($term); parent::addRDF(parent::describeIndividual($term_id, $term_label, parent::getVoc() . "Term") . parent::describeClass(parent::getVoc() . "Term", "Term") . parent::triplify($study_id, parent::getVoc() . 
str_replace("_", "-", $browse_type), $term_id)); } } } catch (Exception $e) { echo "There was an error parsing {$browse_type}/mesh_term element: {$e}\n"; } ################################################################################ # clinical results ################################################################################ try { $cr = @array_shift($root->xpath('//clinical_results')); if ($cr) { $cr_id = parent::getRes() . md5($study_id . $cr->asXML()); parent::addRDF(parent::describeIndividual($cr_id, "clinical results for {$study_id}", parent::getVoc() . "Clinical-Result") . parent::describeClass(parent::getVoc() . "Clinical-Result", "Clinical Result") . parent::triplifyString($cr_id, parent::getVoc() . "description", $this->getString('./desc', $cr)) . parent::triplifyString($cr_id, parent::getVoc() . "restrictive-agreement", $this->getString('./restrictive_agreement', $cr)) . parent::triplifyString($cr_id, parent::getVoc() . "limitations-and-caveats", $this->getString('./limitations_and_caveats', $cr)) . parent::triplify($study_id, parent::getVoc() . "clinical-result", $cr_id)); } } catch (Exception $e) { echo "There was an error parsing clinical results: {$e}\n"; } ################################################################################ # Participant Flow ################################################################################ try { $pc = 1; $mc = 1; $wc = 1; $pf = @array_shift($root->xpath('//clinical_results/participant_flow')); if ($pf) { $pf_id = parent::getRes() . md5($pf->asXML()); parent::addRDF(parent::describeIndividual($pf_id, "participant flow for {$study_id}", parent::getVoc() . "Participant-Flow") . parent::describeClass(parent::getVoc() . "Participant-Flow", "Participant-Flow") . parent::triplify($study_id, parent::getVoc() . "participant-flow", $pf_id) . parent::triplifyString($pf_id, parent::getVoc() . "recruitment-details", $this->getString('./recruitment_details', $pf)) . 
parent::triplifyString($pf_id, parent::getVoc() . "pre-assignment-details", $this->getString('./pre_assignment_details', $pf))); $groups = @array_shift($pf->xpath('./group_list')); foreach ($groups as $group) { parent::addRDF(parent::triplify($pf_id, parent::getVoc() . "group", $this->makeGroup($group))); } //period_list $periods = @array_shift($pf->xpath('./period_list')); foreach ($periods as $period) { $period_id = parent::getRes() . $nct_id . "/period/" . $pc++; $period_title = $this->getString('./title', $period); parent::addRDF(parent::describeIndividual($period_id, $period_title . " for {$nct_id}", parent::getVoc() . "Period") . parent::describeClass(parent::getVoc() . "Period", "Period") . parent::triplify($pf_id, parent::getVoc() . "period", $period_id)); // milestones $milestones = @array_shift($period->xpath('./milestone_list')); if ($milestones) { foreach ($milestones as $milestone) { $milestone_id = parent::getRes() . $nct_id . "/milestone/" . $mc++; $label = $this->getString('./title', $milestone); parent::addRDF(parent::describeIndividual($milestone_id, $label, parent::getVoc() . "Milestone") . parent::describeClass(parent::getVoc() . "Milestone", "Milestone") . parent::triplify($period_id, parent::getVoc() . "milestone", $milestone_id)); // participants $p = 1; $ps_list = @array_shift($milestone->xpath('./participants_list')); foreach ($ps_list as $ps) { $ps_id = $milestone_id . "/p/" . $p++; $group_id = parent::getRes() . $this->nct_id . "/group/" . $ps->attributes()->group_id; $count = (string) $ps->attributes()->count; parent::addRDF(parent::describeIndividual($ps_id, "participant counts in " . $ps->attributes()->group_id . " for milestone {$mc} of {$nct_id}", parent::getVoc() . "Participant-Count") . parent::describeClass(parent::getVoc() . "Participant-Count", "Participant Count") . parent::triplify($ps_id, parent::getVoc() . "group", $group_id) . parent::triplifyString($ps_id, parent::getVoc() . "count", $count) . 
parent::triplify($milestone_id, parent::getVoc() . "participant-counts", $ps_id)); } } } // milestones $withdraws = @array_shift($period->xpath('./drop_withdraw_reason_list')); if ($withdraws) { foreach ($withdraws as $withdraw) { $wid = parent::getRes() . $this->nct_id . "/withdraw/" . $wc++; $label = $this->getString('./title', $withdraw); parent::addRDF(parent::describeIndividual($wid, $label, parent::getVoc() . "Withdraw-Reason") . parent::describeClass(parent::getVoc() . "Withdraw-Reason", "Withdraw Reason")); // participants $ps_list = @array_shift($withdraw->xpath('./participants_list')); foreach ($ps_list as $ps) { $group_id = parent::getRes() . $nct_id . "/group/" . $ps->attributes()->group_id; $count = (string) $ps->attributes()->count; parent::addRDF(parent::triplify($wid, parent::getVoc() . "group", $group_id) . parent::triplifyString($wid, parent::getVoc() . "count", $count)); } } } } } } catch (Exception $e) { echo "There was an error parsing participant flow element: {$e}\n"; } ################################################################################ # baseline ################################################################################ try { $baseline = @array_shift($root->xpath('//baseline')); if ($baseline) { $b_id = $this->nct_id . "/baseline"; $b_uri = parent::getRes() . $b_id; // group list $groups = @array_shift($baseline->xpath('./group_list')); foreach ($groups as $group) { parent::addRDF(parent::describeIndividual($b_uri, "baseline for {$nct_id}", parent::getVoc() . "Baseline") . parent::describeClass(parent::getVoc() . "Baseline", "Baseline") . parent::triplify($b_uri, parent::getVoc() . "group", $this->makeGroup($group)) . parent::triplify($study_id, parent::getVoc() . "baseline", $b_uri)); } // measure list $measures = @array_shift($baseline->xpath('./measure_list')); foreach ($measures as $measure) { parent::addRDF(parent::triplify($b_uri, parent::getVoc() . 
"measure", $this->makeMeasure($measure))); } } } catch (Exception $e) { echo "Error in parsing baseline" . PHP_EOL; } ################################################################################ # outcomes ################################################################################ try { $outcomes = @array_shift($root->xpath('//outcome_list')); if ($outcomes) { foreach ($outcomes as $i => $outcome) { $outcome_id = $this->nct_id . "/outcome/" . ($i + 1); $outcome_uri = parent::getRes() . $outcome_id; $outcome_label = $this->getString("./title", $outcome); if (!$outcome_label) { $outcome_label = "outcome for " . $this->nct_id; } parent::addRDF(parent::describeIndividual($outcome_uri, $outcome_label, parent::getVoc() . "Outcome", $this->getString("./description", $outcome)) . parent::describeClass(parent::getVoc() . "Outcome", "Outcome") . parent::triplify($study_id, parent::getVoc() . "outcome", $outcome_uri) . parent::triplifyString($outcome_uri, parent::getVoc() . "type", $this->getString("./type", $outcome)) . parent::triplifyString($outcome_uri, parent::getVoc() . "time-frame", $this->getString("./time_frame", $outcome)) . parent::triplifyString($outcome_uri, parent::getVoc() . "safety-issue", $this->getString("./safety_issue", $outcome)) . parent::triplifyString($outcome_uri, parent::getVoc() . "posting-date", $this->getString("./posting-date", $outcome)) . parent::triplifyString($outcome_uri, parent::getVoc() . "population", $this->getString("./population", $outcome))); $groups = @array_shift($outcome->xpath('./group_list')); if ($groups) { foreach ($groups as $group) { parent::addRDF(parent::triplify($outcome_uri, parent::getVoc() . "group", $this->makeGroup($group))); } } // measure list $measures = @array_shift($outcome->xpath('./measure_list')); if ($measures) { foreach ($measures as $measure) { parent::addRDF(parent::triplify($outcome_uri, parent::getVoc() . 
"measure", $this->makeMeasure($measure))); } } // analysis list $analyses = @array_shift($outcome->xpath('./analysis_list')); if ($analyses) { foreach ($analyses as $analysis) { parent::addRDF(parent::triplify($outcome_uri, parent::getVoc() . "analysis", $this->makeAnalysis($analysis))); } } } } } catch (Exception $e) { echo "Error in parsing outcomes" . PHP_EOL; } ################################################################################ # events ################################################################################ try { $c_ev = $c_c = 1; $reported_events = @array_shift($root->xpath('//reported_events')); if ($reported_events) { $rp_id = parent::getRes() . md5($reported_events->asXML()); $groups = @array_shift($reported_events->xpath('./group_list')); parent::addRDF(parent::describeIndividual($rp_id, "Reported events for {$nct_id}", parent::getVoc() . "Reported-Events") . parent::describeClass(parent::getVoc() . "Reported-Events", "Reported Events") . parent::triplify($study_id, parent::getVoc() . "reported-events", $rp_id)); foreach ($groups as $group) { parent::addRDF(parent::triplify($rp_id, parent::getVoc() . "group", $this->makeGroup($group))); } // events $event_list = array("serious_events" => "Serious Event", "other_events" => "Other Event"); foreach ($event_list as $ev => $ev_label) { $et = @array_shift($reported_events->xpath('./' . $ev)); if (!$et) { continue; } $ev_uri = parent::getVoc() . str_replace(" ", "-", $ev_label); $categories = @array_shift($et->xpath('./category_list')); foreach ($categories as $category) { $major_title = $this->getString('./title', $category); $major_title_uri = parent::getRes() . md5($major_title); $events = @array_shift($category->xpath('./event_list')); foreach ($events as $event) { $e_uri = parent::getRes() . $this->nct_id . "/{$ev}/" . $c_ev++; $subtitle = (string) $this->getString('./sub_title', $event) . " for " . $this->nct_id; $subtitle_uri = parent::getRes() . 
md5($subtitle); parent::addRDF(parent::describeIndividual($e_uri, $subtitle, $ev_uri) . parent::describeClass($ev_uri, $ev_label) . parent::triplify($e_uri, parent::getVoc() . "sub-title", $subtitle_uri) . parent::describeIndividual($subtitle_uri, $subtitle, parent::getVoc() . "Event") . parent::describeClass(parent::getVoc() . "Event", "Event") . parent::triplify($e_uri, parent::getVoc() . "major-title", $major_title_uri) . parent::describeClass($major_title_uri, $major_title) . parent::triplify($rp_id, parent::getVoc() . str_replace("_", "-", $ev), $e_uri)); $counts = $event->xpath('./counts'); foreach ($counts as $c) { $group_id = $c->attributes()->group_id; $group_uri = parent::getRes() . $nct_id . "/group/" . $group_id; $c_uri = $e_uri . "/count/" . $c_c++; parent::addRDF(parent::describeIndividual($c_uri, $subtitle . " for " . $group_id . " in " . $this->nct_id, parent::getVoc() . "Event-Count") . parent::describeClass(parent::getVoc() . "Event-Count", "Event Count") . parent::triplify($c_uri, parent::getVoc() . "group", $group_uri) . parent::triplify($e_uri, parent::getVoc() . "count", $c_uri) . parent::triplifyString($c_uri, parent::getVoc() . "default-vocabulary", $this->getString('./default_vocab', $et)) . parent::triplifyString($c_uri, parent::getVoc() . "frequency-threshold", $this->getString('./frequency_threshold', $et)) . parent::triplifyString($c_uri, parent::getVoc() . "default-assessment", $this->getString('./default_assessment', $et)) . parent::triplifyString($c_uri, parent::getVoc() . "number-events", $c->attributes()->events) . parent::triplifyString($c_uri, parent::getVoc() . "subjects-affected", $c->attributes()->subjects_affected) . parent::triplifyString($c_uri, parent::getVoc() . "subjects-at-risk", $c->attributes()->subjects_at_risk)); } } } } } } catch (Exception $e) { echo "Error in parsing reported events" . PHP_EOL; } parent::writeRDFBufferToWriteFile(); } $this->setCheckPoint('record'); $this->setCheckPoint('dataset'); }
/**
 * Convert the tab-delimited cross-reference table of the current read file into RDF.
 *
 * Column 0 of every line is used as a UniProt accession and as the local id of
 * the record; the other columns hold ";"-separated identifiers that are emitted
 * as cross-reference triples (or rdfs:seeAlso links for the UniRef/UniParc
 * columns). Output is rotated to a new write file named
 * "iproclass.<n>.<output_format>" at the first line and then every 1,000,000
 * lines; the last write file is left open for the caller.
 *
 * Identifiers are trimmed and empty ones are dropped, so a value such as
 * "A; B" never yields " B", and blank or truncated lines do not produce
 * triples with empty or newline-terminated ids.
 */
private function process()
{
    // One entry per source column, in output order.
    //   index     - zero-based column number
    //   predicate - local name appended to the vocabulary namespace (optional)
    //   prefix    - namespace prefix put in front of each id (with 'predicate')
    //   strip     - number of leading characters removed from each id (optional)
    //   see_also  - base URL for an additional rdfs:seeAlso link (optional)
    // Column 14 (pir-psd) is skipped because that database is no longer maintained.
    $columns = array(
        array('index' => 1, 'predicate' => "x-uniprot", 'prefix' => "uniprot:"),
        array('index' => 2, 'predicate' => "x-ncbigene", 'prefix' => "geneid:"),
        array('index' => 3, 'predicate' => "x-refseq", 'prefix' => "refseq:"),
        array('index' => 4, 'predicate' => "x-gi", 'prefix' => "gi:"),
        array('index' => 5, 'predicate' => "x-pdb", 'prefix' => "pdb:"),
        array('index' => 6, 'predicate' => "x-pfam", 'prefix' => "pfam:"),
        // the first three characters of each GO value are dropped
        // (presumably a "GO:" prefix -- TODO confirm against the input file)
        array('index' => 7, 'predicate' => "x-go", 'prefix' => "go:", 'strip' => 3),
        array('index' => 8, 'predicate' => "x-pirsf", 'prefix' => "pirsf:"),
        array('index' => 9, 'predicate' => "x-ipi", 'prefix' => "ipi:"),
        array('index' => 10, 'see_also' => "http://uniprot.org/uniref/"),
        array('index' => 11, 'see_also' => "http://uniprot.org/uniref/"),
        array('index' => 12, 'see_also' => "http://uniprot.org/uniref/"),
        array('index' => 13, 'predicate' => "x-uniparc", 'prefix' => "uniparc:", 'see_also' => "http://uniprot.org/uniparc/"),
        array('index' => 15, 'predicate' => "x-taxon", 'prefix' => "taxon:"),
        array('index' => 16, 'predicate' => "x-omim", 'prefix' => "omim:"),
        array('index' => 17, 'predicate' => "x-unigene", 'prefix' => "unigene:"),
        array('index' => 18, 'predicate' => "x-ensembl", 'prefix' => "ensembl:"),
        array('index' => 19, 'predicate' => "x-pubmed", 'prefix' => "pubmed:"),
        array('index' => 20, 'predicate' => "x-genbank", 'prefix' => "genbank:"),
        array('index' => 21, 'predicate' => "x-genbank", 'prefix' => "genbank:"),
    );

    // Split a multi-valued cell on ";" and return the non-empty, trimmed ids.
    $split_ids = function ($value) {
        $ids = array();
        foreach (explode(";", $value) as $candidate) {
            $candidate = trim($candidate);
            if ($candidate !== "") {
                $ids[] = $candidate;
            }
        }
        return $ids;
    };

    $line_count = 0;
    $file_number = 1;
    while ($l = $this->getReadFile()->Read(200000)) {
        if ($line_count++ % 1000000 == 0) {
            echo $line_count . PHP_EOL;
            $odir = parent::getParameterValue('outdir');
            $format = parent::getParameterValue('output_format');
            $ofile = 'iproclass.' . $file_number++ . "." . $format;
            $gz = strstr($format, "gz") !== false;
            if (parent::getWriteFile() != null) {
                parent::getWriteFile()->close();
                parent::clear();
            }
            // generate a new file
            parent::setWriteFile($odir . $ofile, $gz);
        }

        $fields = explode("\t", $l);
        $uniprot_acc = trim($fields[0]);
        if ($uniprot_acc === "") {
            // nothing to anchor the cross-references to (e.g. a blank line)
            continue;
        }

        $id_res = $this->getNamespace() . $uniprot_acc;
        parent::addRDF(parent::triplify($id_res, $this->getVoc() . "x-uniprot", "uniprot:" . $uniprot_acc));

        foreach ($columns as $spec) {
            // short lines simply have no value for the trailing columns
            $value = isset($fields[$spec['index']]) ? $fields[$spec['index']] : "";
            foreach ($split_ids($value) as $xref_id) {
                if (isset($spec['strip'])) {
                    $xref_id = substr($xref_id, $spec['strip']);
                }
                $rdf = "";
                if (isset($spec['predicate'])) {
                    $rdf .= parent::triplify($id_res, $this->getVoc() . $spec['predicate'], $spec['prefix'] . $xref_id);
                }
                if (isset($spec['see_also'])) {
                    $rdf .= parent::QQuadO_URL($id_res, "rdfs:seeAlso", $spec['see_also'] . $xref_id);
                }
                parent::addRDF($rdf);
            }
        }

        //write rdf to file
        $this->WriteRDFBufferToWriteFile();
    } //while
}
/**
 * Convert the requested BioModels entries to RDF and write the dataset description.
 *
 * The 'files' parameter selects the work: "all" or "curated" (model ids are
 * fetched from the BioModels SOAP service and cached as JSON in the input
 * directory), a hyphenated numeric range ("1-20"), or a comma separated list
 * of numbers. For each entry the BioPAX level 3 file is downloaded when it is
 * missing (or when 'download' is 'true'), converted with BioPAX2Bio2RDF and
 * appended to "biomodels.<output_format>". A description of the source files
 * and of the generated file is written to the release file at the end.
 *
 * If the SOAP client cannot be created its message is echoed and the
 * exception is rethrown, instead of continuing with an undefined client.
 *
 * @throws Exception when the SOAP client cannot be constructed
 */
function Run()
{
    // directory shortcuts
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');

    // get the work specified
    $list = trim(parent::getParameterValue('files'));
    $entries = array();

    // predefined lists: web-service operation and local cache file name
    $id_services = array(
        'all' => array("getAllModelsId", "all_models.json"),
        'curated' => array("getAllCuratedModelsId", "curated_models.json"),
    );

    if (isset($id_services[$list])) {
        list($operation, $cache_name) = $id_services[$list];
        $file = $ldir . $cache_name;
        if (!file_exists($file)) {
            try {
                $x = @new SoapClient("http://www.ebi.ac.uk/biomodels-main/services/BioModelsWebServices?wsdl");
            } catch (Exception $e) {
                echo $e->getMessage();
                // without a client there is nothing to call; fail with the real cause
                throw $e;
            }
            $entries = $x->{$operation}();
            file_put_contents($file, json_encode($entries));
        } else {
            // json_decode() yields null for an unreadable cache; treat that as "no entries"
            $entries = (array) json_decode(file_get_contents($file));
        }
    } elseif (($pos = strpos($list, "-")) !== false) {
        // hyphenated range
        $start_range = (int) substr($list, 0, $pos);
        $end_range = (int) substr($list, $pos + 1);
        for ($i = $start_range; $i <= $end_range; $i++) {
            $entries[] = "BIOMD" . str_pad($i, 10, "0", STR_PAD_LEFT);
        }
    } else {
        // comma separated list
        foreach (explode(",", $list) as $number) {
            $number = trim($number);
            if ($number === '') {
                continue;
            }
            $entries[] = "BIOMD" . str_pad($number, 10, "0", STR_PAD_LEFT);
        }
    }

    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    // set the write file
    $suffix = parent::getParameterValue('output_format');
    $outfile = 'biomodels' . '.' . $suffix;
    $gz = strstr($suffix, "gz") !== false;
    $dataset_description = '';
    parent::setWriteFile($odir . $outfile, $gz);

    // iterate over the entries
    $i = 0;
    $total = count($entries);
    $source_file = null; // description of the last entry that was converted
    foreach ($entries as $id) {
        echo "processing " . ++$i . " of {$total} - biomodel# " . $id;
        $download_file = $ldir . $id . ".owl.gz";
        $url = parent::getParameterValue('download_url') . "publ/{$id}/{$id}-biopax3.owl";

        // download if the file doesn't exist or we are told to
        if (!file_exists($download_file) || $this->GetParameterValue('download') == 'true') {
            echo " - downloading";
            $ret = utils::downloadsingle($url, 'compress.zlib://' . $download_file, true);
            if ($ret === false) {
                echo "\nTrying non-curated model";
                $url = parent::getParameterValue('download_url') . "uncura_publ/{$id}/{$id}-biopax3.owl";
                $ret = utils::downloadsingle($url, 'compress.zlib://' . $download_file, true);
                if ($ret === false) {
                    continue;
                }
            }
            echo " - downloaded";
        }

        // load entry, parse and write to file
        echo " - parsing... ";
        $buf = file_get_contents("compress.zlib://" . $download_file);
        if ($buf === false) {
            echo "unable to read {$download_file}, skipping" . PHP_EOL;
            continue;
        }
        $converter = new BioPAX2Bio2RDF($this);
        $converter->SetBuffer($buf)
            ->SetBioPAXVersion(3)
            ->SetBaseNamespace("http://identifiers.org/biomodels.db/{$id}/")
            ->SetBio2RDFNamespace("http://bio2rdf.org/biomodels:" . $id . "_")
            ->SetDatasetURI($this->GetDatasetURI());
        $rdf = $converter->Parse();
        parent::addRDF($rdf);
        parent::writeRDFBufferToWriteFile();

        // generate dataset description
        // (H = zero-padded hour; gmdate because the literal "Z" denotes UTC)
        $source_file = (new DataResource($this))
            ->setURI($url)
            ->setTitle("EBI BioModels Database - BioModel # {$id}")
            ->setRetrievedDate(gmdate("Y-m-d\\TH:i:s\\Z", filemtime($download_file)))
            ->setFormat("rdf/xml")
            ->setPublisher("http://www.ebi.ac.uk/")
            ->setHomepage("http://www.ebi.ac.uk/biomodels-main/")
            ->setRights("use-share-modify")
            ->setLicense("http://www.ebi.ac.uk/biomodels-main/termsofuse")
            ->setDataset("http://identifiers.org/biomodels.db/");
        $dataset_description .= $source_file->toRDF();
        echo "done!" . PHP_EOL;
    } //foreach
    parent::getWriteFile()->close();

    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = gmdate("Y-m-d\\TH:i:s\\Z");
    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})");
    if ($source_file !== null) {
        // only the last converted entry is recorded as the source, as before;
        // when nothing was converted there is no source to point at
        $output_file = $output_file->setSource($source_file->getURI());
    }
    $output_file = $output_file
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/biomodels/biomodels.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());
    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strstr($suffix, "nt")) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }
    $dataset_description .= $output_file->toRDF();

    // write dataset description to file
    parent::setGraphURI($graph_uri);
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Convert a single gzip-compressed input file.
 *
 * The output file is placed in the 'outdir' directory and named after the
 * input file with its ".xml.gz" suffix replaced by the configured output
 * format. The input is opened with gzopen() and that handle is installed on
 * the read file before $this->pubmed() is invoked; the buffered RDF is then
 * flushed and the write file closed.
 *
 * @param string $infile path of the gzipped input file; the script terminates
 *                       via die() if it cannot be opened
 */
function process_file($infile)
{
    $out_dir = parent::getParameterValue('outdir');
    $format = parent::getParameterValue('output_format');
    $target = $out_dir . basename($infile, ".xml.gz") . '.' . $format;

    // compressed output is requested by a ".gz" in the output format
    $compress = strstr(parent::getParameterValue('output_format'), ".gz") !== false;

    $handle = gzopen($infile, "r");
    if (!$handle) {
        die("Could not open file " . $infile . "!\n");
    }

    // read side: register the file, then swap in the gzip handle
    $this->setReadFile($infile);
    $this->getReadFile()->setFilePointer($handle);

    // write side
    $this->setWriteFile($target, $compress);
    $this->setCheckPoint('file');

    $this->pubmed();

    $this->writeRDFBufferToWriteFile();
    $this->getWriteFile()->close();
}
/**
 * Process the selected MeSH packages and write the dataset description.
 *
 * The 'files' parameter is either "all" or a comma separated list of keys of
 * the package map; unknown keys are ignored. For every selected package the
 * file name is derived by substituting the 'year' parameter for "YEAR" in the
 * package's file pattern, the file is downloaded when missing (or when
 * 'download' is "true"), and the method named after the package key is
 * invoked to convert it into "mesh_<key>.<output_format>". Descriptions of
 * each source and output file are collected and written to the release file.
 */
function Run()
{
    // resolve the packages to process
    $requested = trim(parent::getParameterValue('files'));
    $package_map = $this->getPackageMap();
    if ($requested == 'all') {
        $files = $package_map;
    } else {
        $files = array();
        foreach (explode(",", $requested) as $name) {
            if (!array_key_exists($name, $package_map)) {
                continue;
            }
            $files[$name] = $package_map[$name];
        }
    }

    $in_dir = parent::getParameterValue('indir');
    $out_dir = parent::getParameterValue('outdir');
    $description = '';
    $year = parent::getParameterValue('year');

    foreach ($files as $package => $pattern) {
        $file_name = str_replace("YEAR", $year, $pattern);
        $local_path = $in_dir . $file_name;
        $remote_url = parent::getParameterValue("download_url") . $file_name;

        // fetch the file when it is absent or a download was requested
        $must_download = !file_exists($local_path) || parent::getParameterValue('download') == "true";
        if ($must_download) {
            echo "Downloading {$file_name} ... ";
            if (utils::downloadSingle($remote_url, $local_path) === FALSE) {
                trigger_error("Unable to get {$file_name}", E_USER_ERROR);
                continue;
            }
            echo "done!" . PHP_EOL;
        }

        // output file for this package
        $format = parent::getParameterValue('output_format');
        $out_name = "mesh_" . $package . "." . $format;
        $compress = (bool) strstr($format, "gz");

        echo "processing {$package} ...";
        parent::setReadFile($local_path, FALSE);
        parent::setWriteFile($out_dir . $out_name, $compress);
        // the converter method carries the same name as the package key
        $this->{$package}();
        parent::writeRDFBufferToWriteFile();
        parent::getWriteFile()->close();
        echo "done!" . PHP_EOL;

        // describe the source file
        $source_file = (new DataResource($this))
            ->setURI($remote_url)
            ->setTitle("MeSH")
            ->setRetrievedDate(parent::getDate(filemtime($local_path)))
            ->setFormat("text/x-mesh-record")
            ->setPublisher("http://www.nlm.nih.gov")
            ->setHomepage("http://www.nlm.nih.gov/mesh/")
            ->setRights("use")
            ->setLicense("http://www.nlm.nih.gov/databases/download.html")
            ->setDataset("http://identifiers.org/mesh/");

        // describe the generated file
        $prefix = parent::getPrefix();
        $release = parent::getParameterValue('bio2rdf_release');
        $created = parent::getDate(filemtime($out_dir . $out_name));
        $output_file = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/{$release}/{$prefix}/{$out_name}")
            ->setTitle("Bio2RDF v{$release} RDF version of {$prefix}")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/mesh/mesh.php")
            ->setCreateDate($created)
            ->setHomepage("http://download.bio2rdf.org/release/{$release}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());
        if ($compress) {
            $output_file->setFormat("application/gzip");
        }
        $output_file->setFormat(
            strstr(parent::getParameterValue('output_format'), "nt")
                ? "application/n-triples"
                : "application/n-quads"
        );

        $description .= $source_file->toRDF() . $output_file->toRDF();
    }

    // write the collected descriptions to the release file
    parent::setWriteFile($out_dir . $this->getBio2RDFReleaseFile($this->getNamespace()));
    parent::getWriteFile()->write($description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Convert the selected Gene Ontology Annotation (GOA) files to RDF and write
 * the dataset description.
 *
 * The 'files' parameter is either "all" (every value of the 'files' parameter
 * list except its first element) or a comma separated list. For each name the
 * local file "goa_<name>.gz" is downloaded when it is missing or when
 * 'download' is set, converted by process() into
 * "goa_<name>.<output_format>", and described in the dataset description that
 * is written to the release file at the end. A file whose download fails is
 * reported and skipped.
 */
function run()
{
    if (parent::getParameterValue('files') == 'all') {
        $files = explode("|", $this->GetParameterList('files'));
        // drop the first list element (presumably the "all" keyword itself -- TODO confirm)
        array_shift($files);
    } else {
        $files = explode(",", parent::getParameterValue('files'));
    }

    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $rdir = parent::getParameterValue('download_url');
    $dataset_description = '';

    foreach ($files as $file) {
        $download = parent::getParameterValue('download');
        $lfile = $ldir . "goa_" . $file . ".gz";
        if (!file_exists($lfile) && $download == false) {
            trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
            $download = true;
        }

        // download file
        $rfile = $rdir . strtoupper($file) . "/gene_association.goa_" . $file . ".gz";
        if ($download == true) {
            echo "downloading {$file} ... ";
            if (utils::DownloadSingle($rfile, $lfile) === false) {
                // nothing to convert for this file; carry on with the next one
                trigger_error("Unable to download {$rfile}", E_USER_WARNING);
                continue;
            }
        }

        $format = parent::getParameterValue('output_format');
        $gz = strstr($format, ".gz") !== false;
        $ofile = "goa_" . $file . "." . $format;
        parent::setReadFile($lfile, TRUE);
        parent::setWriteFile($odir . $ofile, $gz);
        echo "processing {$file} ... ";
        $this->process($file);
        echo "done!";
        parent::clear();

        // close write file
        parent::getWriteFile()->close();
        echo PHP_EOL;

        // dataset description
        $graph_uri = parent::getGraphURI();
        if (parent::getParameterValue('dataset_graph') == true) {
            parent::setGraphURI(parent::getDatasetURI());
        }

        // H = zero-padded hour; gmdate because the literal "Z" denotes UTC
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("Gene Ontology Annotation file {$file} ({$rfile})")
            ->setRetrievedDate(gmdate("Y-m-d\\TH:i:s\\Z", filemtime($lfile)))
            ->setFormat("text/tab-separated-value")
            ->setFormat("application/gzip")
            ->setPublisher("http://www.ebi.ac.uk/")
            ->setHomepage("http://www.ebi.ac.uk/GOA/")
            ->setRights("use")
            ->setLicense("http://www.ebi.ac.uk/GOA/goaHelp.html")
            ->setDataset("http://identifiers.org/goa/");

        $prefix = parent::getPrefix();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $date = gmdate("Y-m-d\\TH:i:s\\Z");
        $output_file = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/goa/goa.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());
        if ($gz) {
            $output_file->setFormat("application/gzip");
        }
        if (strstr($format, "nt")) {
            $output_file->setFormat("application/n-triples");
        } else {
            $output_file->setFormat("application/n-quads");
        }

        $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
        parent::setGraphURI($graph_uri);
    }

    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Build the master list of entries from the mim2gene.txt file.
 *
 * When the 'download' parameter is set, or the file is missing from $ldir,
 * mim2gene.txt is first fetched from the OMIM FTP server. The file is then
 * parsed as tab-separated text; its first line is skipped as a header.
 *
 * @param string $ldir local input directory; used as a plain prefix, so it
 *                     must already end with a directory separator
 *
 * @return array map of first column (MIM number) => second column (type),
 *               without the entries typed "moved/removed"; empty when the
 *               file could be neither downloaded nor opened
 */
function getListOfEntries($ldir)
{
    $file = "mim2gene.txt";
    $list = array();

    if (!file_exists($ldir . $file)) {
        trigger_error($ldir . $file . " not found. Will attempt to download. ", E_USER_NOTICE);
        $this->SetParameterValue('download', true);
    }

    if (parent::getParameterValue('download') == true) {
        $host = 'ftp.omim.org';
        echo "connecting to {$host} ...";
        $ftp = ftp_connect($host);
        if (!$ftp) {
            echo "Unable to connect to {$host}" . PHP_EOL;
            die;
        }
        $login = ftp_login($ftp, 'anonymous', '*****@*****.**');
        if (!$login) {
            echo "FTP-connect failed!";
            die;
        }
        echo "Connected" . PHP_EOL;

        // passive mode can only be enabled after a successful login
        ftp_pasv($ftp, true);

        echo "Downloading {$file} ...";
        $downloaded = ftp_get($ftp, $ldir . $file, 'OMIM/' . $file, FTP_BINARY);
        ftp_close($ftp);

        if ($downloaded === false) {
            // the original used `continue` here, which is invalid outside a loop
            trigger_error("Error in downloading {$file}");
            if (!file_exists($ldir . $file)) {
                return $list;
            }
            // otherwise fall through and parse the local copy that is already there
        } else {
            echo "success!" . PHP_EOL;
        }
    }

    // parse the mim2gene file for the entries
    // # Mim Number    Type    Gene IDs    Approved Gene Symbols
    $fp = fopen($ldir . $file, "r");
    if ($fp === false) {
        trigger_error("Unable to open " . $ldir . $file, E_USER_WARNING);
        return $list;
    }

    fgets($fp); // skip the header line
    while (($l = fgets($fp)) !== false) {
        $a = explode("\t", rtrim($l, "\r\n"));
        if (!isset($a[1])) {
            continue; // blank or malformed line
        }
        if ($a[1] != "moved/removed") {
            $list[$a[0]] = $a[1];
        }
    }
    fclose($fp);

    return $list;
}
/**
 * Convert the selected NCBI Gene files to RDF and write the dataset description.
 *
 * The 'files' parameter is either 'all' or a comma-separated list of keys of
 * the package map; unknown keys are ignored. Every selected file is first
 * downloaded (when missing locally or when 'download' is set), then parsed by
 * the method named after the module ('gene2refseq' is parsed by
 * gene2accession()). A source/output DataResource pair is generated per file
 * and the accumulated description is written to the Bio2RDF release file.
 *
 * @return void
 */
function process()
{
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $rdir = parent::getParameterValue('download_url');

    // which files are to be converted?
    $selection = trim($this->GetParameterValue('files'));
    $package_map = $this->getPackageMap();
    if ($selection == 'all') {
        $files = $package_map;
    } else {
        $files = array();
        foreach (explode(",", $selection) as $name) {
            if (array_key_exists($name, $package_map)) {
                $files[$name] = $package_map[$name];
            }
        }
    }

    if ($this->getParameterValue('limit_organisms') == true) {
        // flipped so that the taxon ids become array keys
        $this->taxids = array_flip(explode(",", $this->getParameterValue('organisms')));
    }

    // set dataset graph to be dataset URI
    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    // download pass
    foreach ($files as $module => $rfilename) {
        $lfile = $ldir . $module . ".gz";
        $rfile = $rdir . $rfilename;
        $missing = !file_exists($lfile);

        if ($missing || parent::getParameterValue('download') == true) {
            if ($missing) {
                // only report a missing file when it really is missing
                trigger_error("{$lfile} not found. Will attempt to download.", E_USER_NOTICE);
            }
            $myfile = $lfile;
            if ($module == "gene2sts" || $module == "gene2unigene") {
                // written through the zlib stream wrapper
                // NOTE(review): presumably because these remote files are not gzip-compressed - confirm
                $myfile = "compress.zlib://" . $lfile;
            }
            echo "downloading {$module} ...";
            utils::DownloadSingle($rfile, $myfile);
            echo "done" . PHP_EOL;
        }
    }

    // conversion pass
    $format = parent::getParameterValue('output_format');
    $gz = strstr($format, "gz") !== false;
    // zero-padded hour (H) and UTC (gmdate), so the literal "Z" suffix is truthful
    $date_format = "Y-m-d\\TH:i:s\\Z";
    $dataset_description = '';

    foreach ($files as $module => $rfilename) {
        $lfile = $ldir . $module . ".gz";
        $rfile = $rdir . $rfilename;
        $ofile = $module . "." . $format;

        echo "Processing {$module} ... ";
        parent::setReadFile($lfile, true);
        parent::setWriteFile($odir . $ofile, $gz);

        // gene2refseq shares the parser of gene2accession
        $fnx = ($module == 'gene2refseq') ? 'gene2accession' : $module;
        $this->{$fnx}();
        parent::clear();
        echo 'done!' . PHP_EOL;

        parent::getReadFile()->close();
        parent::getWriteFile()->close();

        // dataset description
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("NCBI Gene ({$module})")
            ->setRetrievedDate(gmdate($date_format, filemtime($lfile)))
            ->setFormat("text/tab-separated-value")
            ->setFormat("application/gzip")
            ->setPublisher("http://www.ncbi.nlm.nih.gov")
            ->setHomepage("http://www.ncbi.nlm.nih.gov/gene")
            ->setRights("use-share-modify")
            ->setLicense("http://www.ncbi.nlm.nih.gov/About/disclaimer.html")
            ->setDataset("http://identifiers.org/ncbigene/");

        $prefix = parent::getPrefix();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $date = gmdate($date_format);
        $output_file = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ncbigene/ncbigene.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());

        if ($gz) {
            $output_file->setFormat("application/gzip");
        }
        if (strstr($format, "nt")) {
            $output_file->setFormat("application/n-triples");
        } else {
            $output_file->setFormat("application/n-quads");
        }

        $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
    }

    // set graph URI back to default value
    parent::setGraphURI($graph_uri);

    // write dataset description to file
    echo "Generating dataset description... ";
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Download (when needed) and convert the selected NCBI Taxonomy packages.
 *
 * The 'files' parameter is either 'all' or a comma-separated list of keys of
 * the package map; unknown keys are ignored. Each package entry provides
 * 'filename', 'file_url' and 'contents'. For the zip-based packages (taxdmp,
 * gi2taxid_protein, gi2taxid_nucleotide) every supported member of the
 * archive is streamed to the parser method named after its key, and a
 * source/output DataResource pair is added to the dataset description.
 *
 * @return void
 */
public function Run()
{
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');

    // which files are to be converted?
    $selectedPackage = trim(parent::getParameterValue('files'));
    $package_map = $this->getPackageMap();
    if ($selectedPackage == 'all') {
        $files = $package_map;
    } else {
        $files = array();
        foreach (explode(",", $selectedPackage) as $name) {
            if (array_key_exists($name, $package_map)) {
                $files[$name] = $package_map[$name];
            }
        }
    }

    $zip_packages = array("taxdmp", "gi2taxid_protein", "gi2taxid_nucleotide");
    $parsable = array("names", "nodes", "citations", "gencode", "division", "gi_taxid_prot", "gi_taxid_nucl");

    $format = parent::getParameterValue('output_format');
    $gz = strstr($format, 'gz') ? true : false;

    $dataset_description = '';
    foreach ($files as $key => $value) {
        $lfile = $ldir . $value['filename'];
        if (!file_exists($lfile) && parent::getParameterValue('download') == false) {
            trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
            // note: the flag stays set, so all following packages are downloaded as well
            $this->SetParameterValue('download', true);
        }

        // download all files [except mapping file]
        if ($this->GetParameterValue('download') == true) {
            $rfile = $value["file_url"];
            // var_dump() prints and returns nothing, so it cannot be concatenated
            echo "downloading " . $rfile . " ... ";
            utils::downloadSingle($rfile, $lfile);
        }

        if (in_array($key, $zip_packages, true)) {
            // make sure we have the zip archive
            $lfile = $value["filename"];
            $zinfile = $ldir . $lfile;
            $zin = new ZipArchive();
            // ZipArchive::open() returns true or an integer error code, never false
            if ($zin->open($zinfile) !== true) {
                trigger_error("Unable to open {$zinfile}");
                exit;
            }

            $source_file = (new DataResource($this))
                ->setURI($value['file_url'])
                ->setTitle('NCBI Taxonomy - ' . $key)
                ->setRetrievedDate(date("Y-m-d\\TH:i:sP", filemtime($ldir . $lfile)))
                ->setFormat('text/tab-separated-value')
                ->setFormat('application/zip')
                ->setPublisher('http://www.ncbi.nlm.nih.gov')
                ->setHomepage('http://www.ncbi.nlm.nih.gov/taxonomy')
                ->setRights('use')
                ->setRights('attribution')
                ->setLicense('https://www.nlm.nih.gov/copyright.html')
                ->setDataset(parent::getDatasetURI());

            $prefix = parent::getPrefix();
            $bVersion = parent::getParameterValue('bio2rdf_release');
            $date = date("Y-m-d\\TH:i:sP");
            $output_file = (new DataResource($this))
                ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")
                ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$key}")
                ->setSource($source_file->getURI())
                ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/taxonomy/taxonomy.php")
                ->setCreateDate($date)
                ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
                ->setPublisher("http://bio2rdf.org")
                ->setRights("use-share-modify")
                ->setRights("restricted-by-source-license")
                // the licence host was missing its ".org"
                ->setLicense("http://creativecommons.org/licenses/by/3.0/")
                ->setDataset(parent::getDatasetURI());

            $dataset_description .= $output_file->toRDF() . $source_file->toRDF();

            // now iterate over the files in the zip archive
            foreach ($value["contents"] as $k => $fn) {
                if (!in_array($k, $parsable, true)) {
                    continue;
                }

                $fpin = $zin->getStream($fn);
                if (!$fpin) {
                    trigger_error("Unable to get pointer to {$fn} in {$zinfile}");
                    exit("failed\n");
                }

                $gzoutfile = $odir . "taxonomy-{$k}" . "." . $format;

                // read from the archive member, write to the per-member output file
                parent::setReadFile($ldir . $lfile);
                parent::getReadFile()->SetFilePointer($fpin);
                parent::setWriteFile($gzoutfile, $gz);

                echo "processing {$fn}...\n";
                $this->{$k}();
                $this->GetWriteFile()->Close();
                echo "done!" . PHP_EOL;
                parent::clear();
            }
        }

        // the release file is rewritten after every package with the description so far
        $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
        $this->getWriteFile()->write($dataset_description);
        $this->getWriteFile()->close();
    }
}
/**
 * Download the RefSeq protein release files and convert them to RDF.
 *
 * The list of remote files is taken from the NCBI FTP site (and cached in
 * ftp_list.txt inside the input directory) or, when the cache exists and
 * 'download' is not set, from that cache. Missing files are downloaded, then
 * every gz file found in the input directory is parsed by process() and a
 * source/output DataResource pair is added to the release file.
 *
 * @return void
 */
function Run()
{
    $dataset_description = '';
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');

    $list_file = $ldir . "ftp_list.txt";
    if (!file_exists($list_file) || $this->getParameterValue('download') == true) {
        echo "Getting FTP file list ...";
        $list = $this->getFtpFileList('ftp.ncbi.nlm.nih.gov', '/refseq/release/complete/', '/(complete\\.[0-9]+\\.protein\\.gpff\\.gz)/');
        if (empty($list)) {
            trigger_error("Unable to get list of files from FTP site. Check internet connection", E_USER_ERROR);
            exit(-1);
        }
        asort($list);
        file_put_contents($list_file, implode("\n", $list));
        echo "Done." . PHP_EOL;
    } else {
        echo "Using existing ftp list" . PHP_EOL;
        // drop blank entries (e.g. from a trailing newline); an empty name would
        // otherwise resolve to the input directory itself and pass file_exists()
        $list = array_filter(array_map('trim', explode("\n", file_get_contents($list_file))), 'strlen');
    }

    // download pass
    $counter = 1;
    $total = count($list);
    foreach ($list as $f) {
        $lfile = $ldir . $f;
        echo "Processing " . $counter++ . "/{$total} {$f}. ";
        if (!file_exists($lfile) || $this->getParameterValue('download') == true) {
            $rfile = parent::getParameterValue('download_url') . $f;
            echo "Downloading ...";
            utils::DownloadSingle($rfile, $lfile);
            echo "done.";
        } else {
            echo "Using existing file.";
        }
        echo PHP_EOL;
    }

    // iterate over the files
    $format = parent::getParameterValue('output_format');
    $gz = strstr($format, "gz") ? true : false;
    // zero-padded hour (H) and UTC (gmdate), so the literal "Z" suffix is truthful
    $date_format = "Y-m-d\\TH:i:s\\Z";

    $files = $this->getFilePaths($ldir, 'gz');
    asort($files);
    foreach ($files as $f) {
        $lfile = $ldir . $f;
        $ofile = $odir . basename($f, ".gz") . "." . $format;

        parent::setWriteFile($ofile, $gz);
        parent::setReadFile($lfile, true);
        echo "processing {$f} ...";
        $this->process();
        parent::clear();
        echo "done!" . PHP_EOL;
        $this->getReadFile()->close();
        $this->getWriteFile()->close();

        $source_file = (new DataResource($this))
            // remote location = download URL + file name (not the local path)
            ->setURI(parent::getParameterValue('download_url') . $f)
            ->setTitle("NCBI RefSeq - {$f}")
            ->setRetrievedDate(gmdate($date_format, filemtime($lfile)))
            ->setFormat('text/refseq-format')
            ->setFormat('application/zip')
            ->setPublisher('http://www.ncbi.nlm.nih.gov')
            ->setHomepage('http://www.ncbi.nlm.nih.gov/refseq')
            ->setRights('use')
            ->setRights('attribution')
            ->setLicense('http://www.nlm.nih.gov/copyright.html')
            ->setDataset(parent::getDatasetURI());

        $prefix = parent::getPrefix();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $date = gmdate($date_format);
        $output_file = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$f}")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/refseq/refseq.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("restricted-by-source-license")
            // the licence host was missing its ".org"
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());

        $dataset_description .= $output_file->toRDF() . $source_file->toRDF();
    }

    parent::writeToReleaseFile($dataset_description);
    parent::getWriteFile()->close();
}
/**
 * Convert the selected GenAge archives ("human", "models") to RDF.
 *
 * For each selected file the zip archive is downloaded when it is missing
 * locally, the member genage_<file>.csv is streamed to the parser method
 * named after the file, and a source/output DataResource pair is appended to
 * the dataset description, which is finally written to the release file.
 *
 * @return false|null false when the csv member cannot be read from an
 *                    archive; nothing otherwise
 */
function process()
{
    if (parent::getParameterValue('files') == 'all') {
        $files = explode("|", parent::getParameterList('files'));
        array_shift($files); // drop the first entry of the parameter list
    } else {
        $files = explode(",", parent::getParameterValue('files'));
    }

    $remote_files = array("human" => "human_genes.zip", "models" => "models_genes.zip");

    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $rdir = parent::getParameterValue('download_url');

    $suffix = parent::getParameterValue('output_format');
    $gz = strstr($suffix, "gz") !== false;
    // zero-padded hour (H) and UTC (gmdate), so the literal "Z" suffix is truthful
    $date_format = "Y-m-d\\TH:i:s\\Z";

    $dataset_description = '';
    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    foreach ($files as $file) {
        if (!isset($remote_files[$file])) {
            // neither a remote archive nor a csv member is known for this name
            trigger_error("Unknown file '{$file}'; skipping.", E_USER_WARNING);
            continue;
        }

        $lfile = $ldir . $remote_files[$file];
        $rfile = $rdir . $remote_files[$file];
        if (!file_exists($lfile)) {
            trigger_error($lfile . " not found. Will attempt to download." . PHP_EOL, E_USER_WARNING);
            echo "Downloading {$rfile}... ";
            Utils::DownloadSingle($rfile, $lfile);
            echo "done!" . PHP_EOL;
        }

        $ofile = "genage_" . $file . '.' . $suffix;

        $zin = new ZipArchive();
        // ZipArchive::open() returns true or an integer error code, never false
        if ($zin->open($lfile) !== true) {
            trigger_error("Unable to open {$lfile}");
            exit;
        }

        $zipentry = "genage_" . $file . ".csv";
        if (($fp = $zin->getStream($zipentry)) === FALSE) {
            trigger_error("Unable to get {$zipentry} in ziparchive {$lfile}");
            // do not leave the graph URI pointing at the dataset graph
            parent::setGraphURI($graph_uri);
            return FALSE;
        }
        parent::SetReadFile($lfile);
        parent::GetReadFile()->SetFilePointer($fp);

        // set the write file, parse, write and close
        parent::setWriteFile($odir . $ofile, $gz);
        echo "Processing {$lfile}... ";
        $this->{$file}();
        echo "done!" . PHP_EOL;
        parent::getWriteFile()->close();

        // generate the dataset release file
        echo "Generating dataset description for {$ofile}... ";
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("Human Ageing Genomic Resources GenAge database (" . $remote_files[$file] . ")")
            ->setRetrievedDate(gmdate($date_format, filemtime($lfile)))
            ->setFormat("text/comma-separated-value")
            ->setFormat("application/gzip")
            ->setPublisher("http://genomics.senescence.info/")
            ->setHomepage("http://genomics.senescence.info/genes/")
            ->setRights("use")
            ->setLicense("http://genomics.senescence.info/legal.html")
            ->setDataset("http://identifiers.org/genage/");

        $prefix = parent::getPrefix();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $date = gmdate($date_format);
        $output_file = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/genage/genage.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());

        if ($gz) {
            $output_file->setFormat("application/gzip");
        }
        if (strstr($suffix, "nt")) {
            $output_file->setFormat("application/n-triples");
        } else {
            $output_file->setFormat("application/n-quads");
        }

        $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
        echo "done!" . PHP_EOL;
    }

    parent::setGraphURI($graph_uri);

    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
}
/**
 * Parse the local copy of the PubChem substances directory.
 *
 * Every entry of <indir>/substances/ (except "." and "..") is handed to
 * parse_substance_file() and written to <outdir>/substances/ under its base
 * name (".xml.gz" stripped) plus the configured output format. Afterwards a
 * dataset description is written to the release file in the same directory.
 * The script exits when the input directory cannot be read.
 *
 * @return void
 */
function parse_substances()
{
    $ignore = array(".", "..");
    $input_dir = $this->getParameterValue('indir') . "/substances/";
    $output_dir = $this->getParameterValue('outdir') . "/substances/";

    $this->CreateDirectory($output_dir);
    parent::setDatasetURI("bio2rdf_dataset:bio2rdf-" . $this->getPcsPrefix() . "-" . date("Ymd"));

    // set graph URI to dataset uri
    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    $handle = opendir($input_dir);
    if (!$handle) {
        echo "unable to read directory contents: " . $input_dir . "\n";
        exit;
    }

    // the output format does not change between files
    $suffix = parent::getParameterValue('output_format');
    $gz = strstr($suffix, "gz") !== false;

    while (false !== ($file = readdir($handle))) {
        if (in_array($file, $ignore)) {
            continue;
        }
        echo "Processing file: " . $input_dir . $file . PHP_EOL;
        $outfile = realpath($this->getParameterValue('outdir')) . "/substances/" . basename($file, ".xml.gz") . "." . $suffix;
        echo "... into " . $outfile . PHP_EOL;

        parent::setCheckpoint('file');
        $this->setWriteFile($outfile, $gz);
        $this->parse_substance_file($input_dir, $file);
        $this->getWriteFile()->close();
    }
    closedir($handle);

    // zero-padded hour (H) and UTC (gmdate), so the literal "Z" suffix is truthful
    $date_format = "Y-m-d\\TH:i:s\\Z";

    $source_file = (new DataResource($this))
        ->setURI("http://www.ncbi.nlm.nih.gov/pcsubstance")
        ->setTitle("PubChem Substance")
        ->setRetrievedDate(gmdate($date_format, filemtime($input_dir)))
        ->setFormat("text/xml")
        ->setFormat("application/zip")
        ->setPublisher("http://ncbi.nlm.nih.gov/")
        ->setHomepage("http://pubchem.ncbi.nlm.nih.gov/")
        ->setRights("use")
        ->setRights("restricted-by-source-license")
        ->setLicense("ftp://ftp.ncbi.nlm.nih.gov/pubchem/README")
        ->setDataset("http://identifiers.org/pubchem.substance/");

    $prefix = $this->getPcsPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = gmdate($date_format);
    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
        ->setSource($source_file->getURI())
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/pubchem/pubchem.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());

    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strstr($suffix, "nt")) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }

    $dataset_description = $source_file->toRDF() . $output_file->toRDF();

    // set graph URI back to default
    parent::setGraphURI($graph_uri);

    // write the dataset description
    $this->setWriteFile($output_dir . $this->getBio2RDFReleaseFile());
    $this->getWriteFile()->write($dataset_description);
    $this->getWriteFile()->close();
}
/**
 * Convert the Affymetrix probeset annotation archives to a single RDF file.
 *
 * The listing page at 'download_url' (cached as probeset_list.html) is
 * scanned for *.csv.zip links. With 'files' == 'all' every link is used,
 * otherwise the first link containing each requested name. Archives that are
 * not present in the input directory are skipped. Every processed archive
 * contributes a source DataResource; a single output DataResource is written
 * at the end.
 *
 * @return bool|null true on success, false when the csv member cannot be
 *                   read from an archive; the script exits when no links are
 *                   found, nothing is selected or an archive cannot be opened
 */
function Run()
{
    // directory shortcuts
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');

    // get the listings page
    $url = trim(parent::getParameterValue('download_url'));
    $listing_file = $ldir . "probeset_list.html";
    if (!file_exists($listing_file) || parent::getParameterValue("download") == "true") {
        echo "Downloading {$listing_file}" . PHP_EOL;
        Utils::DownloadSingle($url, $listing_file);
    }
    $listings = file_get_contents($listing_file);

    // make a list of the csv.zip files
    preg_match_all("/\"([^\"]+)\\.csv\\.zip\"/", $listings, $m);
    if (count($m[1]) == 0) {
        trigger_error("could not find any .csv.zip files in {$url}");
        exit;
    }

    $myfiles = array();
    if (parent::getParameterValue("files") == 'all') {
        $myfiles = $m[1];
    } else {
        foreach (explode(",", parent::getParameterValue("files")) as $f) {
            $found = false;
            foreach ($m[1] as $n) {
                if (strstr($n, $f)) {
                    $found = true;
                    $myfiles[] = $n;
                    break;
                }
            }
            if ($found === false) {
                echo "cannot find {$f} in list" . PHP_EOL;
            }
        }
    }
    if (count($myfiles) == 0) {
        exit; // nothing to do
    }

    $dataset_description = '';

    // set the write file
    $format = parent::getParameterValue('output_format');
    $gz = strstr($format, ".gz") !== false;
    $outfile = 'affymetrix.' . $format;
    $this->setWriteFile($odir . $outfile, $gz);

    // stays null when every archive is skipped
    $source_file = null;

    // iterate over the files
    foreach ($myfiles as $rfile) {
        $base_file = substr($rfile, strrpos($rfile, "/") + 1);

        // get and set the dataset version
        if (parent::getDatasetVersion() == null) {
            preg_match("/\\.na([0-9]{2})\\.annot/", $base_file, $vm);
            if (isset($vm[1])) {
                $this->setDatasetVersion($vm[1]);
            }
        }
        if (parent::getDatasetVersion() != parent::getParameterValue('version')) {
            // the requested version overrides the one found in the file name
            $base_file = str_replace("na" . parent::getDatasetVersion(), "na" . parent::getParameterValue('version'), $base_file);
        }

        $csv_file = $base_file . ".csv";
        $zip_file = $csv_file . ".zip";
        $lfile = $ldir . $zip_file;
        if (!file_exists($lfile)) {
            echo "skipping: {$lfile} does not exist" . PHP_EOL;
            continue;
        }
        echo "processing {$lfile}" . PHP_EOL;

        // open the zip file
        $zin = new ZipArchive();
        // ZipArchive::open() returns true or an integer error code, never false
        if ($zin->open($lfile) !== true) {
            trigger_error("Unable to open {$lfile}");
            exit;
        }
        if (($fp = $zin->getStream($csv_file)) === FALSE) {
            trigger_error("Unable to get {$csv_file} in ziparchive {$lfile}");
            $zin->close();
            // do not leave the output file open on the early return
            $this->getWriteFile()->close();
            return FALSE;
        }

        parent::setReadFile($lfile);
        parent::getReadFile()->setFilePointer($fp);
        $this->parse($base_file);
        parent::getReadFile()->close();
        $zin->close();
        parent::clear();

        // dataset description
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("Affymetrix Probeset: {$base_file}")
            ->setRetrievedDate(parent::getDate(filemtime($lfile)))
            ->setFormat("text/tab-separated-value")
            ->setFormat("application/zip")
            ->setPublisher("http://affymetrix.com")
            ->setHomepage("http://www.affymetrix.com/support/technical/annotationfilesmain.affx")
            ->setRights("use")
            ->setRights("no-commercial")
            ->setRights("registration-required")
            ->setLicense("http://www.affymetrix.com/about_affymetrix/legal/index.affx")
            ->setDataset("http://identifiers.org/affy.probeset/");
        $dataset_description .= $source_file->toRDF();
    }
    $this->getWriteFile()->close();

    // describe the output file
    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = parent::getDate(filemtime($odir . $outfile));
    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outfile}")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}")
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/affymetrix/affymetrix.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());

    if ($source_file !== null) {
        // as before, the last processed archive is recorded as the source
        $output_file->setSource($source_file->getURI());
    }
    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strstr($format, "nt")) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }
    $dataset_description .= $output_file->toRDF();

    // write the dataset description
    $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
    $this->getWriteFile()->write($dataset_description);
    $this->getWriteFile()->close();

    return true;
}
/**
 * Convert the selected CTD files to RDF and write the dataset description.
 *
 * Each file is stored locally as <file>.gz. Remote files that are not
 * gzip-compressed (chem_gene_ixn_types as .tsv, exposure_ontology as .obo)
 * are written through the zlib stream wrapper on download. Every file is
 * parsed by the method named "CTD_<file>" and contributes a source/output
 * DataResource pair to the release file.
 *
 * @return void
 */
function process()
{
    // get the file list
    if (parent::getParameterValue('files') == 'all') {
        $files = explode("|", parent::getParameterList('files'));
        array_shift($files); // drop the first entry of the parameter list
    } else {
        $files = explode(",", parent::getParameterValue('files'));
    }

    $dataset_description = '';

    // set directory values
    $ldir = parent::getParameterValue('indir');
    $rdir = parent::getParameterValue('download_url');
    $odir = parent::getParameterValue('outdir');

    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    $gz_suffix = ".gz";
    // remote suffixes that differ from the default ".tsv.gz"
    $remote_suffixes = array('chem_gene_ixn_types' => '.tsv', 'exposure_ontology' => '.obo');
    // identifiers.org datasets known for individual files; others get null
    $datasets = array(
        "chemicals" => "http://identifiers.org/ctd.chemical/",
        "diseases" => "http://identifiers.org/ctd.disease/",
        "genes" => "http://identifiers.org/ctd.gene/",
    );

    $out_suffix = parent::getParameterValue('output_format');
    $gz = strstr($out_suffix, "gz") !== false;
    // zero-padded hour (H) and UTC (gmdate), so the literal "Z" suffix is truthful
    $date_format = "Y-m-d\\TH:i:s\\Z";

    foreach ($files as $file) {
        $suffix = isset($remote_suffixes[$file]) ? $remote_suffixes[$file] : ".tsv.gz";

        $lfile = $ldir . $file . $gz_suffix;
        $rfile = $rdir . 'CTD_' . $file . $suffix;
        if (!file_exists($lfile)) {
            trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
            if ($suffix == ".tsv.gz") {
                Utils::DownloadSingle($rfile, $lfile);
            } else {
                Utils::DownloadSingle($rfile, "compress.zlib://" . $lfile);
            }
        }

        $ofile = "ctd_" . $file . "." . $out_suffix;

        echo "Processing " . $file . " ...";
        parent::setWriteFile($odir . $ofile, $gz);
        parent::setReadFile($lfile, TRUE);

        $fnx = "CTD_" . $file;
        $this->{$fnx}();

        // close write file
        parent::getWriteFile()->close();
        parent::clear();
        echo "done!" . PHP_EOL;

        // generate the dataset release file
        echo "Generating dataset description... ";
        $dataset = isset($datasets[$file]) ? $datasets[$file] : null;

        $source_file = (new DataResource($this))
            ->setURI($rfile)
            // $gz_suffix already starts with a dot; the closing parenthesis was missing
            ->setTitle("Comparative Toxicogenomics Database ({$file}{$gz_suffix})")
            ->setRetrievedDate(gmdate($date_format, filemtime($lfile)))
            ->setFormat("text/tab-separated-value")
            ->setFormat("application/gzip")
            ->setPublisher("http://ctdbase.org/")
            ->setHomepage("http://ctdbase.org/")
            ->setRights("use")
            ->setRights("by-attribution")
            ->setRights("no-commercial")
            ->setLicense("http://ctdbase.org/about/legal.jsp")
            ->setDataset($dataset);

        $prefix = parent::getPrefix();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $date = gmdate($date_format);
        $output_file = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ctd/ctd.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());

        if ($gz) {
            $output_file->setFormat("application/gzip");
        }
        if (strstr($out_suffix, "nt")) {
            $output_file->setFormat("application/n-triples");
        } else {
            $output_file->setFormat("application/n-quads");
        }

        $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
    }

    parent::setGraphURI($graph_uri);

    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Convert one KEGG database listing, entry by entry.
 *
 * Each line of the current read file holds "<ns>:<id><TAB><name>". For every
 * entry (optionally restricted to $this->idlist) the basic description is
 * emitted, the flat-file record is fetched when needed and parsed, and - for
 * the "pathway" database - the KGML file is fetched and parsed as well.
 * Entries whose download fails are skipped.
 *
 * @param string $db name of the KEGG database being processed
 *
 * @return void
 */
function process($db)
{
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');

    while (true) {
        $line = parent::getReadFile()->read();
        if (!$line) {
            break;
        }

        list($nsid, $name) = explode("\t", $line);
        list($ns, $id) = explode(":", $nsid);

        // honour an optional whitelist of identifiers
        if (isset($this->idlist) && !in_array($id, $this->idlist)) {
            continue;
        }
        // organism-specific runs keep the namespace as part of the identifier
        if (isset($this->org)) {
            $id = $ns . "_" . $id;
        }

        $uri = $this->getNamespace() . $id;
        $individual = parent::describeIndividual($uri, $name, parent::getVoc() . ucfirst($db));
        $class = parent::describeClass(parent::getVoc() . ucfirst($db), "KEGG {$db}");
        $internal_id = parent::triplifyString($uri, parent::getVoc() . "internal-id", $nsid);
        parent::addRDF($individual . $class . $internal_id);

        // now get the entry itself
        $lfile = $ldir . $id . ".txt";
        $rfile = parent::getParameterValue("download_url") . "get/{$nsid}";
        $reuse_local = file_exists($lfile) && parent::getParameterValue('download') != 'true';
        if (!$reuse_local) {
            echo "downloading {$nsid} ";
            $ok = utils::downloadSingle($rfile, $lfile);
            if ($ok === false) {
                echo "unable to download " . $nsid . " ... skipping" . PHP_EOL;
                continue;
            }
            echo "done. ";
        }

        echo "parsing {$nsid} ... ";
        $this->parseEntry($lfile);
        parent::writeRDFBufferToWriteFile();

        if ($db === "pathway") {
            // the KGML download is keyed on the "ko" form of the "map" identifier
            $ko_id = str_replace("map", "ko", $id);
            $lfile = $ldir . $id . ".kgml";
            $rfile = "http://www.kegg.jp/kegg-bin/download?entry=" . $ko_id . "&format=kgml";
            $reuse_local = file_exists($lfile) && parent::getParameterValue('download') != 'true';
            if (!$reuse_local) {
                echo "downloading KGML for {$nsid} ";
                $ok = utils::downloadSingle($rfile, $lfile);
                if ($ok === false) {
                    echo "unable to download " . $nsid . " ... skipping" . PHP_EOL;
                    continue;
                }
                echo "done. ";
            }
            $this->parseKGML($lfile);
            parent::writeRDFBufferToWriteFile();
        }

        echo "done!" . PHP_EOL;
    }
}
/**
 * Convert a parsed InterPro XML document to RDF.
 *
 * First records every member database of the release (and takes the dataset
 * version from the INTERPRO entry), then emits, per <interpro> element: the
 * labelled individual, PubMed cross-references, the abstract (with citation
 * placeholders replaced by PubMed links), example entries, parent/child/
 * contains/found-in relations, secondary accessions, database
 * cross-references and the taxon distribution.
 *
 * The RDF buffer is flushed at the start of each entry, so the triples of the
 * last entry are still buffered when this method returns.
 *
 * @param SimpleXMLElement $xml root element of the InterPro XML file
 *
 * @return void
 */
function Parse($xml)
{
    // state the dataset info
    foreach ($xml->release->dbinfo as $o) {
        $info = $o->attributes();
        $db = $info->dbname . " v" . $info->version . " (" . $info->entry_count . " entries) [" . $info->file_date . "]";
        parent::addRDF(parent::triplifyString(parent::getDatasetURI(), parent::getVoc() . "contains", $db));
        if ((string) $info->dbname === "INTERPRO") {
            parent::setDatasetVersion((string) $info->version);
        }
    }

    // get a potential id list
    $id_list = null;
    if (parent::getParameterValue("id_list") != '') {
        $id_list = explode(",", parent::getParameterValue("id_list"));
    }

    // element holding <rel_ref> children => predicate suffix;
    // 'child' is kept next to 'child_list' for backward compatibility
    $relations = array(
        'parent_list' => 'parent',
        'child_list' => 'child',
        'child' => 'child',
        'contains' => 'contains',
        'found_in' => 'found-in',
    );
    // elements holding <db_xref> children, all emitted as "x-<db>" links
    $xref_lists = array('member_list', 'external_doc_list', 'structure_db_links');

    // now iterate over the entries
    foreach ($xml->interpro as $o) {
        parent::writeRDFBufferToWriteFile();

        $interpro_id = (string) $o->attributes()->id;
        if ($id_list !== null && !in_array($interpro_id, $id_list)) {
            continue;
        }
        echo "Processing {$interpro_id}" . PHP_EOL;

        $name = (string) $o->name;
        $short_name = (string) $o->attributes()->short_name;
        $type = (string) $o->attributes()->type;
        $s = parent::getNamespace() . $interpro_id;

        parent::addRDF(parent::describeIndividual($s, "{$name} ({$short_name}) {$type}", parent::getVoc() . $type));

        // get the pubs; the two lists are parallel (citation tag => pubmed link)
        $cite_tags = array();
        $pubmed_links = array();
        if (isset($o->pub_list->publication)) {
            foreach ($o->pub_list->publication as $p) {
                $pid = (string) $p->attributes()->id;
                if (isset($p->db_xref) && (string) $p->db_xref->attributes()->db === "PUBMED") {
                    $pmid = (string) $p->db_xref->attributes()->dbkey;
                    $cite_tags[] = '<cite idref="' . $pid . '"/>';
                    $pubmed_links[] = '<a href="http://www.ncbi.nlm.nih.gov/pubmed/' . $pmid . '">pubmed:' . $pmid . '</a>';
                    parent::addRDF(parent::triplify($s, parent::getVoc() . "x-pubmed", "pubmed:{$pmid}"));
                }
            }
        }

        // only the first <p> of the abstract is used; it may be absent
        if (isset($o->abstract->p)) {
            $abstract = (string) $o->abstract->p->asXML();
            if (count($cite_tags) > 0) {
                $abstract = str_replace($cite_tags, $pubmed_links, $abstract);
            }
            parent::addRDF(parent::triplifyString($s, "dc:description", $abstract));
        }

        if (isset($o->example_list->example)) {
            foreach ($o->example_list->example as $example) {
                $db = (string) $example->db_xref->attributes()->db;
                $id = (string) $example->db_xref->attributes()->dbkey;
                parent::addRDF(parent::triplify($s, parent::getVoc() . "example-entry", "{$db}:{$id}"));
            }
        }

        foreach ($relations as $element => $predicate) {
            if (!isset($o->{$element}->rel_ref)) {
                continue;
            }
            foreach ($o->{$element}->rel_ref as $rel) {
                $id = (string) $rel->attributes()->ipr_ref;
                parent::addRDF(parent::triplify($s, parent::getVoc() . $predicate, "interpro:{$id}"));
            }
        }

        // secondary accessions live below <sec_list>; the loop variable must not
        // reuse $s, which holds the subject URI
        if (isset($o->sec_list->sec_ac)) {
            foreach ($o->sec_list->sec_ac as $sec) {
                $id = (string) $sec->attributes()->acc;
                parent::addRDF(parent::triplify($s, parent::getVoc() . "secondary-accession", "interpro:{$id}"));
            }
        }

        // xrefs
        foreach ($xref_lists as $element) {
            if (!isset($o->{$element}->db_xref)) {
                continue;
            }
            foreach ($o->{$element}->db_xref as $dbxref) {
                $db = (string) $dbxref->attributes()->db;
                $id = (string) $dbxref->attributes()->dbkey;
                parent::addRDF(parent::triplify($s, parent::getVoc() . "x-" . strtolower($db), "{$db}:{$id}"));
            }
        }

        // taxon distribution
        if (isset($o->taxonomy_distribution->taxon_data)) {
            foreach ($o->taxonomy_distribution->taxon_data as $t) {
                $organism = (string) $t->attributes()->name;
                $number = (string) $t->attributes()->proteins_count;
                parent::addRDF(parent::triplifyString($s, parent::getVoc() . "taxon-distribution", "{$organism} ({$number})"));
            }
        }
    }
}
/**
 * Convert the requested Pathway Commons BioPAX archives to RDF and write a
 * dataset description for the sources and for the generated output.
 *
 * For every requested source the gzipped OWL archive is downloaded when it is
 * missing locally (or when the 'download' parameter equals 'true'), read into
 * memory, handed to BioPAX2Bio2RDF, and the returned RDF is written to
 * "<outdir><source>.<output_format>". A source that cannot be downloaded is
 * reported with a warning and skipped; an archive that cannot be opened still
 * aborts the run, as before.
 *
 * Parameters read: files, dataset_graph, indir, outdir, download_url,
 * download, output_format, bio2rdf_release.
 */
function Run()
{
    // get the work
    if ($this->getParameterValue('files') == 'all') {
        // the first entry of the parameter list is discarded
        // (presumably the 'all' keyword itself -- confirm against the parameter definition)
        $sources = explode("|", parent::getParameterList('files'));
        array_shift($sources);
    } else {
        // comma separated list
        $sources = explode(",", parent::getParameterValue('files'));
    }
    // tolerate "a, b" style lists and stray separators
    $sources = array_filter(array_map('trim', $sources), 'strlen');

    // source name => remote (URL-encoded) archive name
    $download_files = array(
        "homo-sapiens" => "Pathway%20Commons%202%20homo%20sapiens.BIOPAX.owl.gz",
        "hprd" => "Pathway%20Commons%202%20HPRD.BIOPAX.owl.gz",
        "humancyc" => "Pathway%20Commons%202%20HumanCyc.BIOPAX.owl.gz",
        "nci-nature" => "Pathway%20Commons%202%20NCI_Nature.BIOPAX.owl.gz",
        "panther-pathway" => "Pathway%20Commons%202%20PANTHER%20Pathway.BIOPAX.owl.gz",
        "phosphositeplus" => "Pathway%20Commons%202%20PhosphoSitePlus.BIOPAX.owl.gz",
        "reactome" => "Pathway%20Commons%202%20Reactome.BIOPAX.owl.gz",
    );

    // remember the current graph so it can be restored before the description is written
    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    // settings that do not change between sources
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $rdir = parent::getParameterValue('download_url');
    $suffix = parent::getParameterValue('output_format');
    $gz = (strpos($suffix, "gz") !== false);
    $force_download = ($this->getParameterValue('download') == 'true');
    // zero-padded hours, rendered with gmdate() so the literal 'Z' (UTC) suffix is truthful
    $date_format = 'Y-m-d\TH:i:s\Z';

    $dataset_description = '';
    $source_uris = array();

    // iterate over the requested data
    foreach ($sources as $source) {
        echo "processing {$source}... ";

        // set the remote and input files
        $zfile = $source . ".owl.gz";
        $lfile = $ldir . $zfile;
        $known = isset($download_files[$source]);
        $rfile = $rdir . ($known ? $download_files[$source] : '');

        // download if the file doesn't exist locally or we are told to
        if (!file_exists($lfile) || $force_download) {
            if (!$known) {
                echo PHP_EOL;
                trigger_error("No download file is known for source '{$source}'", E_USER_WARNING);
                continue;
            }
            echo "downloading... ";
            $remote = file_get_contents($rfile);
            // never overwrite the local archive with the result of a failed download
            if ($remote === false || file_put_contents($lfile, $remote) === false) {
                echo PHP_EOL;
                trigger_error("Unable to download {$rfile} to {$lfile}", E_USER_WARNING);
                continue;
            }
        }

        // decompress the archive into a buffer
        echo 'extracting... ';
        if (($fpin = gzopen($lfile, "r")) === FALSE) {
            trigger_error("Unable to open {$lfile}", E_USER_ERROR);
            exit;
        }
        $data = '';
        while (!gzeof($fpin)) {
            $buffer = gzgets($fpin, 4096);
            if ($buffer === false) {
                // read error (e.g. truncated archive): stop instead of spinning forever
                break;
            }
            $data .= $buffer;
        }
        gzclose($fpin);

        // set the output file
        $outfile = $source . '.' . $suffix;
        parent::setWriteFile($odir . $outfile, $gz);

        // send for parsing
        $p = new BioPAX2Bio2RDF($this);
        $p->SetBuffer($data)
            ->SetBioPAXVersion(3)
            ->SetBaseNamespace("http://purl.org/pc2/3/")
            ->SetBio2RDFNamespace("http://bio2rdf.org/pathwaycommons:")
            ->SetDatasetURI(parent::getDatasetURI());
        $rdf = $p->Parse();
        parent::addRDF($rdf);

        // write to output
        parent::writeRDFBufferToWriteFile();
        parent::getWriteFile()->Close();
        echo "done!" . PHP_EOL;

        // generate dataset description
        echo "Generating dataset description for {$zfile}... ";
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("Pathway Commons")
            ->setRetrievedDate(gmdate($date_format, filemtime($lfile)))
            ->setFormat("rdf/xml")
            ->setPublisher("http://www.pathwaycommons.org/")
            ->setHomepage("http://www.pathwaycommons.org/")
            ->setRights("use")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://www.pathwaycommons.org/pc2/home.html#data_sources")
            ->setDataset("http://identifiers.org/pathwaycommons/");
        $dataset_description .= $source_file->toRDF();
        $source_uris[] = $source_file->getURI();
        echo "done!" . PHP_EOL;
    }

    if (count($source_uris) == 0) {
        // nothing was converted, so there is nothing to describe
        parent::setGraphURI($graph_uri);
        trigger_error("No Pathway Commons sources were processed; no dataset description written", E_USER_WARNING);
        return;
    }

    echo "Generating dataset description for Bio2RDF Pathways Commons dataset... ";
    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = gmdate($date_format);
    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/pathwaycommons/pathwaycommons.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());

    // record every processed source rather than only the last one
    // NOTE(review): assumes setSource() may be called repeatedly, like setRights()/setFormat();
    // if it overwrites instead, the last source wins, which is the previous behaviour
    foreach ($source_uris as $source_uri) {
        $output_file->setSource($source_uri);
    }

    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strstr($suffix, "nt")) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }
    $dataset_description .= $output_file->toRDF();

    // write dataset description to file, using the graph that was active on entry
    parent::setGraphURI($graph_uri);
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}