/**
 * Converts every "*.xml.gz" file found in the 'indir' directory and then
 * writes the dataset description (release file) into the 'outdir' directory.
 *
 * What the method does, in order:
 *  - sets the 'dataset' checkpoint;
 *  - loads the optional comma-separated 'id_list' parameter into
 *    $this->id_list, flipped so that the ids become array keys (null when the
 *    parameter is empty);
 *  - when the 'dataset_graph' parameter is set, switches the graph URI to the
 *    dataset URI for the duration of the run and restores it afterwards;
 *  - passes each matching file to process_file() and clears the buffer;
 *  - serialises a source and an output DataResource description.
 *
 * Timestamps are produced with gmdate() and a two-digit hour ("H"): the
 * pattern ends in a literal "Z" (UTC), which local time from date() and the
 * unpadded hour of "G" did not honour.
 */
function process_dir()
{
    $this->setCheckPoint('dataset');

    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $output_format = parent::getParameterValue('output_format');
    $timestamp_format = "Y-m-d\\TH:i:s\\Z";

    // optional id filter: flipped so ids are keys
    $this->id_list = null;
    $requested_ids = parent::getParameterValue('id_list');
    if ($requested_ids != '') {
        $this->id_list = array_flip(explode(",", trim($requested_ids)));
    }

    $graph_uri = parent::getGraphURI();
    $gz = strstr($output_format, ".gz") !== false;

    // set graph URI to dataset graph
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    // glob() returns false on error; treat that as "nothing to process"
    $files = glob($ldir . "*.xml.gz");
    if ($files === false) {
        $files = array();
    }
    $total = count($files);
    foreach ($files as $i => $file) {
        echo "Processing {$file} (" . ($i + 1) . "/" . $total . ") ...";
        $this->process_file($file);
        parent::clear();
        echo "done!" . PHP_EOL;
    }

    // description of the upstream source; its retrieval date is the
    // modification time of the input directory
    $source_file = (new DataResource($this))
        ->setURI("http://www.ncbi.nlm.nih.gov/pubmed")
        ->setTitle("NCBI PubMed")
        ->setRetrievedDate(gmdate($timestamp_format, filemtime($ldir)))
        ->setFormat("text/xml")
        ->setPublisher("http://ncbi.nlm.nih.gov/")
        ->setHomepage("http://www.ncbi.nlm.nih.gov/pubmed/")
        ->setRights("use-share-modify")
        ->setLicense("http://www.nlm.nih.gov/databases/license/license.html")
        ->setDataset("http://identifiers.org/pubmed/");

    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = gmdate($timestamp_format);

    // NOTE(review): the title literal below contains a raw line break after
    // "generated at "; it is kept byte-for-byte - confirm it is intended.
    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at 
{$date})")
        ->setSource($source_file->getURI())
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/pubmed/pubmed.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());

    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strstr($output_format, "nt")) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }

    $dataset_description = $source_file->toRDF() . $output_file->toRDF();

    // set graph URI back to default
    parent::setGraphURI($graph_uri);

    // write the dataset description
    $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
    $this->getWriteFile()->write($dataset_description);
    $this->getWriteFile()->close();
}
/**
 * Converts the requested CTD files to RDF and writes the dataset description.
 *
 * The 'files' parameter is either 'all' (expanded from the declared parameter
 * list, whose first entry is dropped) or a comma-separated list. For every
 * file the method downloads the local copy when it is missing, calls the
 * matching "CTD_<file>" method of this object, and appends a source and an
 * output DataResource description. The combined description is written to the
 * release file in 'outdir' at the end.
 *
 * Fixes relative to the previous version:
 *  - timestamps use gmdate() with a two-digit hour ("H"), matching the literal
 *    "Z" (UTC) suffix of the pattern;
 *  - the source title is now "(<file>.gz)"; it used to lack the closing
 *    parenthesis and contained a doubled dot.
 */
function process()
{
    // get the file list
    if (parent::getParameterValue('files') == 'all') {
        $files = explode("|", parent::getParameterList('files'));
        array_shift($files); // drop the first entry of the parameter list
    } else {
        $files = explode(",", parent::getParameterValue('files'));
    }

    // set directory values
    $ldir = parent::getParameterValue('indir');
    $rdir = parent::getParameterValue('download_url');
    $odir = parent::getParameterValue('outdir');

    // the output format does not change between files, so derive it once
    $output_format = parent::getParameterValue('output_format');
    $gz = strstr($output_format, "gz") !== false;
    $timestamp_format = "Y-m-d\\TH:i:s\\Z";

    // local copies always carry this suffix
    $gz_suffix = ".gz";
    // remote files that are not published as ".tsv.gz"
    $remote_suffixes = array(
        'chem_gene_ixn_types' => '.tsv',
        'exposure_ontology' => '.obo',
    );
    // files that map onto a dedicated dataset namespace
    $datasets = array(
        "chemicals" => "http://identifiers.org/ctd.chemical/",
        "diseases" => "http://identifiers.org/ctd.disease/",
        "genes" => "http://identifiers.org/ctd.gene/",
    );

    $dataset_description = '';
    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    foreach ($files as $file) {
        $suffix = isset($remote_suffixes[$file]) ? $remote_suffixes[$file] : ".tsv.gz";
        $lfile = $ldir . $file . $gz_suffix;
        $rfile = $rdir . 'CTD_' . $file . $suffix;

        if (!file_exists($lfile)) {
            trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
            // uncompressed remote files are written through the zlib wrapper
            // so that the local copy is gzip-compressed like the others
            $target = ($suffix == ".tsv.gz") ? $lfile : "compress.zlib://" . $lfile;
            Utils::DownloadSingle($rfile, $target);
        }

        $ofile = "ctd_" . $file . "." . $output_format;

        echo "Processing " . $file . " ...";
        parent::setWriteFile($odir . $ofile, $gz);

        // set read file
        parent::setReadFile($lfile, TRUE);

        // dispatch to the parser method named after the file
        $fnx = "CTD_" . $file;
        $this->{$fnx}();

        // close write file
        parent::getWriteFile()->close();
        parent::clear();
        echo "done!" . PHP_EOL;

        // generate the dataset release file
        // (the line break inside the literal is part of the emitted text)
        echo "Generating dataset description... 
";
        $dataset = isset($datasets[$file]) ? $datasets[$file] : null;

        // dataset description
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("Comparative Toxicogenomics Database ({$file}{$gz_suffix})")
            ->setRetrievedDate(gmdate($timestamp_format, filemtime($lfile)))
            ->setFormat("text/tab-separated-value")
            ->setFormat("application/gzip")
            ->setPublisher("http://ctdbase.org/")
            ->setHomepage("http://ctdbase.org/")
            ->setRights("use")
            ->setRights("by-attribution")
            ->setRights("no-commercial")
            ->setLicense("http://ctdbase.org/about/legal.jsp")
            ->setDataset($dataset);

        $prefix = parent::getPrefix();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $date = gmdate($timestamp_format);

        $output_file = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ctd/ctd.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());

        if ($gz) {
            $output_file->setFormat("application/gzip");
        }
        if (strstr($output_format, "nt")) {
            $output_file->setFormat("application/n-triples");
        } else {
            $output_file->setFormat("application/n-quads");
        }

        $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
    }

    // restore the graph URI and write the combined description
    parent::setGraphURI($graph_uri);
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Converts the requested WormBase files to RDF and writes the dataset
 * description.
 *
 * The 'files' parameter is either 'all' (expanded from the declared parameter
 * list, whose first entry is dropped) or a comma-separated list of the keys
 * known to the remote/local file maps below. Each file is downloaded when it
 * is missing locally or when the 'download' parameter is set, parsed by the
 * method of this object that carries the same name as the file key, and
 * described by a source and an output DataResource.
 *
 * Fixes relative to the previous version:
 *  - timestamps use gmdate() with a two-digit hour ("H"), matching the literal
 *    "Z" (UTC) suffix of the pattern;
 *  - file names that are not in the maps are skipped with a warning instead of
 *    running into undefined-index look-ups and a call to an arbitrary method.
 */
public function run()
{
    if (parent::getParameterValue('files') == 'all') {
        $files = explode("|", parent::getParameterList('files'));
        array_shift($files); // drop the first entry of the parameter list
    } else {
        $files = explode(",", parent::getParameterValue('files'));
    }

    $release = parent::getParameterValue('release');
    // release tag used for the ontology association files
    $releaseb = "WS249";

    $annotation_base = "species/c_elegans/annotation/";
    $ontology_base = "releases/current-production-release/ONTOLOGY/";
    $remote_files = array(
        "geneIDs" => $annotation_base . "geneIDs/c_elegans.PRJNA13758." . $release . ".geneIDs.txt.gz",
        "functional_descriptions" => $annotation_base . "functional_descriptions/c_elegans.PRJNA13758." . $release . ".functional_descriptions.txt.gz",
        "gene_interactions" => $annotation_base . "gene_interactions/c_elegans.PRJNA13758." . $release . ".gene_interactions.txt.gz",
        "gene_associations" => $ontology_base . "gene_association." . $releaseb . ".wb",
        "phenotype_associations" => $ontology_base . "phenotype_association." . $releaseb . ".wb",
    );
    $local_files = array(
        "geneIDs" => "wormbase." . $release . ".genes.txt.gz",
        "functional_descriptions" => "wormbase." . $release . ".functional_descriptions.txt.gz",
        "gene_interactions" => "wormbase." . $release . ".gene_interactions.txt.gz",
        "gene_associations" => "wormbase." . $release . ".gene_association.wb",
        "phenotype_associations" => "wormbase." . $release . ".phenotype_associations.wb",
    );

    $idir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $rdir = parent::getParameterValue('download_url');

    // the output format does not change between files, so derive it once
    $output_format = parent::getParameterValue('output_format');
    $gz = strstr($output_format, "gz") !== false;
    $timestamp_format = "Y-m-d\\TH:i:s\\Z";

    $dataset_description = '';
    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    foreach ($files as $file) {
        // only keys present in both maps can be located and parsed
        if (!isset($local_files[$file]) || !isset($remote_files[$file])) {
            trigger_error("Unknown WormBase file '{$file}'; skipping.", E_USER_WARNING);
            continue;
        }

        $lfile = $idir . $local_files[$file];
        $rfile = $rdir . $remote_files[$file];

        if (!file_exists($lfile) || parent::getParameterValue('download') == true) {
            trigger_error($lfile . " not found. Will attempt to download." . PHP_EOL, E_USER_WARNING);
            echo "Downloading {$rfile}... ";
            Utils::DownloadSingle($rfile, $lfile);
            echo "done!" . PHP_EOL;
        }

        // the second argument flags a gzip-compressed local file
        parent::setReadFile($lfile, strstr($lfile, "gz") !== false);

        $ofile = "wormbase." . $file . "." . $output_format;
        parent::setWriteFile($odir . $ofile, $gz);

        echo "Processing {$file}... ";
        // dispatch to the parser method named after the file key
        $fnx = $file;
        $this->{$fnx}();
        echo "done!" . PHP_EOL;
        parent::getWriteFile()->close();

        // generate the dataset release file
        echo "Generating dataset description for {$ofile}... ";

        // dataset description
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("WormBase Release " . $release . " subset ({$file})")
            ->setRetrievedDate(gmdate($timestamp_format, filemtime($lfile)))
            ->setFormat("text/tab-separated-value")
            ->setFormat("application/gzip")
            ->setPublisher("http://wormbase.org/")
            ->setHomepage("http://wormbase.org/")
            ->setRights("use")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://www.wormbase.org/about/policies")
            ->setDataset("http://identifiers.org/wormbase/");

        $prefix = parent::getPrefix();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $date = gmdate($timestamp_format);

        // NOTE(review): the title literal below contains a raw line break
        // after "- "; it is kept byte-for-byte - confirm it is intended.
        $output_file = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - 
{$file}")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/wormbase/wormbase.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());

        if ($gz) {
            $output_file->setFormat("application/gzip");
        }
        if (strstr($output_format, "nt")) {
            $output_file->setFormat("application/n-triples");
        } else {
            $output_file->setFormat("application/n-quads");
        }

        $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
        echo "done!" . PHP_EOL;
    }

    // restore the graph URI and write the combined description
    parent::setGraphURI($graph_uri);
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
}
/**
 * Begins parsing the local copy of the PubChem substances directory.
 *
 * Every entry of "<indir>/substances/" (except "." and "..") is handed to
 * parse_substance_file(); the result is written to
 * "<outdir>/substances/<name>.<output_format>", where <name> is the entry's
 * base name without ".xml.gz". Afterwards a source and an output DataResource
 * description are written to the release file in "<outdir>/substances/".
 *
 * The script prints a message and exits when the input directory cannot be
 * opened.
 *
 * Fixes relative to the previous version:
 *  - timestamps use gmdate() with a two-digit hour ("H"), matching the literal
 *    "Z" (UTC) suffix of the pattern;
 *  - whether the output is gzip-compressed is decided before the directory
 *    loop, so the description is also correct when the directory is empty.
 **/
function parse_substances()
{
    $input_dir = $this->getParameterValue('indir') . "/substances/";
    $output_format = parent::getParameterValue('output_format');
    $gz = strstr($output_format, "gz") !== false;
    $timestamp_format = "Y-m-d\\TH:i:s\\Z";

    $this->CreateDirectory($this->getParameterValue('outdir') . "/substances/");
    // resolved once; the output directory does not change while parsing
    $output_dir = realpath($this->getParameterValue('outdir')) . "/substances/";

    parent::setDatasetURI("bio2rdf_dataset:bio2rdf-" . $this->getPcsPrefix() . "-" . date("Ymd"));
    $graph_uri = parent::getGraphURI();

    // set graph URI to dataset uri
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    $handle = opendir($input_dir);
    if (!$handle) {
        echo "unable to read directory contents: " . $input_dir . "\n";
        exit;
    }

    while (false !== ($file = readdir($handle))) {
        // skip the directory self/parent entries
        if ($file === "." || $file === "..") {
            continue;
        }

        echo "Processing file: " . $input_dir . $file . PHP_EOL;
        $outfile = $output_dir . basename($file, ".xml.gz") . "." . $output_format;
        echo "... into " . $outfile . PHP_EOL;

        parent::setCheckpoint('file');
        $this->setWriteFile($outfile, $gz);
        $this->parse_substance_file($input_dir, $file);
        $this->getWriteFile()->close();
    }
    closedir($handle);

    // description of the upstream source; its retrieval date is the
    // modification time of the input directory
    $source_file = (new DataResource($this))
        ->setURI("http://www.ncbi.nlm.nih.gov/pcsubstance")
        ->setTitle("PubChem Substance")
        ->setRetrievedDate(gmdate($timestamp_format, filemtime($input_dir)))
        ->setFormat("text/xml")
        ->setFormat("application/zip")
        ->setPublisher("http://ncbi.nlm.nih.gov/")
        ->setHomepage("http://pubchem.ncbi.nlm.nih.gov/")
        ->setRights("use")
        ->setRights("restricted-by-source-license")
        ->setLicense("ftp://ftp.ncbi.nlm.nih.gov/pubchem/README")
        ->setDataset("http://identifiers.org/pubchem.substance/");

    $prefix = $this->getPcsPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = gmdate($timestamp_format);

    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
        ->setSource($source_file->getURI())
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/pubchem/pubchem.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());

    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strstr($output_format, "nt")) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }

    $dataset_description = $source_file->toRDF() . $output_file->toRDF();

    // set graph URI back to default
    parent::setGraphURI($graph_uri);

    // write the dataset description
    $this->setWriteFile($this->getParameterValue('outdir') . "/substances/" . $this->getBio2RDFReleaseFile());
    $this->getWriteFile()->write($dataset_description);
    $this->getWriteFile()->close();
}
/**
 * Downloads (when needed) and converts the requested NCBI Gene modules, then
 * writes the dataset description.
 *
 * The 'files' parameter is either 'all' (the complete package map) or a
 * comma-separated selection; names that are not keys of the package map are
 * ignored. When 'limit_organisms' is set, the comma-separated 'organisms'
 * parameter is stored in $this->taxids with the ids as array keys.
 *
 * The work is done in two passes over the selected modules: the first makes
 * sure every local "<module>.gz" exists (or is refreshed when 'download' is
 * set), the second parses each module with the method of this object that
 * carries the module's name ('gene2refseq' is parsed by gene2accession) and
 * appends a source and an output DataResource description.
 *
 * Timestamps are produced with gmdate() and a two-digit hour ("H"): the
 * pattern ends in a literal "Z" (UTC), which local time from date() and the
 * unpadded hour of "G" did not honour.
 */
function process()
{
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $rdir = parent::getParameterValue('download_url');

    // which files are to be converted?
    $requested = trim($this->GetParameterValue('files'));
    if ($requested == 'all') {
        $files = $this->getPackageMap();
    } else {
        $package_map = $this->getPackageMap();
        $files = array();
        // keep the order of the request; drop names the map does not know
        foreach (explode(",", $requested) as $name) {
            if (array_key_exists($name, $package_map)) {
                $files[$name] = $package_map[$name];
            }
        }
    }

    if ($this->getParameterValue('limit_organisms') == true) {
        $this->taxids = array_flip(explode(",", $this->getParameterValue('organisms')));
    }

    // set dataset graph to be dataset URI
    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    // the output format does not change between modules, so derive it once
    $output_format = parent::getParameterValue('output_format');
    $gz = strstr($output_format, "gz") !== false;
    $timestamp_format = "Y-m-d\\TH:i:s\\Z";
    $dataset_description = '';

    // pass 1: make sure every local file is present
    foreach ($files as $module => $rfilename) {
        $lfile = $ldir . $module . ".gz";
        $rfile = $rdir . $rfilename;

        if (file_exists($lfile) && parent::getParameterValue('download') != true) {
            continue;
        }

        trigger_error("{$lfile} not found. Will attempt to download.", E_USER_NOTICE);
        // these two modules are written through the zlib wrapper so that the
        // local copy ends up gzip-compressed
        $target = $lfile;
        if ($module == "gene2sts" || $module == "gene2unigene") {
            $target = "compress.zlib://" . $lfile;
        }
        echo "downloading {$module} ...";
        utils::DownloadSingle($rfile, $target);
        echo "done" . PHP_EOL;
    }

    // pass 2: parse each module and describe it
    foreach ($files as $module => $rfilename) {
        $lfile = $ldir . $module . ".gz";
        $rfile = $rdir . $rfilename;
        $ofile = $module . "." . $output_format;

        echo "Processing {$module} ... ";
        parent::setReadFile($lfile, true);
        parent::setWriteFile($odir . $ofile, $gz);

        // dispatch to the parser method named after the module
        $fnx = ($module == 'gene2refseq') ? 'gene2accession' : $module;
        $this->{$fnx}();
        parent::clear();
        echo 'done!' . PHP_EOL;

        parent::getReadFile()->close();
        parent::getWriteFile()->close();

        // dataset description
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("NCBI Gene ({$module})")
            ->setRetrievedDate(gmdate($timestamp_format, filemtime($lfile)))
            ->setFormat("text/tab-separated-value")
            ->setFormat("application/gzip")
            ->setPublisher("http://www.ncbi.nlm.nih.gov")
            ->setHomepage("http://www.ncbi.nlm.nih.gov/gene")
            ->setRights("use-share-modify")
            ->setLicense("http://www.ncbi.nlm.nih.gov/About/disclaimer.html")
            ->setDataset("http://identifiers.org/ncbigene/");

        $prefix = parent::getPrefix();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $date = gmdate($timestamp_format);

        $output_file = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ncbigene/ncbigene.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());

        if ($gz) {
            $output_file->setFormat("application/gzip");
        }
        if (strstr($output_format, "nt")) {
            $output_file->setFormat("application/n-triples");
        } else {
            $output_file->setFormat("application/n-quads");
        }

        $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
    }

    // set graph URI back to default value
    parent::setGraphURI($graph_uri);

    // write dataset description to file
    echo "Generating dataset description... ";
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Converts SABIO-RK reactions (BioPAX) into a single RDF output file and
 * writes the dataset description.
 *
 * The 'files' parameter selects the reactions: 'all', a comma-separated list
 * of ids, a hyphenated range "a-b", or a single id. The list of reaction ids
 * is read from "<indir>reactions.xml", which is fetched from the REST service
 * when it is missing or when 'download' is 'true'. Each selected reaction is
 * cached as "<indir>reaction_<id>.owl.gz", converted with BioPAX2Bio2RDF and
 * appended to "<outdir>sabiork.<output_format>".
 *
 * Fixes relative to the previous version:
 *  - the download failure check tested the file *path* instead of the
 *    downloaded data, so a failed download was written to disk; it now tests
 *    the data and exits as originally intended;
 *  - a reaction list that cannot be parsed is reported before exiting instead
 *    of failing on the first property access;
 *  - the reaction id element is converted to a string once, before it is
 *    compared and used in paths;
 *  - timestamps use gmdate() with a two-digit hour ("H"), matching the literal
 *    "Z" (UTC) suffix of the pattern.
 */
function Run()
{
    $idir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $files = parent::getParameterValue('files');
    $timestamp_format = "Y-m-d\\TH:i:s\\Z";

    // set the work: null means "every reaction in the list"
    $myfiles = null;
    if ($files != 'all') {
        // check if comma-separated, or hyphen-range
        $list = explode(",", $files);
        if (count($list) == 1) {
            // try hyphen separated
            $range = explode("-", $files);
            if (count($range) == 2) {
                for ($n = $range[0]; $n <= $range[1]; $n++) {
                    $myfiles[] = $n;
                }
            } else {
                // must be a single entry
                $myfiles[] = $files;
            }
        } else {
            $myfiles = $list;
        }
    }

    $rest_uri = 'http://sabiork.h-its.org/sabioRestWebServices/';
    $getReactionIds_url = $rest_uri . "suggestions/SABIOReactionIDs";
    $reaction_list_file = $idir . "reactions.xml";

    if (!file_exists($reaction_list_file) || parent::getParameterValue('download') == 'true') {
        $listing = file_get_contents($getReactionIds_url);
        if ($listing === false) {
            trigger_error("Unable to download {$getReactionIds_url}", E_USER_WARNING);
            exit;
        }
        $f = new FileFactory($reaction_list_file);
        $f->Write($listing);
        $f->Close();
    }

    $xml = simplexml_load_file($reaction_list_file);
    if ($xml === false) {
        trigger_error("Unable to parse {$reaction_list_file}", E_USER_WARNING);
        exit;
    }

    $total = ($myfiles !== null) ? count($myfiles) : count($xml->SABIOReactionID);
    $processed = 0;

    parent::setCheckpoint('dataset');
    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    $output_format = parent::getParameterValue('output_format');
    $ofile = "sabiork." . $output_format;
    $gz = strstr($output_format, "gz") !== false;
    parent::setWriteFile($odir . $ofile, $gz);

    foreach ($xml->SABIOReactionID as $rid_node) {
        parent::setCheckpoint('file');

        // work with the element's text, not the SimpleXMLElement object
        $rid = (string) $rid_node;
        if ($myfiles !== null && !in_array($rid, $myfiles)) {
            continue;
        }

        $processed++;
        echo "{$processed} / {$total} : reaction {$rid}" . PHP_EOL;

        $reaction_file = $idir . "reaction_" . $rid . ".owl.gz";
        if (!file_exists($reaction_file) || $this->GetParameterValue('download') == 'true') {
            $url = $rest_uri . 'searchKineticLaws/biopax?q=SabioReactionID:' . $rid;
            $data = file_get_contents($url);
            if ($data === FALSE) {
                // nothing retrieved for this reaction; move on to the next
                continue;
            }
            $f = new FileFactory($reaction_file, true);
            $f->Write($data);
            $f->Close();
        }

        $buf = file_get_contents("compress.zlib://" . $reaction_file);

        // send for parsing
        $p = new BioPAX2Bio2RDF($this);
        $p->SetBuffer($buf)
            ->SetBioPAXVersion(3)
            ->SetBaseNamespace("http://sabio.h-its.org/biopax#")
            ->SetBio2RDFNamespace("http://bio2rdf.org/sabiork:")
            ->SetDatasetURI($this->GetDatasetURI());
        $rdf = $p->Parse();
        parent::getWriteFile()->Write($rdf);
    }
    parent::getWriteFile()->Close();

    // generate dataset description
    echo "Generating dataset description... ";

    // the retrieval date is taken from the output file written above
    $source_file = (new DataResource($this))
        ->setURI("http://sabiork.h-its.org/sabioRestWebServices/searchKineticLaws/biopax")
        ->setTitle("SABIO-RK Biochemical Reaction Kinetics Database")
        ->setRetrievedDate(gmdate($timestamp_format, filemtime($odir . $ofile)))
        ->setFormat("text/xml")
        ->setPublisher("http://sabio.villa-bosch.de/")
        ->setHomepage("http://sabio.villa-bosch.de/")
        ->setRights("use-share-modify")
        ->setRights("no-commercial")
        ->setLicense("http://sabio.villa-bosch.de/layouts/content/termscondition.gsp")
        ->setDataset("http://identifiers.org/sabiork.reaction/");

    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = gmdate($timestamp_format);

    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
        ->setSource($source_file->getURI())
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sabiork/sabiork.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());

    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strstr($output_format, "nt")) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }

    $dataset_description = $source_file->toRDF() . $output_file->toRDF();

    // write dataset description to file
    parent::setGraphURI($graph_uri);
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Converts the requested GenAge archives ("human", "models") to RDF and
 * writes the dataset description.
 *
 * The 'files' parameter is either 'all' (expanded from the declared parameter
 * list, whose first entry is dropped) or a comma-separated list. For each
 * entry the zip archive is downloaded when missing, the CSV member is opened
 * as a stream and installed as the read file's pointer, and the method of
 * this object named after the entry does the parsing.
 *
 * Fixes relative to the previous version:
 *  - ZipArchive::open() returns TRUE on success and an integer error code on
 *    failure, never FALSE, so the previous "=== FALSE" check could not fire;
 *    the result is now compared against TRUE;
 *  - the gzip flag was read before it was assigned and computed twice, and an
 *    unused local was built per file; both are gone;
 *  - an entry name without a known archive is reported and aborts explicitly
 *    instead of running into undefined indexes/variables;
 *  - timestamps use gmdate() with a two-digit hour ("H"), matching the literal
 *    "Z" (UTC) suffix of the pattern.
 *
 * @return false|null FALSE when an entry is unknown or its CSV member cannot
 *                    be read; nothing otherwise. The script exits when an
 *                    archive cannot be opened.
 */
function process()
{
    if (parent::getParameterValue('files') == 'all') {
        $files = explode("|", parent::getParameterList('files'));
        array_shift($files); // drop the first entry of the parameter list
    } else {
        $files = explode(",", parent::getParameterValue('files'));
    }

    // archive on the server / in 'indir', and the CSV member inside it
    $remote_files = array("human" => "human_genes.zip", "models" => "models_genes.zip");
    $zip_entries = array("human" => "genage_human.csv", "models" => "genage_models.csv");

    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $rdir = parent::getParameterValue('download_url');

    // the output format does not change between files, so derive it once
    $output_format = parent::getParameterValue('output_format');
    $gz = strstr($output_format, "gz") !== false;
    $timestamp_format = "Y-m-d\\TH:i:s\\Z";

    $dataset_description = '';
    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    foreach ($files as $file) {
        if (!isset($remote_files[$file])) {
            trigger_error("Unknown GenAge file '{$file}'", E_USER_WARNING);
            return FALSE;
        }

        $lfile = $ldir . $remote_files[$file];
        $rfile = $rdir . $remote_files[$file];
        if (!file_exists($lfile)) {
            trigger_error($lfile . " not found. Will attempt to download." . PHP_EOL, E_USER_WARNING);
            echo "Downloading {$rfile}... ";
            Utils::DownloadSingle($rfile, $lfile);
            echo "done!" . PHP_EOL;
        }

        $ofile = "genage_" . $file . '.' . $output_format;

        $zin = new ZipArchive();
        if ($zin->open($lfile) !== true) {
            trigger_error("Unable to open {$lfile}");
            exit;
        }

        $zipentry = $zip_entries[$file];
        if (($fp = $zin->getStream($zipentry)) === FALSE) {
            trigger_error("Unable to get {$zipentry} in ziparchive {$lfile}");
            return FALSE;
        }

        // read from the archive member rather than from the archive itself
        parent::SetReadFile($lfile);
        parent::GetReadFile()->SetFilePointer($fp);

        // set the write file, parse, write and close
        parent::setWriteFile($odir . $ofile, $gz);
        echo "Processing {$lfile}... ";
        $fnx = $file;
        $this->{$fnx}();
        echo "done!" . PHP_EOL;
        parent::getWriteFile()->close();

        // generate the dataset release file
        echo "Generating dataset description for {$ofile}... ";

        // dataset description
        // NOTE(review): the source is a .zip archive but is described as
        // "application/gzip" - left unchanged, confirm which is wanted.
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("Human Ageing Genomic Resources GenAge database (" . $remote_files[$file] . ")")
            ->setRetrievedDate(gmdate($timestamp_format, filemtime($lfile)))
            ->setFormat("text/comma-separated-value")
            ->setFormat("application/gzip")
            ->setPublisher("http://genomics.senescence.info/")
            ->setHomepage("http://genomics.senescence.info/genes/")
            ->setRights("use")
            ->setLicense("http://genomics.senescence.info/legal.html")
            ->setDataset("http://identifiers.org/genage/");

        $prefix = parent::getPrefix();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $date = gmdate($timestamp_format);

        $output_file = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/genage/genage.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());

        if ($gz) {
            $output_file->setFormat("application/gzip");
        }
        if (strstr($output_format, "nt")) {
            $output_file->setFormat("application/n-triples");
        } else {
            $output_file->setFormat("application/n-quads");
        }

        $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
        echo "done!" . PHP_EOL;
    }

    // restore the graph URI and write the combined description
    parent::setGraphURI($graph_uri);
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
}
/**
 * Converts the requested Pathway Commons BioPAX exports to RDF and writes the
 * dataset description.
 *
 * The 'files' parameter is either 'all' (expanded from the declared parameter
 * list, whose first entry is dropped) or a comma-separated list of the keys
 * of the download map below. Each source is cached as "<indir><source>.owl.gz"
 * (fetched when missing or when 'download' is 'true'), decompressed into
 * memory, converted with BioPAX2Bio2RDF and written to
 * "<outdir><source>.<output_format>". One source description is emitted per
 * source; a single output description, whose source is the last source
 * processed, is emitted at the end.
 *
 * Fixes relative to the previous version:
 *  - the download map key for the Homo sapiens export was garbled
 *    ("h**o-sapiens") and is restored to "homo-sapiens";
 *  - a failed download used to be written as an empty cache file that was
 *    never fetched again; it now aborts like an unreadable cache file does;
 *  - directories and the output format are read once before the loop, so the
 *    values used after the loop no longer depend on the loop body;
 *  - timestamps use gmdate() with a two-digit hour ("H"), matching the literal
 *    "Z" (UTC) suffix of the pattern.
 */
function Run()
{
    // get the work
    if ($this->GetParameterValue('files') == 'all') {
        $sources = explode("|", parent::getParameterList('files'));
        array_shift($sources); // drop the first entry of the parameter list
    } else {
        // comma separated list
        $sources = explode(",", parent::getParameterValue('files'));
    }

    $download_files = array(
        "homo-sapiens" => "Pathway%20Commons%202%20homo%20sapiens.BIOPAX.owl.gz",
        "hprd" => "Pathway%20Commons%202%20HPRD.BIOPAX.owl.gz",
        "humancyc" => "Pathway%20Commons%202%20HumanCyc.BIOPAX.owl.gz",
        "nci-nature" => "Pathway%20Commons%202%20NCI_Nature.BIOPAX.owl.gz",
        "panther-pathway" => "Pathway%20Commons%202%20PANTHER%20Pathway.BIOPAX.owl.gz",
        "phosphositeplus" => "Pathway%20Commons%202%20PhosphoSitePlus.BIOPAX.owl.gz",
        "reactome" => "Pathway%20Commons%202%20Reactome.BIOPAX.owl.gz",
    );

    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $rdir = parent::getParameterValue('download_url');

    // the output format does not change between sources, so derive it once
    $output_format = parent::getParameterValue('output_format');
    $gz = strstr($output_format, "gz") !== false;
    $timestamp_format = "Y-m-d\\TH:i:s\\Z";

    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    $dataset_description = '';

    // iterate over the requested data
    foreach ($sources as $source) {
        echo "processing {$source}... ";

        // set the remote and input files
        $zfile = $source . ".owl.gz";
        $rfile = $rdir . $download_files[$source];
        $lfile = $ldir . $zfile;

        // download if the file doesn't exist locally or we are told to
        if (!file_exists($lfile) || $this->GetParameterValue('download') == 'true') {
            echo "downloading... ";
            $payload = file_get_contents($rfile);
            if ($payload === false) {
                // do not leave an empty cache file behind
                trigger_error("Unable to download {$rfile}", E_USER_ERROR);
                exit;
            }
            file_put_contents($lfile, $payload);
        }

        // decompress the cached file into a buffer
        echo 'extracting... ';
        if (($fpin = gzopen($lfile, "r")) === FALSE) {
            trigger_error("Unable to open {$lfile}", E_USER_ERROR);
            exit;
        }
        $data = '';
        while (!gzeof($fpin)) {
            $data .= gzgets($fpin, 4096);
        }
        gzclose($fpin);

        // set the output file
        $outfile = $source . '.' . $output_format;
        parent::setWriteFile($odir . $outfile, $gz);

        // send for parsing
        $p = new BioPAX2Bio2RDF($this);
        $p->SetBuffer($data)
            ->SetBioPAXVersion(3)
            ->SetBaseNamespace("http://purl.org/pc2/3/")
            ->SetBio2RDFNamespace("http://bio2rdf.org/pathwaycommons:")
            ->SetDatasetURI(parent::getDatasetURI());
        $rdf = $p->Parse();
        parent::addRDF($rdf);

        // write to output
        parent::writeRDFBufferToWriteFile();
        parent::getWriteFile()->Close();
        echo "done!" . PHP_EOL;

        // generate dataset description
        echo "Generating dataset description for {$zfile}... ";
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("Pathway Commons")
            ->setRetrievedDate(gmdate($timestamp_format, filemtime($lfile)))
            ->setFormat("rdf/xml")
            ->setPublisher("http://www.pathwaycommons.org/")
            ->setHomepage("http://www.pathwaycommons.org/")
            ->setRights("use")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://www.pathwaycommons.org/pc2/home.html#data_sources")
            ->setDataset("http://identifiers.org/pathwaycommons/");
        $dataset_description .= $source_file->toRDF();
        echo "done!" . PHP_EOL;
    }

    // (the line break inside the literal is part of the emitted text)
    echo "Generating dataset description for Bio2RDF Pathways Commons dataset... 
";
    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = gmdate($timestamp_format);

    // $source_file is the description of the last source handled above
    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
        ->setSource($source_file->getURI())
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/pathwaycommons/pathwaycommons.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());

    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strstr($output_format, "nt")) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }

    $dataset_description .= $output_file->toRDF();

    // write dataset description to file
    parent::setGraphURI($graph_uri);
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Convert BioModels BioPAX (level 3) exports into Bio2RDF RDF and write the
 * dataset description (release) file.
 *
 * The 'files' parameter selects the models to process:
 *  - "all" / "curated": the identifier list is fetched from the BioModels SOAP
 *    service and cached as JSON in the input directory;
 *  - "N-M": a numeric range of model numbers;
 *  - "a,b,c": a comma separated list of model numbers.
 *
 * @return bool|null FALSE when the identifier list cannot be retrieved from the
 *                   web service; nothing otherwise.
 */
function Run() {
    // directory shortcuts
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');

    // get the work specified; start from an array so that an empty selection
    // can never leave $entries undefined when it is counted/iterated below
    $entries = array();
    $list = trim(parent::getParameterValue('files'));
    if ($list == 'all' || $list == 'curated') {
        // getAllModelsId / getAllCuratedModelsId web service, cached on disk
        $all = ($list == 'all');
        $file = $ldir . ($all ? "all_models.json" : "curated_models.json");
        if (!file_exists($file)) {
            try {
                $x = @new SoapClient("http://www.ebi.ac.uk/biomodels-main/services/BioModelsWebServices?wsdl");
                $entries = $all ? $x->getAllModelsId() : $x->getAllCuratedModelsId();
            } catch (Exception $e) {
                // without a client there is nothing to process; previously the
                // code went on to call a method on the undefined client
                trigger_error("Unable to retrieve the BioModels identifier list: " . $e->getMessage(), E_USER_WARNING);
                return FALSE;
            }
            file_put_contents($file, json_encode($entries));
        } else {
            $entries = json_decode(file_get_contents($file));
        }
    } elseif (($pos = strpos($list, "-")) !== FALSE) {
        // hyphenated range of model numbers
        $start_range = (int) substr($list, 0, $pos);
        $end_range = (int) substr($list, $pos + 1);
        for ($i = $start_range; $i <= $end_range; $i++) {
            $entries[] = "BIOMD" . str_pad($i, 10, "0", STR_PAD_LEFT);
        }
    } else {
        // comma separated list; trim so "1, 2" does not yield padded blanks
        foreach (explode(",", $list) as $e) {
            $entries[] = "BIOMD" . str_pad(trim($e), 10, "0", STR_PAD_LEFT);
        }
    }

    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    // set the write file
    $suffix = parent::getParameterValue('output_format');
    $outfile = 'biomodels' . '.' . $suffix;
    $gz = false;
    if (strstr($suffix, "gz")) {
        $gz = true;
    }
    $dataset_description = '';
    parent::setWriteFile($odir . $outfile, $gz);

    // iterate over the entries
    $i = 0;
    $total = count($entries);
    $source_file = null; // stays null when every model is skipped
    foreach ($entries as $id) {
        echo "processing " . ++$i . " of {$total} - biomodel# " . $id;
        $download_file = $ldir . $id . ".owl.gz";
        $url = parent::getParameterValue('download_url') . "publ/{$id}/{$id}-biopax3.owl";

        // download if the file doesn't exist or we are told to
        if (!file_exists($download_file) || $this->GetParameterValue('download') == 'true') {
            echo " - downloading";
            $ret = utils::downloadsingle($url, 'compress.zlib://' . $download_file, true);
            if ($ret === false) {
                echo "\nTrying non-curated model";
                $url = parent::getParameterValue('download_url') . "uncura_publ/{$id}/{$id}-biopax3.owl";
                $ret = utils::downloadsingle($url, 'compress.zlib://' . $download_file, true);
                if ($ret === false) {
                    continue;
                }
            }
            echo " - downloaded";
        }

        // load entry, parse and write to file
        echo " - parsing... ";
        $buf = file_get_contents("compress.zlib://" . $download_file);
        if ($buf === false) {
            // unreadable local copy: skip it rather than parse a boolean
            echo "unable to read {$download_file}, skipping" . PHP_EOL;
            continue;
        }
        $converter = new BioPAX2Bio2RDF($this);
        $converter->SetBuffer($buf)
            ->SetBioPAXVersion(3)
            ->SetBaseNamespace("http://identifiers.org/biomodels.db/{$id}/")
            ->SetBio2RDFNamespace("http://bio2rdf.org/biomodels:" . $id . "_")
            ->SetDatasetURI($this->GetDatasetURI());
        $rdf = $converter->Parse();
        parent::addRDF($rdf);
        parent::writeRDFBufferToWriteFile();

        // generate dataset description; gmdate + 'H' gives a valid UTC
        // ISO 8601 timestamp that matches the literal 'Z' suffix
        $source_file = (new DataResource($this))
            ->setURI($url)
            ->setTitle("EBI BioModels Database - BioModel # {$id}")
            ->setRetrievedDate(gmdate('Y-m-d\TH:i:s\Z', filemtime($download_file)))
            ->setFormat("rdf/xml")
            ->setPublisher("http://www.ebi.ac.uk/")
            ->setHomepage("http://www.ebi.ac.uk/biomodels-main/")
            ->setRights("use-share-modify")
            ->setLicense("http://www.ebi.ac.uk/biomodels-main/termsofuse")
            ->setDataset("http://identifiers.org/biomodels.db/");
        $dataset_description .= $source_file->toRDF();
        echo "done!" . PHP_EOL;
    } //foreach
    parent::getWriteFile()->close();

    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = gmdate('Y-m-d\TH:i:s\Z');
    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})");
    if ($source_file !== null) {
        // only reference a source when at least one model was processed
        $output_file->setSource($source_file->getURI());
    }
    $output_file->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/biomodels/biomodels.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());
    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strstr(parent::getParameterValue('output_format'), "nt")) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }
    $dataset_description .= $output_file->toRDF();

    // write dataset description to file, with the graph URI back to default
    parent::setGraphURI($graph_uri);
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Convert Gene Ontology Annotation (GOA) files into Bio2RDF RDF, one output
 * file per input file, then write the dataset description (release) file.
 *
 * The 'files' parameter is either "all" (every entry of the parameter list
 * after the first) or a comma separated list of GOA file names. A missing
 * local file forces a download even when 'download' is false.
 */
function run() {
    if (parent::getParameterValue('files') == 'all') {
        $files = explode("|", $this->GetParameterList('files'));
        array_shift($files); // first list entry is 'all' itself
    } else {
        $files = explode(",", parent::getParameterValue('files'));
    }

    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $rdir = parent::getParameterValue('download_url');
    $dataset_description = '';

    foreach ($files as $file) {
        $download = parent::getParameterValue('download');
        $lfile = $ldir . "goa_" . $file . ".gz";
        if (!file_exists($lfile) && $download == false) {
            trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
            $download = true;
        }

        //download file
        $rfile = $rdir . strtoupper($file) . "/gene_association.goa_" . $file . ".gz";
        if ($download == true) {
            echo "downloading {$file} ... ";
            utils::DownloadSingle($rfile, $lfile);
        }
        if (!file_exists($lfile)) {
            // nothing to read and no modification time to report: skip the
            // entry instead of failing inside the reader
            trigger_error("Unable to obtain {$lfile}; skipping {$file}.", E_USER_WARNING);
            continue;
        }

        $gz = strstr(parent::getParameterValue('output_format'), ".gz") === FALSE ? false : true;
        $ofile = "goa_" . $file . "." . parent::getParameterValue('output_format');
        parent::setReadFile($lfile, TRUE);
        parent::setWriteFile($odir . $ofile, $gz);
        echo "processing {$file} ... ";
        $this->process($file);
        echo "done!";
        parent::clear();

        //close write file
        parent::getWriteFile()->close();
        echo PHP_EOL;

        // dataset description (optionally placed in the dataset graph)
        $graph_uri = parent::getGraphURI();
        if (parent::getParameterValue('dataset_graph') == true) {
            parent::setGraphURI(parent::getDatasetURI());
        }

        // gmdate + 'H' gives a valid UTC ISO 8601 timestamp matching the 'Z' suffix
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("Gene Ontology Annotation file {$file} ({$rfile})")
            ->setRetrievedDate(gmdate('Y-m-d\TH:i:s\Z', filemtime($lfile)))
            ->setFormat("text/tab-separated-value")
            ->setFormat("application/gzip")
            ->setPublisher("http://www.ebi.ac.uk/")
            ->setHomepage("http://www.ebi.ac.uk/GOA/")
            ->setRights("use")
            ->setLicense("http://www.ebi.ac.uk/GOA/goaHelp.html")
            ->setDataset("http://identifiers.org/goa/");

        $prefix = parent::getPrefix();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $date = gmdate('Y-m-d\TH:i:s\Z');
        $output_file = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
            ->setSource($source_file->getURI())
            // creator is this GOA script (was a copy-paste of the irefindex URL)
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/goa/goa.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());
        if ($gz) {
            $output_file->setFormat("application/gzip");
        }
        if (strstr(parent::getParameterValue('output_format'), "nt")) {
            $output_file->setFormat("application/n-triples");
        } else {
            $output_file->setFormat("application/n-quads");
        }
        $dataset_description .= $source_file->toRDF() . $output_file->toRDF();

        // restore the graph URI for the next file's data
        parent::setGraphURI($graph_uri);
    }

    // write the accumulated dataset description
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Convert iRefIndex MITAB zip archives into Bio2RDF RDF, one output file per
 * archive, then write the dataset description (release) file.
 *
 * The 'files' parameter is either "all" or a comma separated list of archive
 * name prefixes; the archive name is built from the prefix and the 'version'
 * parameter. A missing local archive forces a download.
 *
 * @return bool TRUE on success, FALSE when a download fails or the archive
 *              content cannot be accessed. Unopenable archives and parse
 *              errors terminate the script (exit), as before.
 */
function Run() {
    // get the file list
    if (parent::getParameterValue('files') == 'all') {
        $files = array('all');
    } else {
        $files = explode(",", parent::getParameterValue('files'));
    }

    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $rdir = parent::getParameterValue('download_url');
    $dataset_description = '';

    foreach ($files as $file) {
        $download = parent::getParameterValue('download');
        $version = parent::getParameterValue("version");
        $zip_file = ucfirst($file) . ".mitab." . $version . ".txt.zip";
        $lfile = $ldir . $zip_file;
        $gz = strstr(parent::getParameterValue('output_format'), ".gz") === FALSE ? false : true;
        $ofile = "irefindex-" . $file . "." . parent::getParameterValue('output_format');

        if (!file_exists($lfile)) {
            trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
            $download = true;
        }
        $rfile = $rdir . $zip_file;
        if ($download == true) {
            echo "downloading {$rfile}" . PHP_EOL;
            if (FALSE === Utils::DownloadSingle($rfile, $lfile)) {
                trigger_error("Error in Download");
                return FALSE;
            }
        }

        $zin = new ZipArchive();
        // ZipArchive::open() returns TRUE on success and an error code
        // otherwise - it never returns FALSE, so compare against TRUE
        if ($zin->open($lfile) !== TRUE) {
            trigger_error("Unable to open {$lfile}");
            exit;
        }
        if ($zin->numFiles < 1) {
            // statIndex(0) would fail on an empty archive
            trigger_error("No files found in ziparchive {$lfile}");
            $zin->close();
            return FALSE;
        }
        if ($zin->numFiles != 1) {
            trigger_error("Found more than one file ... using first file");
        }
        $f = $zin->statIndex(0);
        $base_file = $f['name'];
        if (($fp = $zin->getStream($base_file)) === FALSE) {
            trigger_error("Unable to get {$base_file} in ziparchive {$lfile}");
            $zin->close();
            return FALSE;
        }

        // read straight from the archive stream
        parent::setReadFile($lfile);
        parent::getReadFile()->setFilePointer($fp);

        echo "Processing " . $file . " ...";
        // compress only when the requested output format asks for it, so the
        // file content agrees with its name and with the description below
        parent::setWriteFile($odir . $ofile, $gz);
        if ($this->Parse() === FALSE) {
            trigger_error("Parsing Error");
            exit;
        }
        parent::writeRDFBufferToWriteFile();
        parent::getWriteFile()->close();
        $zin->close();
        echo "Done!" . PHP_EOL;

        $graph_uri = parent::getGraphURI();
        if (parent::getParameterValue('dataset_graph') == true) {
            parent::setGraphURI(parent::getDatasetURI());
        }

        // dataset description; gmdate + 'H' gives a valid UTC ISO 8601
        // timestamp matching the literal 'Z' suffix
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("iRefIndex ({$zip_file})")
            ->setRetrievedDate(gmdate('Y-m-d\TH:i:s\Z', filemtime($lfile)))
            ->setFormat("text/tab-separated-value")
            ->setFormat("application/zip")
            ->setPublisher("http://irefindex.uio.no")
            ->setHomepage("http://irefindex.uio.no")
            ->setRights("use")
            ->setRights("by-attribution")
            ->setRights("no-commercial")
            ->setLicense("http://irefindex.uio.no/wiki/README_MITAB2.6_for_iRefIndex#License")
            ->setDataset("http://identifiers.org/irefindex/");

        $prefix = parent::getPrefix();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $date = gmdate('Y-m-d\TH:i:s\Z');
        $output_file = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/irefindex/irefindex.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());
        if ($gz) {
            $output_file->setFormat("application/gzip");
        }
        if (strstr(parent::getParameterValue('output_format'), "nt")) {
            $output_file->setFormat("application/n-triples");
        } else {
            $output_file->setFormat("application/n-quads");
        }
        $dataset_description .= $source_file->toRDF() . $output_file->toRDF();

        // restore the graph URI for the next archive's data
        parent::setGraphURI($graph_uri);
    }

    // write the accumulated dataset description
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    return TRUE;
}
/**
 * Convert the Saccharomyces Genome Database (SGD) files into Bio2RDF RDF and
 * write the dataset description (release) file.
 *
 * The 'files' parameter is either "all" (every entry of the parameter list
 * after the first) or a comma separated list of the keys of $rfiles. Each key
 * is also the name of the method on this object that parses that file.
 * With 'one_file' set, all output goes to a single "sgd.<format>" file;
 * otherwise one "sgd_<key>.<format>" file is written per input.
 */
function process() {
    if (parent::getParameterValue('files') == 'all') {
        $files = explode("|", parent::getParameterList('files'));
        array_shift($files); // first list entry is 'all' itself
    } else {
        $files = explode(",", parent::getParameterValue('files'));
    }

    $ldir = parent::getParameterValue('indir');
    $rdir = parent::getParameterValue('download_url');
    $odir = parent::getParameterValue('outdir');

    // remote location (relative to download_url) of every supported file
    $rfiles = array(
        "dbxref" => "curation/chromosomal_feature/dbxref.tab",
        "features" => "curation/chromosomal_feature/SGD_features.tab",
        "domains" => "curation/calculated_protein_info/domains/domains.tab",
        "protein" => "curation/calculated_protein_info/protein_properties.tab",
        "goa" => "curation/literature/gene_association.sgd.gz",
        "goslim" => "curation/literature/go_slim_mapping.tab",
        "complex" => "curation/literature/go_protein_complex_slim.tab",
        "interaction" => "curation/literature/interaction_data.tab",
        "phenotype" => "curation/literature/phenotype_data.tab",
        "pathways" => "curation/literature/biochemical_pathways.tab",
        "mapping" => "mapping",
    );

    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    $gz = false;
    if (strstr(parent::getParameterValue('output_format'), "gz")) {
        $gz = true;
    }

    $one_file = (parent::getParameterValue('one_file') == true);
    if ($one_file) {
        $ofile = "sgd." . parent::getParameterValue('output_format');
        parent::setWriteFile($odir . $ofile, $gz);
    }

    $dataset_description = '';
    $source_file = null; // stays null when no file was processed
    foreach ($files as $file) {
        if (!isset($rfiles[$file])) {
            // no remote location and, presumably, no parser method either
            trigger_error("Unknown SGD file '{$file}'; skipping.", E_USER_WARNING);
            continue;
        }

        // '.tab' sources keep their extension; everything else is stored as
        // '.tab.gz'. The original reached the same result through an
        // accidental assignment in `elseif ($ext = "gz")`.
        $ext = pathinfo($rfiles[$file], PATHINFO_EXTENSION);
        $lfile = "sgd_" . $file . ($ext == "tab" ? ".tab" : ".tab.gz");
        $rfile = $rdir . $rfiles[$file];

        // download when the local copy is missing or a download was requested;
        // previously download=true never downloaded anything
        if ($file != 'mapping') {
            $download = parent::getParameterValue('download');
            $missing = !file_exists($ldir . $lfile);
            if ($missing && $download == false) {
                trigger_error($ldir . $lfile . " not found. Will attempt to download.", E_USER_NOTICE);
            }
            if ($missing || $download == true) {
                Utils::DownloadSingle($rfile, $ldir . $lfile);
            }
        }

        if (!$one_file) {
            $ofile = "sgd_" . $file . '.' . parent::getParameterValue('output_format');
            parent::setWriteFile($odir . $ofile, $gz);
        }

        //parse file; a gzipped source must be read as gzip even when the
        //requested output is not compressed (the output flag is kept as well
        //so existing configurations behave as before)
        parent::setReadFile($ldir . $lfile, $gz || $ext == "gz");
        $fnx = $file;
        echo "Processing {$file}... ";
        $this->{$fnx}();
        echo PHP_EOL . "done!";

        //write RDF to file
        parent::writeRDFBufferToWriteFile();

        //close write file
        if (!$one_file) {
            parent::getWriteFile()->close();
        }
        echo PHP_EOL;

        // dataset description of the source; gmdate + 'H' gives a valid UTC
        // ISO 8601 timestamp matching the literal 'Z' suffix
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("Saccharomyces Genome Database ({$file})")
            ->setRetrievedDate(gmdate('Y-m-d\TH:i:s\Z', filemtime($ldir . $lfile)))
            ->setFormat("text/tab-separated-value")
            ->setFormat("application/gzip")
            ->setPublisher("http://www.yeastgenome.org/")
            ->setHomepage("http://www.yeastgenome.org/")
            ->setRights("use")
            ->setLicense("http://www.stanford.edu/site/terms.html")
            ->setDataset("http://identifiers.org/sgd/");
        $dataset_description .= $source_file->toRDF();

        if (!$one_file) {
            $prefix = parent::getPrefix();
            $bVersion = parent::getParameterValue('bio2rdf_release');
            $date = gmdate('Y-m-d\TH:i:s\Z');
            $output_file = (new DataResource($this))
                ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
                ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}")
                ->setSource($source_file->getURI())
                ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sgd/sgd.php")
                ->setCreateDate($date)
                ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
                ->setPublisher("http://bio2rdf.org")
                ->setRights("use-share-modify")
                ->setRights("by-attribution")
                ->setRights("restricted-by-source-license")
                ->setLicense("http://creativecommons.org/licenses/by/3.0/")
                ->setDataset(parent::getDatasetURI());
            if ($gz) {
                $output_file->setFormat("application/gzip");
            }
            if (strstr(parent::getParameterValue('output_format'), "nt")) {
                $output_file->setFormat("application/n-triples");
            } else {
                $output_file->setFormat("application/n-quads");
            }
            $dataset_description .= $output_file->toRDF();
        }
    } //foreach

    if ($one_file) {
        // the combined data file is complete: close it before the release
        // file replaces it as the current write file
        parent::getWriteFile()->close();

        $prefix = parent::getPrefix();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $date = gmdate('Y-m-d\TH:i:s\Z');
        $output_file = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}");
        if ($source_file !== null) {
            $output_file->setSource($source_file->getURI());
        }
        $output_file->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sgd/sgd.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());
        if ($gz) {
            $output_file->setFormat("application/gzip");
        }
        if (strstr(parent::getParameterValue('output_format'), "nt")) {
            $output_file->setFormat("application/n-triples");
        } else {
            $output_file->setFormat("application/n-quads");
        }
        // every source description was already appended inside the loop, so
        // only the output description is added here
        $dataset_description .= $output_file->toRDF();
    }

    //set graph URI back to default, after every description has been built
    parent::setGraphURI($graph_uri);

    //write dataset description to file
    echo "Generating dataset description... " . PHP_EOL;
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Convert the DrugBank XML archive into Bio2RDF RDF and append the dataset
 * description to the release file.
 *
 * The 'files' parameter is either "all" (every entry of the parameter list
 * after the first) or a "|" separated list; only the "drugbank" entry is
 * recognised. Each entry is parsed by the method named "parse_<entry>".
 * An optional comma separated 'id_list' parameter is stored, flipped into a
 * lookup table, in $this->id_list.
 */
function Run() {
    $indir = parent::getParameterValue('indir');
    $outdir = parent::getParameterValue('outdir');
    $download_url = parent::getParameterValue('download_url');

    if (parent::getParameterValue('files') == 'all') {
        $files = explode("|", parent::getParameterList('files'));
        array_shift($files); // first list entry is 'all' itself
    } else {
        $files = explode("|", parent::getParameterValue('files'));
    }

    if (parent::getParameterValue("id_list")) {
        $this->id_list = array_flip(explode(",", parent::getParameterValue('id_list')));
    }

    // loop-invariant: whether the output is gzip compressed
    $gz = strstr(parent::getParameterValue('output_format'), ".gz") === FALSE ? false : true;

    $dataset_description = '';
    foreach ($files as $f) {
        if ($f == 'drugbank') {
            $file = 'drugbank.xml.zip';
            $lname = 'drugbank';
        } else {
            // no archive is known for this entry; going on would use undefined
            // (or the previous iteration's) file names
            trigger_error("Unknown DrugBank file '{$f}'; skipping.", E_USER_WARNING);
            continue;
        }

        $fnx = 'parse_' . $f;
        $rfile = $download_url . $file;
        $lfile = $indir . $file;
        $cfile = $lname . "." . parent::getParameterValue('output_format');

        // download
        if (!file_exists($lfile) || parent::getParameterValue('download') == true) {
            utils::downloadSingle($rfile, $lfile);
        }

        // setup the write
        parent::setWriteFile($outdir . $cfile, $gz);
        echo $outdir . $cfile;

        if (file_exists($lfile)) {
            // call the parser
            echo "processing {$file} ..." . PHP_EOL;
            $this->{$fnx}($indir, $file);
            echo "done" . PHP_EOL;
            parent::clear();
        }
        parent::getWriteFile()->close();

        // dataset description goes into the dataset graph
        $ouri = parent::getGraphURI();
        parent::setGraphURI(parent::getDatasetURI());

        $source_version = parent::getDatasetVersion();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $prefix = parent::getPrefix();
        $date = date("Y-m-d\\TH:i:sP");

        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("DrugBank ({$file})")
            ->setRetrievedDate(date("Y-m-d\\TH:i:sP", filemtime($lfile)))
            ->setFormat("application/xml")
            ->setFormat("application/zip")
            ->setPublisher("http://drugbank.ca")
            ->setHomepage("http://drugbank.ca")
            ->setRights("use")
            ->setRights("by-attribution")
            ->setRights("no-commercial")
            ->setLicense("http://www.drugbank.ca/about")
            ->setDataset("http://identifiers.org/drugbank/");

        $output_file = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$cfile}")
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} v{$source_version}")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/drugbank/drugbank.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());
        if ($gz) {
            $output_file->setFormat("application/gzip");
        }
        if (strstr(parent::getParameterValue('output_format'), "nt")) {
            $output_file->setFormat("application/n-triples");
        } else {
            $output_file->setFormat("application/n-quads");
        }

        parent::writeToReleaseFile($source_file->toRDF() . $output_file->toRDF());

        // restore the graph URI for the next file's data
        parent::setGraphURI($ouri);
    }
    parent::closeReleaseFile();
}
/**
 * Convert the OBO file behind the current read file into RDF statements and
 * write them to the current write file.
 *
 * The file is consumed line by line. A "[Term]" or "[Typedef]" line switches
 * the parser into the corresponding stanza; lines before the first stanza are
 * treated as header tags of the ontology itself. Every other line is split
 * into "tag: value" and dispatched on the tag.
 *
 * Two buffers are filled in parallel: $buf (everything) and $min (a reduced
 * set: labels, definitions, subclass and deprecation statements, plus the
 * intersection_of / relationship restriction blocks when 'detail' is "min+").
 * Which one is written depends on the 'detail' parameter ("min" or "min+"
 * select $min, anything else selects $buf).
 *
 * @param string $abbv ontology abbreviation; lower-cased, and "doid" is
 *                     mapped to "do". Used to build the ontology and dataset
 *                     graph URIs.
 */
function OBO2RDF($abbv) {
    $abbv = strtolower($abbv);
    if ($abbv == "doid") {
        $abbv = "do";
    }
    $minimal = parent::getParameterValue('detail') == 'min' ? true : false;
    $minimalp = parent::getParameterValue('detail') == 'min+' ? true : false;
    $version = parent::getParameterValue("bio2rdf_release");

    // parser state: current subject, and whether the current term got an
    // is_a parent or was marked obsolete
    $tid = '';
    $first = true;
    $is_a = false;
    $is_deprecated = false;
    $min = $buf = '';

    $ouri = "http://bio2rdf.org/lsr:" . $abbv;
    $dataset_uri = $abbv . "_resource:bio2rdf.dataset.{$abbv}.R" . $version;
    parent::setGraphURI($dataset_uri);
    $buf = parent::triplify($ouri, "rdf:type", "owl:Ontology");
    // graph term appended to the hand-built statements further down
    $graph_uri = '<' . parent::getRegistry()->getFQURI(parent::getGraphURI()) . '>';
    // counter used to mint blank node labels (_:b<N>)
    $bid = 1;

    while ($l = parent::getReadFile()->read()) {
        $lt = trim($l);
        // skip blank lines and '!' comment lines
        if (strlen($lt) == 0) {
            continue;
        }
        if ($lt[0] == '!') {
            continue;
        }
        if (strstr($l, "[Term]")) {
            // first node?
            if ($first == true) {
                // ignore the first case
                $first = false;
            } else {
                // the previous term had no parent and is not obsolete:
                // attach it to the generic Entity class
                if ($tid != '' && $is_a == false && $is_deprecated == false) {
                    $t = parent::triplify($tid, "rdfs:subClassOf", "obo_vocabulary:Entity");
                    $buf .= $t;
                    $min .= $t;
                }
            }
            // enter term mode (isset($term)) and leave typedef mode
            $is_a = false;
            $is_deprecated = false;
            unset($typedef);
            $term = '';
            $tid = '';
            // the buffers are not flushed here; they are written at the end
            // of the next iteration that does not `continue`
            continue;
        } else {
            if (strstr($l, "[Typedef]")) {
                // enter typedef mode (isset($typedef)) and leave term mode
                $is_a = false;
                $is_deprecated = false;
                unset($term);
                $tid = '';
                $typedef = '';
                continue;
            }
        }
        //echo "LINE: $l".PHP_EOL;
        // to fix error in obo generator
        $lt = str_replace("synonym ", "synonym: ", $lt);
        // drop a "{...}" block that directly precedes the " !" comment marker
        $lt = preg_replace("/\\{.*\\} !/", " !", $lt);
        // split off the trailing " ! comment"
        $a = explode(" !", $lt);
        if (isset($a[1])) {
            // NOTE(review): $exc is assigned but never read in this function
            $exc = trim($a[1]);
        }
        // $a[0] = tag, $a[1] = value (absent when the line has no ": ")
        $a = explode(": ", trim($a[0]), 2);

        // let's go
        // a run of intersection_of lines ended: hand the collected block to
        // $min (only in "min+" mode) and forget it
        if (isset($intersection_of)) {
            if ($a[0] != "intersection_of") {
                // $intersection_of .= ")].".PHP_EOL;
                //$buf .= $intersection_of;
                if ($minimalp) {
                    $min .= $intersection_of;
                }
                unset($intersection_of);
            }
        }
        // same for a run of relationship lines
        if (isset($relationship)) {
            if ($a[0] != "relationship") {
                // $relationship .= ")].".PHP_EOL;
                //$buf .= $relationship;
                if ($minimalp) {
                    $min .= $relationship;
                }
                unset($relationship);
            }
        }

        if (isset($typedef)) {
            // ---- [Typedef] stanza ----
            if ($a[0] == "id") {
                // "ns:id" or a bare id, which is placed in the "obo" namespace
                $c = explode(":", $a[1]);
                if (count($c) == 1) {
                    $ns = "obo";
                    $id = $c[0];
                } else {
                    $ns = strtolower($c[0]);
                    $id = $c[1];
                }
                $id = str_replace(array("(", ")"), array("_", ""), $id);
                $tid = $ns . ":" . $id;
            } else {
                if ($a[0] == "name") {
                    $buf .= parent::describeClass($tid, addslashes(stripslashes($a[1])));
                } else {
                    if ($a[0] == "is_a") {
                        // cut a remaining "!" comment off the parent id
                        if (FALSE !== ($pos = strpos($a[1], "!"))) {
                            $a[1] = substr($a[1], 0, $pos - 1);
                        }
                        $buf .= parent::triplify($tid, "rdfs:subPropertyOf", "obo_vocabulary:" . strtolower($a[1]));
                    } else {
                        if ($a[0] == "is_obsolete") {
                            $buf .= parent::triplify($tid, "rdf:type", "owl:DeprecatedClass");
                            $is_deprecated = true;
                        } else {
                            // any other tag becomes a literal-valued statement
                            // in the obo_vocabulary namespace
                            if ($a[0][0] == "!") {
                                $a[0] = substr($a[0], 1);
                            }
                            // NOTE(review): $a[1] is read without an isset()
                            // check here, unlike the term default handler
                            $buf .= parent::triplifyString($tid, "obo_vocabulary:{$a['0']}", str_replace('"', '', stripslashes($a[1])));
                        }
                    }
                }
            }
        } else {
            if (isset($term)) {
                // ---- [Term] stanza ----
                if ($a[0] == "is_obsolete" && $a[1] == "true") {
                    $t = parent::triplify($tid, "rdf:type", "owl:DeprecatedClass");
                    $t .= parent::triplify($tid, "rdfs:subClassOf", "owl:DeprecatedClass");
                    $min .= $t;
                    $buf .= $t;
                    $is_deprecated = true;
                } else {
                    if ($a[0] == "id") {
                        // parseQName fills $ns and $id (passed as out-arguments)
                        parent::getRegistry()->parseQName($a[1], $ns, $id);
                        $tid = "{$ns}:{$id}";
                        // $buf .= parent::describeClass($tid,null,"owl:Class");
                        // $buf .= parent::triplify($tid,"rdfs:isDefinedBy",$ouri);
                    } else {
                        if ($a[0] == "name") {
                            // $t = parent::triplifyString($tid,"rdfs:label",str_replace(array("\"", "'"), array("","\\\'"), stripslashes($a[1]))." [$tid]");
                            // strip double quotes and escape single quotes in the label
                            $label = str_replace(array("\"", "'"), array("", "\\\\'"), stripslashes($a[1]));
                            $t = parent::describeIndividual($tid, $label, "owl:Class");
                            $t .= parent::triplify($tid, "rdfs:isDefinedBy", $ouri);
                            $min .= $t;
                            $buf .= $t;
                        } else {
                            if ($a[0] == "def") {
                                // escape single quotes, drop double quotes and backslashes
                                $t = str_replace(array("'", "\"", "\\", "\\\\'"), array("\\\\'", "", "", ""), $a[1]);
                                $min .= parent::triplifyString($tid, "dc:description", $t);
                                $buf .= parent::triplifyString($tid, "dc:description", $t);
                            } else {
                                if ($a[0] == "property_value") {
                                    // "<property> <value> ..." - only the first two
                                    // space separated tokens are used
                                    $b = explode(" ", $a[1]);
                                    $buf .= parent::triplifyString($tid, "obo_vocabulary:" . strtolower($b[0]), str_replace("\"", "", strtolower($b[1])));
                                } else {
                                    if ($a[0] == "xref") {
                                        // examples of the values handled here:
                                        // http://upload.wikimedia.org/wikipedia/commons/3/34/Anatomical_Directions_and_Axes.JPG
                                        // Medical Dictionary:http\://www.medterms.com/
                                        // KEGG COMPOUND:C02788 "KEGG COMPOUND"
                                        // id-validation-regexp:\"REACT_[0-9\]\{1\,4}\\.[0-9\]\{1\,3}|[0-9\]+\"
                                        //$a[1] = 'id-validation-regexp:\"REACT_[0-9\]\{1\,4}\\.[0-9\]\{1\,3}|[0-9\]+\"';
                                        if (substr($a[1], 0, 4) == "http") {
                                            // plain URL
                                            $buf .= parent::triplify($tid, "rdfs:seeAlso", str_replace(array(" ", '"wiki"', "\\"), array("+", "", ""), $a[1]));
                                        } else {
                                            // "namespace:identifier"
                                            // NOTE(review): $b[1] is not set when the value has no ":"
                                            $b = explode(":", $a[1], 2);
                                            if (substr($b[1], 0, 4) == "http") {
                                                $buf .= parent::triplify($tid, "rdfs:seeAlso", stripslashes($b[1]));
                                            } else {
                                                $ns = str_replace(array(" ", "\\"), "", strtolower($b[0]));
                                                $id = trim($b[1]);
                                                // there may be a comment to remove
                                                if (FALSE !== ($pos = strrpos($id, ' "'))) {
                                                    $comment = substr($id, $pos + 1, -1);
                                                    $id = substr($id, 0, $pos);
                                                }
                                                $id = stripslashes($id);
                                                // there may be a source statement to remove
                                                $id = preg_replace("/{.*\\}/", "", $id);
                                                // namespace specific clean-up
                                                if ($ns == "pmid") {
                                                    $ns = "pubmed";
                                                    $y = explode(" ", $id);
                                                    $id = $y[0];
                                                }
                                                // `continue` leaves the while iteration: the line is
                                                // dropped and the buffers are carried over unwritten
                                                if ($ns == "xx") {
                                                    continue;
                                                }
                                                if ($ns == "icd9cm") {
                                                    $y = explode(" ", $id);
                                                    $id = $y[0];
                                                }
                                                if ($ns == "xref; umls_cui") {
                                                    continue;
                                                }
                                                if ($ns == "submitter") {
                                                    $ns = "chebi.submitter";
                                                }
                                                if ($ns == "wikipedia" || $ns == "mesh") {
                                                    $id = str_replace(" ", "+", $id);
                                                }
                                                if ($ns == "id-validation-regexp") {
                                                    // kept as a literal, not as a resource
                                                    $buf .= parent::triplifyString($tid, "obo_vocabulary:{$ns}", addslashes($id));
                                                } else {
                                                    $buf .= parent::triplify($tid, "obo_vocabulary:x-{$ns}", "{$ns}:" . str_replace(" ", "-", $id));
                                                }
                                            }
                                        }
                                    } else {
                                        if ($a[0] == "synonym") {
                                            // synonym: "entidades moleculares" RELATED [IUPAC:]
                                            // synonym: "molecular entity" EXACT IUPAC_NAME [IUPAC:]
                                            // synonym: "Chondrococcus macrosporus" RELATED synonym [NCBITaxonRef:Krzemieniewska_and_Krzemieniewski_1926]
                                            //grab string inside double quotes
                                            preg_match('/"(.*)"(.*)/', $a[1], $matches);
                                            if (!empty($matches)) {
                                                // quoted text followed by the remainder, with quotes removed
                                                $a[1] = str_replace(array("\\", "\"", "'"), array("", "", "\\\\'"), $matches[1] . $matches[2]);
                                            } else {
                                                $a[1] = str_replace(array("\"", "'"), array("", "\\\\'"), $a[1]);
                                            }
                                            // predicate name, refined below from the scope keyword
                                            $rel = "SYNONYM";
                                            $list = array("EXACT", "BROAD", "RELATED", "NARROW");
                                            $found = false;
                                            foreach ($list as $keyword) {
                                                // get everything after the keyword up until the bracket [
                                                if (FALSE !== ($k_pos = strpos($a[1], $keyword))) {
                                                    $str_len = strlen($a[1]);
                                                    $keyword_len = strlen($keyword);
                                                    $keyword_end_pos = $k_pos + $keyword_len;
                                                    $b1_pos = strrpos($a[1], "[");
                                                    $b2_pos = strrpos($a[1], "]");
                                                    $b_text = substr($a[1], $b1_pos + 1, $b2_pos - $b1_pos - 1);
                                                    // number of characters between the keyword and the "["
                                                    $diff = $b1_pos - $keyword_end_pos - 1;
                                                    if ($diff != 0) {
                                                        // then there is more stuff here
                                                        $k = substr($a[1], $keyword_end_pos + 1, $diff);
                                                        $rel = trim($k);
                                                    } else {
                                                        // create the long predicate
                                                        $rel = $keyword . "_SYNONYM";
                                                    }
                                                    $found = true;
                                                    // the synonym text is everything before the keyword
                                                    $str = substr($a[1], 0, $k_pos - 1);
                                                    break;
                                                }
                                            }
                                            // check to see if we still haven't found anything
                                            if ($found === false) {
                                                // we didn't find one of the keywords
                                                // so take from the start to the bracket
                                                $b1_pos = strrpos($a[1], "[");
                                                $str = substr($a[1], 0, $b1_pos - 1);
                                            }
                                            $rel = str_replace(" ", "_", $rel);
                                            // $lit = addslashes($str.($b_text?" [".$b_text."]":""));
                                            // NOTE(review): this reuses $l, the loop's line variable
                                            $l = parent::triplifyString($tid, "obo_vocabulary:" . strtolower($rel), $str);
                                            $buf .= $l;
                                        } else {
                                            if ($a[0] == "alt_id") {
                                                // the alternative id points back at the term
                                                parent::getRegistry()->parseQname($a[1], $ns, $id);
                                                if ($id != 'curators') {
                                                    $buf .= parent::triplify("{$ns}:{$id}", "rdfs:seeAlso", stripslashes($tid));
                                                }
                                            } else {
                                                if ($a[0] == "is_a") {
                                                    // do subclassing
                                                    parent::getRegistry()->parseQName($a[1], $ns, $id);
                                                    $t = parent::triplify($tid, "rdfs:subClassOf", "{$ns}:{$id}");
                                                    $buf .= $t;
                                                    $min .= $t;
                                                    $is_a = true;
                                                } else {
                                                    if ($a[0] == "intersection_of") {
                                                        if (!isset($intersection_of)) {
                                                            // first line of a run: open the block with hand-built
                                                            // statements using two fresh blank nodes
                                                            // $intersection_of = '<'.parent::getRegistry()->getFQURI($tid).'> <'.parent::getRegistry()->getFQURI('owl:equivalentClass').'> [<'.parent::getRegistry()->getFQURI('rdf:type').'> <'.parent::getRegistry()->getFQURI('owl:Class').'>; <'.parent::getRegistry()->getFQURI('owl:intersectionOf').'> (';
                                                            $intersection_of = '<' . parent::getRegistry()->getFQURI($tid) . '> <' . parent::getRegistry()->getFQURI('owl:equivalentClass') . '> _:b' . ++$bid . " {$graph_uri} ." . PHP_EOL;
                                                            $intersection_of .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('rdf:type') . '> <' . parent::getRegistry()->getFQURI('owl:Class') . "> {$graph_uri} ." . PHP_EOL;
                                                            $intersection_of .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('owl:intersectionOf') . '> _:b' . ++$bid . " {$graph_uri} ." . PHP_EOL;
                                                        }
                                                        /*
                                                        intersection_of: ECO:0000206 ! BLAST evidence
                                                        intersection_of: develops_from VAO:0000092 ! chondrogenic condensation
                                                        intersection_of: OBO_REL:has_part VAO:0000040 ! cartilage tissue
                                                        */
                                                        $c = explode(" ", $a[1]);
                                                        if (count($c) == 1) {
                                                            // just a class
                                                            parent::getRegistry()->parseQName($c[0], $ns, $id);
                                                            $intersection_of .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('rdfs:subClassOf') . '> <' . parent::getRegistry()->getFQURI("{$ns}:{$id}") . "> {$graph_uri} ." . PHP_EOL;
                                                            $buf .= parent::triplify($tid, "rdfs:subClassOf", "{$ns}:{$id}");
                                                        } else {
                                                            if (count($c) == 2) {
                                                                // an expression
                                                                parent::getRegistry()->parseQName($c[0], $pred_ns, $pred_id);
                                                                parent::getRegistry()->parseQName($c[1], $obj_ns, $obj_id);
                                                                $intersection_of .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('owl:onProperty') . '> <' . parent::getRegistry()->getFQURI("obo_vocabulary:" . $pred_id) . "> {$graph_uri} ." . PHP_EOL;
                                                                $intersection_of .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('owl:someValuesFrom') . '> <' . parent::getRegistry()->getFQURI("{$obj_ns}:{$obj_id}") . "> {$graph_uri} ." . PHP_EOL;
                                                                $buf .= parent::triplify($tid, "obo_vocabulary:{$pred_id}", "{$obj_ns}:{$obj_id}");
                                                            }
                                                        }
                                                    } else {
                                                        if ($a[0] == "relationship") {
                                                            if (!isset($relationship)) {
                                                                // first line of a run: open the block, as for
                                                                // intersection_of but attached via rdfs:subClassOf
                                                                $relationship = '<' . parent::getRegistry()->getFQURI($tid) . '> <' . parent::getRegistry()->getFQURI('rdfs:subClassOf') . '> _:b' . ++$bid . " {$graph_uri} ." . PHP_EOL;
                                                                $relationship .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('rdf:type') . '> <' . parent::getRegistry()->getFQURI('owl:Class') . "> {$graph_uri} ." . PHP_EOL;
                                                                $relationship .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('owl:intersectionOf') . '> _:b' . ++$bid . " {$graph_uri} ." . PHP_EOL;
                                                            }
                                                            /*
                                                            relationship: develops_from VAO:0000092 ! chondrogenic condensation
                                                            relationship: OBO_REL:has_part VAO:0000040 ! cartilage tissue
                                                            */
                                                            $c = explode(" ", $a[1]);
                                                            if (count($c) == 1) {
                                                                // just a class
                                                                parent::getRegistry()->parseQName($c[0], $ns, $id);
                                                                // NOTE(review): this appends a bare URI with no subject,
                                                                // predicate, graph or line end, unlike the matching
                                                                // intersection_of branch - confirm whether intended
                                                                $relationship .= parent::getRegistry()->getFQURI("{$ns}:{$id}");
                                                                $buf .= parent::triplify($tid, "rdfs:subClassOf", "{$ns}:{$id}");
                                                            } else {
                                                                if (count($c) == 2) {
                                                                    // an expression
                                                                    parent::getRegistry()->parseQName($c[0], $pred_ns, $pred_id);
                                                                    parent::getRegistry()->parseQName($c[1], $obj_ns, $obj_id);
                                                                    $relationship .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('owl:onProperty') . '> <' . parent::getRegistry()->getFQURI("obo_vocabulary:" . $pred_id) . "> {$graph_uri} ." . PHP_EOL;
                                                                    $relationship .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('owl:someValuesFrom') . '> <' . parent::getRegistry()->getFQURI("{$obj_ns}:{$obj_id}") . "> {$graph_uri} ." . PHP_EOL;
                                                                    $buf .= parent::triplify($tid, "obo_vocabulary:{$pred_id}", "{$obj_ns}:{$obj_id}");
                                                                }
                                                            }
                                                        } else {
                                                            // default handler
                                                            if (isset($a[1])) {
                                                                $buf .= parent::triplifyString($tid, "obo_vocabulary:{$a['0']}", str_replace(array("\"", "'"), array("", "\\\\'"), stripslashes($a[1])));
                                                            }
                                                        }
                                                    }
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            } else {
                //header
                //format-version: 1.0
                // lines before the first stanza describe the ontology itself
                $buf .= parent::triplifyString($ouri, "obo_vocabulary:{$a['0']}", str_replace(array('"', '\\:'), array('\\"', ':'), isset($a[1]) ? $a[1] : ""));
            }
        }

        // flush what this line produced and start with empty buffers
        if ($minimal || $minimalp) {
            parent::getWriteFile()->write($min);
        } else {
            parent::getWriteFile()->write($buf);
        }
        $min = '';
        $buf = '';
        // NOTE(review): $header is assigned but never read in this function
        $header = '';
    }

    //if(isset($intersection_of)) $buf .= $intersection_of.")].".PHP_EOL;
    //if(isset($relationship)) $buf .= $relationship.")].".PHP_EOL;
    // write whatever was carried over by a `continue` on the last lines.
    // NOTE(review): an intersection_of / relationship block still open at end
    // of file is never added to $min.
    if ($minimal || $minimalp) {
        parent::getWriteFile()->Write($min);
    } else {
        parent::getWriteFile()->write($buf);
    }
}