/**
 * Convert SABIO-RK reactions to RDF, writing one output file per reaction.
 *
 * Parameters read: 'indir', 'outdir', 'files', 'download', 'graph_uri',
 * 'gzip' and 'download_url'. 'files' is 'all', a single reaction id, a
 * comma-separated list of ids, or a hyphenated numeric range such as "10-20".
 *
 * The reaction id list and each reaction's BioPAX export are cached under
 * 'indir' and are only fetched when the cached copy is missing or 'download'
 * is 'true'. Once every requested reaction has been converted, the release
 * file describing the generated files is written to 'outdir'.
 *
 * The script exits if the reaction id list can neither be downloaded nor parsed.
 */
function Run()
{
    $idir = $this->GetParameterValue('indir');
    $odir = $this->GetParameterValue('outdir');
    $files = $this->GetParameterValue('files');
    $force_download = ($this->GetParameterValue('download') == 'true');

    // set the work; null means "every reaction in the id list"
    $myfiles = null;
    if ($files != 'all') {
        $myfiles = array();
        $list = explode(",", $files);
        if (count($list) > 1) {
            // comma-separated ids
            foreach ($list as $entry) {
                $myfiles[] = trim($entry);
            }
        } elseif (preg_match('/^\s*(\d+)\s*-\s*(\d+)\s*$/', $files, $bounds)) {
            // hyphenated numeric range; an inverted range selects nothing
            // instead of silently falling back to every reaction
            for ($n = (int) $bounds[1]; $n <= (int) $bounds[2]; $n++) {
                $myfiles[] = (string) $n;
            }
        } else {
            // must be a single entry
            $myfiles[] = trim($files);
        }
    }

    $rest_uri = 'http://sabiork.h-its.org/sabioRestWebServices/';
    $getReactionIds_url = $rest_uri . "suggestions/SABIOReactionIDs";
    $reaction_list_file = $idir . "reactions.xml";
    if (!file_exists($reaction_list_file) || $force_download) {
        $xml = file_get_contents($getReactionIds_url);
        // the check must look at the download result itself
        if ($xml === false) {
            trigger_error("Unable to download {$getReactionIds_url}", E_USER_WARNING);
            exit;
        }
        $f = new FileFactory($reaction_list_file);
        $f->Write($xml);
        $f->Close();
    }

    $xml = simplexml_load_file($reaction_list_file);
    if ($xml === false) {
        trigger_error("Unable to parse {$reaction_list_file}", E_USER_WARNING);
        exit;
    }

    $total = ($myfiles === null) ? count($xml->SABIOReactionID) : count($myfiles);

    // initialised up front so the release file can be built even when
    // no reaction ends up being processed
    $bio2rdf_download_files = array();
    $i = 0;
    foreach ($xml->SABIOReactionID as $reaction) {
        // work with the plain string id rather than the SimpleXMLElement
        $rid = (string) $reaction;
        if ($myfiles !== null && !in_array($rid, $myfiles, true)) {
            continue;
        }

        $i++;
        echo "{$i} / {$total} : reaction {$rid}";

        $reaction_file = $idir . "reaction_" . $rid . ".owl.gz";
        if (!file_exists($reaction_file) || $force_download) {
            $url = $rest_uri . 'searchKineticLaws/biopax?q=SabioReactionID:' . $rid;
            $data = file_get_contents($url);
            if ($data === false) {
                // terminate the progress line before moving on
                echo PHP_EOL;
                continue;
            }
            $f = new FileFactory($reaction_file, true);
            $f->Write($data);
            $f->Close();
        }

        $buf = file_get_contents("compress.zlib://" . $reaction_file);
        if ($buf === false) {
            // nothing readable to parse for this reaction
            echo PHP_EOL;
            continue;
        }

        // send for parsing
        $p = new BioPAX2Bio2RDF();
        $p->SetBuffer($buf)
            ->SetBioPAXVersion(3)
            ->SetBaseNamespace("http://sabio.h-its.org/biopax#")
            ->SetBio2RDFNamespace("http://bio2rdf.org/sabiork:")
            ->SetDatasetURI($this->GetDatasetURI());
        $rdf = $p->Parse();

        // the output name depends on whether a graph URI and gzip were requested
        $ofile = "sabiork_{$rid}.nt";
        $gz = false;
        if ($this->GetParameterValue("graph_uri")) {
            $ofile = "sabiork_{$rid}.nq";
        }
        if ($this->GetParameterValue("gzip")) {
            $gz = true;
            $ofile .= ".gz";
        }

        $this->SetWriteFile($odir . $ofile, $gz);
        $this->GetWriteFile()->Write($rdf);
        $this->GetWriteFile()->Close();

        $bio2rdf_download_files[] = $this->GetBio2RDFDownloadURL($this->GetNamespace()) . $ofile;
        echo PHP_EOL;
    }

    // generate the release file
    $desc = $this->GetBio2RDFDatasetDescription(
        $this->GetNamespace(),
        "https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sabiork/sabiork.php",
        $bio2rdf_download_files,
        "sabiork.h-its.org",
        array("use-share-modify", "no-commercial"),
        null,
        $this->GetParameterValue('download_url'),
        $this->version
    );
    $this->SetWriteFile($odir . $this->GetBio2RDFReleaseFile($this->GetNamespace()));
    $this->GetWriteFile()->Write($desc);
    $this->GetWriteFile()->Close();
}
/**
 * Convert SABIO-RK reactions to RDF in a single output file and write the
 * dataset description.
 *
 * Parameters read: 'indir', 'outdir', 'files', 'download', 'dataset_graph',
 * 'output_format' and 'bio2rdf_release'. 'files' is 'all', a single reaction
 * id, a comma-separated list of ids, or a hyphenated numeric range ("10-20").
 *
 * The reaction id list and each reaction's BioPAX export are cached under
 * 'indir' and are only fetched when the cached copy is missing or 'download'
 * is 'true'. All parsed reactions are appended to "sabiork.<output_format>"
 * in 'outdir'; the release file is written afterwards.
 *
 * The script exits if the reaction id list can neither be downloaded nor parsed.
 */
function Run()
{
    $idir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $files = parent::getParameterValue('files');
    $force_download = (parent::getParameterValue('download') == 'true');

    // set the work; null means "every reaction in the id list"
    $myfiles = null;
    if ($files != 'all') {
        $myfiles = array();
        $list = explode(",", $files);
        if (count($list) > 1) {
            // comma-separated ids
            foreach ($list as $entry) {
                $myfiles[] = trim($entry);
            }
        } elseif (preg_match('/^\s*(\d+)\s*-\s*(\d+)\s*$/', $files, $bounds)) {
            // hyphenated numeric range; an inverted range selects nothing
            // instead of silently falling back to every reaction
            for ($n = (int) $bounds[1]; $n <= (int) $bounds[2]; $n++) {
                $myfiles[] = (string) $n;
            }
        } else {
            // must be a single entry
            $myfiles[] = trim($files);
        }
    }

    $rest_uri = 'http://sabiork.h-its.org/sabioRestWebServices/';
    $getReactionIds_url = $rest_uri . "suggestions/SABIOReactionIDs";
    $reaction_list_file = $idir . "reactions.xml";
    if (!file_exists($reaction_list_file) || $force_download) {
        $xml = file_get_contents($getReactionIds_url);
        // the check must look at the download result, not at the target path
        if ($xml === false) {
            trigger_error("Unable to download {$getReactionIds_url}", E_USER_WARNING);
            exit;
        }
        $f = new FileFactory($reaction_list_file);
        $f->Write($xml);
        $f->Close();
    }

    $xml = simplexml_load_file($reaction_list_file);
    if ($xml === false) {
        trigger_error("Unable to parse {$reaction_list_file}", E_USER_WARNING);
        exit;
    }

    $total = ($myfiles === null) ? count($xml->SABIOReactionID) : count($myfiles);
    $i = 0;

    parent::setCheckpoint('dataset');

    // remember the configured graph URI; it is restored before the
    // dataset description is written
    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    $suffix = parent::getParameterValue('output_format');
    $ofile = "sabiork." . $suffix;
    $gz = (strpos($suffix, "gz") !== false);
    parent::setWriteFile($odir . $ofile, $gz);

    foreach ($xml->SABIOReactionID as $reaction) {
        parent::setCheckpoint('file');

        // work with the plain string id rather than the SimpleXMLElement
        $rid = (string) $reaction;
        if ($myfiles !== null && !in_array($rid, $myfiles, true)) {
            continue;
        }

        $i++;
        echo "{$i} / {$total} : reaction {$rid}" . PHP_EOL;

        $reaction_file = $idir . "reaction_" . $rid . ".owl.gz";
        if (!file_exists($reaction_file) || $force_download) {
            $url = $rest_uri . 'searchKineticLaws/biopax?q=SabioReactionID:' . $rid;
            $data = file_get_contents($url);
            if ($data === false) {
                continue;
            }
            $f = new FileFactory($reaction_file, true);
            $f->Write($data);
            $f->Close();
        }

        $buf = file_get_contents("compress.zlib://" . $reaction_file);
        if ($buf === false) {
            // nothing readable to parse for this reaction
            continue;
        }

        // send for parsing
        $p = new BioPAX2Bio2RDF($this);
        $p->SetBuffer($buf)
            ->SetBioPAXVersion(3)
            ->SetBaseNamespace("http://sabio.h-its.org/biopax#")
            ->SetBio2RDFNamespace("http://bio2rdf.org/sabiork:")
            ->SetDatasetURI($this->GetDatasetURI());
        $rdf = $p->Parse();
        parent::getWriteFile()->Write($rdf);
    }
    parent::getWriteFile()->Close();

    // generate dataset description
    echo "Generating dataset description... ";

    // 'H' gives a two-digit hour and gmdate() makes the literal 'Z' (UTC)
    // suffix truthful
    $date_format = 'Y-m-d\TH:i:s\Z';

    $source_file = (new DataResource($this))
        ->setURI("http://sabiork.h-its.org/sabioRestWebServices/searchKineticLaws/biopax")
        ->setTitle("SABIO-RK Biochemical Reaction Kinetics Database")
        ->setRetrievedDate(gmdate($date_format, filemtime($odir . $ofile)))
        ->setFormat("text/xml")
        ->setPublisher("http://sabio.villa-bosch.de/")
        ->setHomepage("http://sabio.villa-bosch.de/")
        ->setRights("use-share-modify")
        ->setRights("no-commercial")
        ->setLicense("http://sabio.villa-bosch.de/layouts/content/termscondition.gsp")
        ->setDataset("http://identifiers.org/sabiork.reaction/");

    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = gmdate($date_format);
    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
        ->setSource($source_file->getURI())
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sabiork/sabiork.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());

    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strpos($suffix, "nt") !== false) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }

    $dataset_description = $source_file->toRDF() . $output_file->toRDF();

    // write dataset description to file
    parent::setGraphURI($graph_uri);
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Convert BioModels BioPAX exports to RDF in a single output file and write
 * the release file.
 *
 * Parameters read: 'indir', 'outdir', 'files', 'graph_uri', 'gzip',
 * 'download' and 'download_url'. 'files' is one of:
 *   - 'all'     : ids come from the getAllModelsId web service call
 *   - 'curated' : ids come from the getAllCuratedModelsId web service call
 *   - "a-b"     : a hyphenated numeric range
 *   - "a,b,c"   : a comma-separated list
 * Web service results are cached as JSON under 'indir'. Numeric ids are
 * expanded to "BIOMD" followed by the number left-padded to 10 digits.
 *
 * @return bool true on completion, false when the web service client could
 *              not be created
 */
function Run()
{
    // directory shortcuts
    $ldir = $this->GetParameterValue('indir');
    $odir = $this->GetParameterValue('outdir');

    // get the work specified
    $list = trim($this->GetParameterValue('files'));

    // selections answered by the web service: keyword => (cache file, SOAP method)
    $services = array(
        'all' => array("all_models.json", "getAllModelsId"),
        'curated' => array("curated_models.json", "getAllCuratedModelsId"),
    );

    $entries = array();
    if (isset($services[$list])) {
        $file = $ldir . $services[$list][0];
        $method = $services[$list][1];
        if (!file_exists($file)) {
            try {
                // '@' is kept from the original so construction warnings stay
                // out of the progress output; failures are expected to arrive
                // as exceptions
                $x = @new SoapClient("http://www.ebi.ac.uk/biomodels-main/services/BioModelsWebServices?wsdl");
            } catch (Exception $ex) {
                // without a client there is no way to obtain the id list
                echo $ex->getMessage() . PHP_EOL;
                return false;
            }
            $entries = $x->$method();
            file_put_contents($file, json_encode($entries));
        } else {
            $entries = json_decode(file_get_contents($file));
        }
        // an unreadable cache decodes to null; normalise so the loop below is safe
        $entries = (array) $entries;
    } elseif (($pos = strpos($list, "-")) !== false) {
        // a hyphenated range was provided
        $start_range = (int) substr($list, 0, $pos);
        $end_range = (int) substr($list, $pos + 1);
        for ($n = $start_range; $n <= $end_range; $n++) {
            $entries[] = "BIOMD" . str_pad((string) $n, 10, "0", STR_PAD_LEFT);
        }
    } else {
        // comma separated list; trim each id so stray spaces do not end up
        // inside the padded identifier
        foreach (explode(",", $list) as $item) {
            $entries[] = "BIOMD" . str_pad(trim($item), 10, "0", STR_PAD_LEFT);
        }
    }

    // set the write file
    $outfile = 'biomodels.nt';
    $gz = false;
    if ($this->GetParameterValue('graph_uri')) {
        $outfile = 'biomodels.nq';
    }
    if ($this->GetParameterValue('gzip')) {
        $outfile .= '.gz';
        $gz = true;
    }
    $bio2rdf_download_files = array($this->GetBio2RDFDownloadURL($this->GetNamespace()) . $outfile);
    $this->SetWriteFile($odir . $outfile, $gz);

    $force_download = ($this->GetParameterValue('download') == 'true');
    $download_url = $this->GetParameterValue('download_url');

    // iterate over the entries
    $i = 0;
    $total = count($entries);
    foreach ($entries as $id) {
        echo "processing " . ++$i . " of {$total} - biomodel# " . $id;
        $download_file = $ldir . $id . ".owl.gz";

        // download if the file doesn't exist or we are told to
        if (!file_exists($download_file) || $force_download) {
            echo " - downloading";
            $url = $download_url . "{$id}/{$id}-biopax3.owl";
            $buf = file_get_contents($url);
            if ($buf !== false && strlen($buf) != 0) {
                file_put_contents("compress.zlib://" . $download_file, $buf);
            }
        }

        // a failed download with no cached copy leaves nothing to parse
        if (!file_exists($download_file)) {
            echo " - unavailable, skipping" . PHP_EOL;
            continue;
        }

        // load entry, parse and write to file
        echo " - parsing";
        $buf = file_get_contents("compress.zlib://" . $download_file);
        if ($buf === false) {
            echo " - unreadable, skipping" . PHP_EOL;
            continue;
        }

        $converter = new BioPAX2Bio2RDF();
        $converter->SetBuffer($buf)
            ->SetBioPAXVersion(3)
            ->SetBaseNamespace("http://identifiers.org/biomodels.db/{$id}/")
            ->SetBio2RDFNamespace("http://bio2rdf.org/biomodels:" . $id . "_")
            ->SetDatasetURI($this->GetDatasetURI());
        $this->AddRDF($converter->Parse());
        $this->WriteRDFBufferToWriteFile();
        echo PHP_EOL;
    }
    $this->GetWriteFile()->Close();

    // generate the release file
    $this->DeleteBio2RDFReleaseFiles($odir);
    $desc = $this->GetBio2RDFDatasetDescription(
        $this->GetNamespace(),
        "https://github.com/bio2rdf/bio2rdf-scripts/blob/master/biomodels/biomodels.php",
        $bio2rdf_download_files,
        "http://www.ebi.ac.uk/biomodels-main/",
        array("use-share-modify"),
        null,
        $this->GetParameterValue('download_url'),
        $this->version
    );
    $this->SetWriteFile($odir . $this->GetBio2RDFReleaseFile($this->GetNamespace()));
    $this->GetWriteFile()->Write($desc);
    $this->GetWriteFile()->Close();

    return true;
}
/**
 * Convert the requested Pathway Commons BioPAX archives to RDF, one output
 * file per source, and write the dataset description.
 *
 * Parameters read: 'files', 'indir', 'outdir', 'download_url', 'download',
 * 'dataset_graph', 'output_format' and 'bio2rdf_release'. 'files' is either
 * 'all' (every source named in the parameter list after its first entry) or
 * a comma-separated list of source keys.
 *
 * Each archive is cached under 'indir' and only fetched when missing or when
 * 'download' is 'true'. Sources that are unknown or cannot be downloaded are
 * reported and skipped. The script exits when a local archive cannot be opened.
 */
function Run()
{
    // get the work
    if ($this->GetParameterValue('files') == 'all') {
        $sources = explode("|", parent::getParameterList('files'));
        array_shift($sources);
    } else {
        // comma separated list
        $sources = array_map('trim', explode(",", parent::getParameterValue('files')));
    }

    // source key => remote archive name
    $download_files = array(
        "homo-sapiens" => "Pathway%20Commons%202%20homo%20sapiens.BIOPAX.owl.gz",
        "hprd" => "Pathway%20Commons%202%20HPRD.BIOPAX.owl.gz",
        "humancyc" => "Pathway%20Commons%202%20HumanCyc.BIOPAX.owl.gz",
        "nci-nature" => "Pathway%20Commons%202%20NCI_Nature.BIOPAX.owl.gz",
        "panther-pathway" => "Pathway%20Commons%202%20PANTHER%20Pathway.BIOPAX.owl.gz",
        "phosphositeplus" => "Pathway%20Commons%202%20PhosphoSitePlus.BIOPAX.owl.gz",
        "reactome" => "Pathway%20Commons%202%20Reactome.BIOPAX.owl.gz",
    );

    // remember the configured graph URI; it is restored before the
    // dataset description is written
    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    // these do not vary per source and are also needed after the loop
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $rdir = parent::getParameterValue('download_url');
    $suffix = parent::getParameterValue('output_format');
    $gz = (strpos($suffix, "gz") !== false);
    $force_download = ($this->GetParameterValue('download') == 'true');

    // 'H' gives a two-digit hour and gmdate() makes the literal 'Z' (UTC)
    // suffix truthful
    $date_format = 'Y-m-d\TH:i:s\Z';

    $dataset_description = '';
    $source_file = null;

    // iterate over the requested data
    foreach ($sources as $source) {
        echo "processing {$source}... ";

        if (!isset($download_files[$source])) {
            echo "unknown source, skipping" . PHP_EOL;
            continue;
        }

        // set the remote and input files
        $zfile = $source . ".owl.gz";
        $rfile = $rdir . $download_files[$source];
        $lfile = $ldir . $zfile;

        // download if the file doesn't exist locally or we are told to
        if (!file_exists($lfile) || $force_download) {
            echo "downloading... ";
            $remote = file_get_contents($rfile);
            if ($remote === false) {
                // do not leave an empty archive behind for later runs
                echo "unable to download {$rfile}, skipping" . PHP_EOL;
                continue;
            }
            file_put_contents($lfile, $remote);
        }

        // decompress the archive into a buffer
        echo 'extracting... ';
        if (($fpin = gzopen($lfile, "r")) === false) {
            trigger_error("Unable to open {$lfile}", E_USER_ERROR);
            exit;
        }
        $data = '';
        while (!gzeof($fpin)) {
            $chunk = gzgets($fpin, 4096);
            if ($chunk === false) {
                // read error: stop rather than spin on a stream that never reaches EOF
                break;
            }
            $data .= $chunk;
        }
        gzclose($fpin);

        // set the output file
        $outfile = $source . '.' . $suffix;
        parent::setWriteFile($odir . $outfile, $gz);

        // send for parsing
        $p = new BioPAX2Bio2RDF($this);
        $p->SetBuffer($data)
            ->SetBioPAXVersion(3)
            ->SetBaseNamespace("http://purl.org/pc2/3/")
            ->SetBio2RDFNamespace("http://bio2rdf.org/pathwaycommons:")
            ->SetDatasetURI(parent::getDatasetURI());
        $rdf = $p->Parse();
        parent::addRDF($rdf);

        // write to output
        parent::writeRDFBufferToWriteFile();
        parent::getWriteFile()->Close();
        echo "done!" . PHP_EOL;

        // generate dataset description
        echo "Generating dataset description for {$zfile}... ";
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("Pathway Commons")
            ->setRetrievedDate(gmdate($date_format, filemtime($lfile)))
            ->setFormat("rdf/xml")
            ->setPublisher("http://www.pathwaycommons.org/")
            ->setHomepage("http://www.pathwaycommons.org/")
            ->setRights("use")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://www.pathwaycommons.org/pc2/home.html#data_sources")
            ->setDataset("http://identifiers.org/pathwaycommons/");
        $dataset_description .= $source_file->toRDF();
        echo "done!" . PHP_EOL;
    }

    echo "Generating dataset description for Bio2RDF Pathways Commons dataset... \n";
    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = gmdate($date_format);

    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})");
    // as in the original, the source recorded is the last one processed;
    // it is absent when every requested source was skipped
    if ($source_file !== null) {
        $output_file = $output_file->setSource($source_file->getURI());
    }
    $output_file = $output_file
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/pathwaycommons/pathwaycommons.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());

    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strpos($suffix, "nt") !== false) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }

    $dataset_description .= $output_file->toRDF();

    // write dataset description to file
    parent::setGraphURI($graph_uri);
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Convert BioModels BioPAX exports to RDF in a single output file and write
 * the dataset description.
 *
 * Parameters read: 'indir', 'outdir', 'files', 'dataset_graph',
 * 'output_format', 'download', 'download_url' and 'bio2rdf_release'.
 * 'files' is one of:
 *   - 'all'     : ids come from the getAllModelsId web service call
 *   - 'curated' : ids come from the getAllCuratedModelsId web service call
 *   - "a-b"     : a hyphenated numeric range
 *   - "a,b,c"   : a comma-separated list
 * Web service results are cached as JSON under 'indir'. Numeric ids are
 * expanded to "BIOMD" followed by the number left-padded to 10 digits.
 *
 * Each model is fetched from the "publ/" location first and from
 * "uncura_publ/" when that fails; models that cannot be obtained are skipped.
 * Returns early (without a value) when the web service client cannot be created.
 */
function Run()
{
    // directory shortcuts
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');

    // get the work specified
    $list = trim(parent::getParameterValue('files'));

    // selections answered by the web service: keyword => (cache file, SOAP method)
    $services = array(
        'all' => array("all_models.json", "getAllModelsId"),
        'curated' => array("curated_models.json", "getAllCuratedModelsId"),
    );

    $entries = array();
    if (isset($services[$list])) {
        $file = $ldir . $services[$list][0];
        $method = $services[$list][1];
        if (!file_exists($file)) {
            try {
                // '@' is kept from the original so construction warnings stay
                // out of the progress output; failures are expected to arrive
                // as exceptions
                $x = @new SoapClient("http://www.ebi.ac.uk/biomodels-main/services/BioModelsWebServices?wsdl");
            } catch (Exception $ex) {
                // without a client there is no way to obtain the id list
                echo $ex->getMessage() . PHP_EOL;
                return;
            }
            $entries = $x->$method();
            file_put_contents($file, json_encode($entries));
        } else {
            $entries = json_decode(file_get_contents($file));
        }
        // an unreadable cache decodes to null; normalise so the loop below is safe
        $entries = (array) $entries;
    } elseif (($pos = strpos($list, "-")) !== false) {
        // a hyphenated range was provided
        $start_range = (int) substr($list, 0, $pos);
        $end_range = (int) substr($list, $pos + 1);
        for ($n = $start_range; $n <= $end_range; $n++) {
            $entries[] = "BIOMD" . str_pad((string) $n, 10, "0", STR_PAD_LEFT);
        }
    } else {
        // comma separated list; trim each id so stray spaces do not end up
        // inside the padded identifier
        foreach (explode(",", $list) as $item) {
            $entries[] = "BIOMD" . str_pad(trim($item), 10, "0", STR_PAD_LEFT);
        }
    }

    // remember the configured graph URI; it is restored before the
    // dataset description is written
    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    // set the write file
    $suffix = parent::getParameterValue('output_format');
    $outfile = 'biomodels' . '.' . $suffix;
    $gz = (strpos($suffix, "gz") !== false);
    parent::setWriteFile($odir . $outfile, $gz);

    $force_download = ($this->GetParameterValue('download') == 'true');
    $download_url = parent::getParameterValue('download_url');

    // 'H' gives a two-digit hour and gmdate() makes the literal 'Z' (UTC)
    // suffix truthful
    $date_format = 'Y-m-d\TH:i:s\Z';

    $dataset_description = '';
    $source_file = null;

    // iterate over the entries
    $i = 0;
    $total = count($entries);
    foreach ($entries as $id) {
        echo "processing " . ++$i . " of {$total} - biomodel# " . $id;
        $download_file = $ldir . $id . ".owl.gz";
        $url = $download_url . "publ/{$id}/{$id}-biopax3.owl";

        // download if the file doesn't exist or we are told to
        if (!file_exists($download_file) || $force_download) {
            echo " - downloading";
            $ret = utils::downloadsingle($url, 'compress.zlib://' . $download_file, true);
            if ($ret === false) {
                echo "\nTrying non-curated model";
                $url = $download_url . "uncura_publ/{$id}/{$id}-biopax3.owl";
                $ret = utils::downloadsingle($url, 'compress.zlib://' . $download_file, true);
                if ($ret === false) {
                    echo " - unavailable, skipping" . PHP_EOL;
                    continue;
                }
            }
            echo " - downloaded";
        }

        // load entry, parse and write to file
        echo " - parsing... ";
        $buf = file_get_contents("compress.zlib://" . $download_file);
        if ($buf === false) {
            echo "unreadable, skipping" . PHP_EOL;
            continue;
        }

        $converter = new BioPAX2Bio2RDF($this);
        $converter->SetBuffer($buf)
            ->SetBioPAXVersion(3)
            ->SetBaseNamespace("http://identifiers.org/biomodels.db/{$id}/")
            ->SetBio2RDFNamespace("http://bio2rdf.org/biomodels:" . $id . "_")
            ->SetDatasetURI($this->GetDatasetURI());
        $rdf = $converter->Parse();
        parent::addRDF($rdf);
        parent::writeRDFBufferToWriteFile();

        // generate dataset description
        $source_file = (new DataResource($this))
            ->setURI($url)
            ->setTitle("EBI BioModels Database - BioModel # {$id}")
            ->setRetrievedDate(gmdate($date_format, filemtime($download_file)))
            ->setFormat("rdf/xml")
            ->setPublisher("http://www.ebi.ac.uk/")
            ->setHomepage("http://www.ebi.ac.uk/biomodels-main/")
            ->setRights("use-share-modify")
            ->setLicense("http://www.ebi.ac.uk/biomodels-main/termsofuse")
            ->setDataset("http://identifiers.org/biomodels.db/");
        $dataset_description .= $source_file->toRDF();
        echo "done!" . PHP_EOL;
    }
    parent::getWriteFile()->close();

    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = gmdate($date_format);

    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})");
    // as in the original, the source recorded is the last model processed;
    // it is absent when every entry was skipped
    if ($source_file !== null) {
        $output_file = $output_file->setSource($source_file->getURI());
    }
    $output_file = $output_file
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/biomodels/biomodels.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());

    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strpos($suffix, "nt") !== false) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }

    $dataset_description .= $output_file->toRDF();

    // write dataset description to file
    parent::setGraphURI($graph_uri);
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}