Esempio n. 1
0
 /**
  * Fetch each requested Orphanet source file and record its dataset description.
  *
  * The 'files' parameter is either the keyword 'all' or a comma-separated list
  * of keys into $this->filemap. For every key the source file is downloaded
  * when it is missing locally or when the 'download' parameter is 'true'; a
  * source/output DataResource pair is then serialised and appended to the
  * text passed to writeToReleaseFile().
  *
  * NOTE(review): the conversion step itself is commented out in the loop
  * below, so this method never writes the output file whose modification
  * time it reads afterwards. Confirm whether that is intentional.
  */
 function run()
 {
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $dd = '';

     $files = parent::getParameterValue('files');
     if ($files == 'all') {
         // The parameter list is '|'-separated; its first entry is dropped
         // (presumably the 'all' keyword itself - confirm against the
         // parameter definition).
         $files = explode('|', parent::getParameterList('files'));
         array_shift($files);
     } else {
         $files = explode(',', parent::getParameterValue('files'));
     }

     foreach ($files as $file) {
         echo "processing {$file} ...";
         $lfile = $ldir . $this->filemap[$file];
         $rfile = parent::getParameterValue('download_url') . $this->filemap[$file];

         // Download when there is no local copy or a refresh was requested.
         if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') {
             $ret = utils::downloadSingle($rfile, $lfile);
             if ($ret === false) {
                 echo "unable to download {$file} ... skipping" . PHP_EOL;
                 continue;
             }
         }

         parent::setReadFile($lfile, true);

         // Read the output format once; it drives the file suffix, the
         // compression flag and the reported media type.
         $output_format = parent::getParameterValue('output_format');
         $ofile = "orphanet-" . $file . '.' . $output_format;
         $gz = strstr($output_format, ".gz") !== false;

         // Conversion step, disabled in the original source:
         //     parent::setWriteFile($odir . $ofile, $gz);
         //     $this->$file($lfile);
         //     parent::getWriteFile()->close();

         parent::getReadFile()->close();
         parent::clear();
         echo "done!" . PHP_EOL;

         // dataset description: where the data came from
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("Orphanet: {$file}")
             ->setRetrievedDate(parent::getDate(filemtime($lfile)))
             ->setFormat("application/xml")
             ->setPublisher("http://www.orpha.net")
             ->setHomepage("http://www.orpha.net/")
             ->setRights("use")
             ->setRights("sharing-modified-version-needs-permission")
             ->setLicense("http://creativecommons.org/licenses/by-nd/3.0/")
             ->setDataset("http://identifiers.org/orphanet/");

         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         // NOTE(review): filemtime() returns false (with a warning) when the
         // output file does not exist, which is likely while the conversion
         // step above stays disabled.
         $date = parent::getDate(filemtime($odir . $ofile));

         // dataset description: the Bio2RDF file derived from that source
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/orphanet/orphanet.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());

         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr($output_format, "nt") !== false) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }

         $dd .= $source_file->toRDF() . $output_file->toRDF();
     }

     parent::writeToReleaseFile($dd);
 }
Esempio n. 2
0
 /**
  * Convert the Affymetrix probeset annotation archives into one RDF file and
  * write the accompanying dataset description.
  *
  * The listing page at 'download_url' is cached as probeset_list.html in the
  * input directory and scanned for quoted "*.csv.zip" links. Either all of
  * them ('files' == 'all') or those containing one of the comma-separated
  * 'files' fragments are processed. Archives that are not present in the
  * input directory are skipped; this method does not download them.
  *
  * The script is terminated with exit when no archive link is found, when
  * nothing matches the selection, or when an archive cannot be opened.
  *
  * @return bool true on completion; FALSE when the expected CSV entry cannot
  *              be read from its archive.
  */
 function Run()
 {
     // directory shortcuts
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');

     // get the listings page
     $url = trim(parent::getParameterValue('download_url'));
     $listing_file = $ldir . "probeset_list.html";
     if (!file_exists($listing_file) || parent::getParameterValue("download") == "true") {
         echo "Downloading {$listing_file}" . PHP_EOL;
         Utils::DownloadSingle($url, $listing_file);
     }
     $listings = file_get_contents($listing_file);

     // make a list of the csv.zip files (captured without the extension)
     preg_match_all("/\"([^\"]+)\\.csv\\.zip\"/", $listings, $archives);
     if (count($archives[1]) == 0) {
         trigger_error("could not find any .csv.zip files in {$url}");
         exit;
     }

     // narrow the list down to what was asked for
     $myfiles = array();
     if (parent::getParameterValue("files") == 'all') {
         $myfiles = $archives[1];
     } else {
         foreach (explode(",", parent::getParameterValue("files")) as $f) {
             $found = false;
             foreach ($archives[1] as $n) {
                 // only the first archive containing the fragment is taken
                 if (strstr($n, $f) !== false) {
                     $found = true;
                     $myfiles[] = $n;
                     break;
                 }
             }
             if ($found === false) {
                 echo "cannot find {$f} in list" . PHP_EOL;
             }
         }
     }
     if (count($myfiles) === 0) {
         // nothing to do
         exit;
     }

     $dataset_description = '';

     // set the write file
     $output_format = parent::getParameterValue('output_format');
     $gz = strstr($output_format, ".gz") !== false;
     $outfile = 'affymetrix.' . $output_format;
     $this->setWriteFile($odir . $outfile, $gz);

     // Stays null when every archive is skipped, so the output description
     // below can omit its source link instead of failing on an undefined
     // variable.
     $source_file = null;

     // iterate over the files
     foreach ($myfiles as $rfile) {
         // strrpos() yields false for a link without '/'; the old
         // "false + 1" offset silently dropped the first character.
         $slash = strrpos($rfile, "/");
         $base_file = ($slash === false) ? $rfile : substr($rfile, $slash + 1);

         // get and set the dataset version from the ".naNN.annot" file name
         if (parent::getDatasetVersion() == null) {
             preg_match("/\\.na([0-9]{2})\\.annot/", $base_file, $version_match);
             if (isset($version_match[1])) {
                 $this->setDatasetVersion($version_match[1]);
             }
         }
         // rewrite the file name when a different version was requested
         if (parent::getDatasetVersion() != parent::getParameterValue('version')) {
             $base_file = str_replace("na" . parent::getDatasetVersion(), "na" . parent::getParameterValue('version'), $base_file);
         }

         $csv_file = $base_file . ".csv";
         $zip_file = $csv_file . ".zip";
         $lfile = $ldir . $zip_file;
         if (!file_exists($lfile)) {
             echo "skipping: {$lfile} does not exist" . PHP_EOL;
             continue;
         }
         echo "processing {$lfile}" . PHP_EOL;

         // open the zip file; ZipArchive::open() signals failure with an
         // integer error code, so only a strict true means success
         $zin = new ZipArchive();
         if ($zin->open($lfile) !== true) {
             trigger_error("Unable to open {$lfile}");
             exit;
         }
         if (($fp = $zin->getStream($csv_file)) === FALSE) {
             trigger_error("Unable to get {$csv_file} in ziparchive {$lfile}");
             $zin->close();
             return FALSE;
         }

         // hand the archive stream to the reader and parse it
         parent::setReadFile($lfile);
         parent::getReadFile()->setFilePointer($fp);
         $this->parse($base_file);
         parent::getReadFile()->close();
         parent::clear();
         $zin->close();

         // dataset description
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("Affymetrix Probeset: {$base_file}")
             ->setRetrievedDate(parent::getDate(filemtime($lfile)))
             ->setFormat("text/tab-separated-value")
             ->setFormat("application/zip")
             ->setPublisher("http://affymetrix.com")
             ->setHomepage("http://www.affymetrix.com/support/technical/annotationfilesmain.affx")
             ->setRights("use")
             ->setRights("no-commercial")
             ->setRights("registration-required")
             ->setLicense("http://www.affymetrix.com/about_affymetrix/legal/index.affx")
             ->setDataset("http://identifiers.org/affy.probeset/");
         $dataset_description .= $source_file->toRDF();
     }
     $this->getWriteFile()->close();

     // describe the generated file
     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = parent::getDate(filemtime($odir . $outfile));

     // The chain is split so setSource() keeps its original position while
     // being skipped when no archive was processed. Results are reassigned
     // rather than assuming the setters return the same instance.
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outfile}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}");
     if ($source_file !== null) {
         // as before, only the last processed source is linked
         $output_file = $output_file->setSource($source_file->getURI());
     }
     $output_file = $output_file
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/affymetrix/affymetrix.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());

     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr($output_format, "nt") !== false) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description .= $output_file->toRDF();

     // write the dataset description
     $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
     $this->getWriteFile()->write($dataset_description);
     $this->getWriteFile()->close();
     return true;
 }
Esempio n. 3
0
 /**
  * Convert the requested MeSH packages to RDF and write the dataset
  * description.
  *
  * The 'files' parameter is either 'all' or a comma-separated list of keys
  * from getPackageMap(); unknown keys are ignored. Each map value is a file
  * name pattern in which the literal "YEAR" is replaced by the 'year'
  * parameter. The map key is also the name of the method on this object
  * that is invoked to parse the package.
  *
  * A failed download is reported as a warning and the package is skipped.
  * The original code raised E_USER_ERROR here, which stops the script under
  * PHP's default error handler and made the following `continue`
  * unreachable.
  */
 function Run()
 {
     // work out which packages to process
     $sp = trim(parent::getParameterValue('files'));
     $pm = $this->getPackageMap();
     if ($sp == 'all') {
         $files = $pm;
     } else {
         $files = array();
         foreach (explode(",", $sp) as $a) {
             if (array_key_exists($a, $pm)) {
                 $files[$a] = $pm[$a];
             }
         }
     }

     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $dd = '';

     // now iterate over the files array
     $year = parent::getParameterValue('year');
     foreach ($files as $k => $fpattern) {
         $file = str_replace("YEAR", $year, $fpattern);
         $lfile = $ldir . $file;
         $rfile = parent::getParameterValue("download_url") . $file;

         // download if necessary
         if (!file_exists($lfile) || parent::getParameterValue('download') == "true") {
             echo "Downloading {$file} ... ";
             $ret = utils::downloadSingle($rfile, $lfile);
             if ($ret === FALSE) {
                 // warn and move on to the next package
                 trigger_error("Unable to get {$file}", E_USER_WARNING);
                 continue;
             }
             echo "done!" . PHP_EOL;
         }

         // set the outfile
         $output_format = parent::getParameterValue('output_format');
         $ofile = "mesh_" . $k . "." . $output_format;
         $gz = strstr($output_format, "gz") !== false;

         echo "processing {$k} ...";
         parent::setReadFile($lfile, FALSE);
         parent::setWriteFile($odir . $ofile, $gz);
         // the package key names the parser method
         $this->{$k}();
         parent::writeRDFBufferToWriteFile();
         parent::getWriteFile()->close();
         echo "done!" . PHP_EOL;

         // dataset description: the MeSH source file
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("MeSH")
             ->setRetrievedDate(parent::getDate(filemtime($lfile)))
             ->setFormat("text/x-mesh-record")
             ->setPublisher("http://www.nlm.nih.gov")
             ->setHomepage("http://www.nlm.nih.gov/mesh/")
             ->setRights("use")
             ->setLicense("http://www.nlm.nih.gov/databases/download.html")
             ->setDataset("http://identifiers.org/mesh/");

         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = parent::getDate(filemtime($odir . $ofile));

         // dataset description: the generated Bio2RDF file
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/mesh/mesh.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());

         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr($output_format, "nt") !== false) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }

         $dd .= $source_file->toRDF() . $output_file->toRDF();
     }

     // write the accumulated dataset description
     parent::setWriteFile($odir . $this->getBio2RDFReleaseFile($this->getNamespace()));
     parent::getWriteFile()->write($dd);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Esempio n. 4
0
 /**
  * Convert the requested KEGG databases to RDF and write the dataset
  * description.
  *
  * The 'files' parameter is either 'all' or a comma-separated list of
  * database names. When 'id_list' is non-empty it is split on commas and
  * stored in $this->idlist. The "genes" entry is handled separately: the
  * genome list is read, and for every organism code in the local allow-list
  * that organism's gene list is fetched and processed into a single
  * kegg-genes output file. Every other entry is fetched from
  * "<download_url>list/<db>" and processed into its own kegg-<db> file.
  *
  * Files are downloaded when missing locally or when 'download' is 'true';
  * a failed download is reported and that item is skipped.
  */
 function run()
 {
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $dataset_description = '';

     $files = parent::getParameterValue('files');
     if ($files == 'all') {
         // '|'-separated parameter list; the first entry is dropped
         $files = explode('|', parent::getParameterList('files'));
         array_shift($files);
     } else {
         $files = explode(',', parent::getParameterValue('files'));
     }

     if (parent::getParameterValue('id_list') != '') {
         $this->idlist = explode(",", parent::getParameterValue("id_list"));
     }

     // handle genes separately
     if (in_array("genes", $files, true)) {
         // organism codes to process; the original source listed
         // mmu, eco, dre, dme, ath, sce and ddi as disabled candidates
         $orgs = array("hsa");

         echo "processing genes" . PHP_EOL;
         $ofile = "kegg-genes." . parent::getParameterValue('output_format');
         $gz = strstr(parent::getParameterValue('output_format'), "gz") ? true : false;
         parent::setWriteFile($odir . $ofile, $gz);

         // get the list of genomes
         $genome_lfile = $ldir . "genome.txt";
         $genome_rfile = parent::getParameterValue("download_url") . "list/genome";
         $genome_available = true;
         if (!file_exists($genome_lfile) || parent::getParameterValue('download') == 'true') {
             if (utils::downloadSingle($genome_rfile, $genome_lfile) === false) {
                 echo "unable to download genome ... skipping" . PHP_EOL;
                 $genome_available = false;
             }
         }

         // Stays null when no organism is processed, so the output
         // description can omit its source link instead of failing.
         $source_file = null;

         $fp = ($genome_available && file_exists($genome_lfile)) ? fopen($genome_lfile, "r") : false;
         if ($fp === false) {
             trigger_error("Unable to open {$genome_lfile}", E_USER_WARNING);
         } else {
             // compare against false so a falsy line cannot end the loop early
             while (($l = fgets($fp)) !== false) {
                 $a = explode("\t", $l);
                 if (!isset($a[1])) {
                     // line without a tab-separated second column
                     continue;
                 }
                 // the organism code is the first ", "-separated item
                 $b = explode(", ", $a[1]);
                 $org = $b[0];
                 if (!in_array($org, $orgs, true)) {
                     continue;
                 }

                 // get the list of genes for this organism
                 echo "processing {$org}" . PHP_EOL;
                 $lfile = $ldir . $org . ".txt";
                 $rfile = parent::getParameterValue("download_url") . "list/{$org}";
                 if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') {
                     if (utils::downloadSingle($rfile, $lfile) === false) {
                         echo "unable to download {$org} ... skipping" . PHP_EOL;
                         continue;
                     }
                 }

                 // expose the current organism on the object while
                 // process() runs, then reset it
                 $this->org = $org;
                 parent::setReadFile($lfile, false);
                 $this->process("gene");
                 parent::getReadFile()->close();
                 parent::clear();
                 $this->org = null;

                 // add dataset description
                 $source_file = (new DataResource($this))
                     ->setURI($rfile)
                     ->setTitle("KEGG: Gene")
                     ->setRetrievedDate(parent::getDate(filemtime($lfile)))
                     ->setFormat("text/plain")
                     ->setPublisher("http://www.kegg.jp/")
                     ->setHomepage("http://www.kegg.jp/")
                     ->setRights("use")
                     ->setRights("no-commercial")
                     ->setLicense("http://www.kegg.jp/kegg/legal.html")
                     ->setDataset("http://identifiers.org/kegg/");
                 $dataset_description .= $source_file->toRDF();
             }
             fclose($fp);
         }
         parent::getWriteFile()->close();
         echo "done" . PHP_EOL;

         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = parent::getDate(filemtime($odir . $ofile));

         // The chain is split so setSource() keeps its original position
         // while being skipped when no organism was processed. Results are
         // reassigned rather than assuming the setters return the same
         // instance.
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - Gene ");
         if ($source_file !== null) {
             // as before, only the last processed organism is linked
             $output_file = $output_file->setSource($source_file->getURI());
         }
         $output_file = $output_file
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/kegg/kegg.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());

         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr(parent::getParameterValue('output_format'), "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $output_file->toRDF();
     }

     // all other files
     foreach ($files as $db) {
         if ($db == "genes") {
             continue;
         }
         echo "processing {$db}" . PHP_EOL;
         $lfile = $ldir . $db . ".txt";
         $rfile = parent::getParameterValue("download_url") . "list/{$db}";
         if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') {
             echo "Downloading {$rfile} ";
             $ret = utils::downloadSingle($rfile, $lfile);
             if ($ret === false) {
                 // the message used to interpolate an undefined $file
                 echo "unable to download {$db} ... skipping" . PHP_EOL;
                 continue;
             }
             echo "done." . PHP_EOL;
         }

         // now for each list, get the individual entries
         $ofile = "kegg-{$db}." . parent::getParameterValue('output_format');
         $gz = strstr(parent::getParameterValue('output_format'), "gz") ? true : false;
         parent::setReadFile($lfile, false);
         parent::setWriteFile($odir . $ofile, $gz);
         $this->process($db);
         parent::getWriteFile()->close();
         parent::getReadFile()->close();
         parent::clear();
         echo "done!" . PHP_EOL;

         // add dataset description
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("KEGG: {$db}")
             ->setRetrievedDate(parent::getDate(filemtime($lfile)))
             ->setFormat("text/plain")
             ->setPublisher("http://www.kegg.jp/")
             ->setHomepage("http://www.kegg.jp/")
             ->setRights("use")
             ->setRights("no-commercial")
             ->setLicense("http://www.kegg.jp/kegg/legal.html")
             ->setDataset("http://identifiers.org/kegg/");

         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = parent::getDate(filemtime($odir . $ofile));

         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$db} ")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/kegg/kegg.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());

         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr(parent::getParameterValue('output_format'), "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
     }

     // write the dataset description
     $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
     $this->getWriteFile()->write($dataset_description);
     $this->getWriteFile()->close();
 }
Esempio n. 5
0
 /**
  * Convert the requested SIDER files to RDF and write the dataset
  * description.
  *
  * The 'files' parameter is either 'all' or a comma-separated list of file
  * keys. Each key maps to "meddra_all_<key>.tsv.gz", except "freq", which
  * maps to "meddra_freq.tsv.gz". The key is also the name of the method on
  * this object that is invoked to parse the file.
  *
  * A file is downloaded when missing locally or when 'download' is 'true'.
  * A failed fetch raises a warning and skips the file; a failed local write
  * raises E_USER_ERROR and exits.
  */
 function run()
 {
     $idir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $files = parent::getParameterValue('files');
     $dataset_description = '';

     if ($files == 'all') {
         // '|'-separated parameter list; the first entry is dropped
         $files = explode('|', parent::getParameterList('files'));
         array_shift($files);
     } else {
         $files = explode(',', parent::getParameterValue('files'));
     }

     foreach ($files as $file) {
         // build the remote/local file name for this key
         $f = ($file != "freq") ? "all_" . $file : $file;
         $f = "meddra_" . $f . ".tsv.gz";
         $lfile = $idir . $f;
         $rfile = parent::getParameterValue('download_url') . $f;

         if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') {
             echo "downloading {$file}... ";
             $ret = file_get_contents($rfile);
             if ($ret === FALSE) {
                 trigger_error("Unable to get {$rfile}", E_USER_WARNING);
                 continue;
             }
             $ret = file_put_contents($lfile, $ret);
             if ($ret === FALSE) {
                 trigger_error("Unable to write {$lfile}", E_USER_ERROR);
                 exit;
             }
             echo "done!" . PHP_EOL;
         }

         echo "Processing {$f}... ";
         parent::setReadFile($lfile, true);

         $output_format = parent::getParameterValue('output_format');
         $ofile = "sider-" . $file . '.' . $output_format;
         $gz = strstr($output_format, "gz") !== false;
         parent::setWriteFile($odir . $ofile, $gz);

         // the file key names the parser method
         $this->{$file}();
         parent::getWriteFile()->close();
         parent::getReadFile()->close();
         echo "done!" . PHP_EOL;

         echo "Generating dataset description... ";
         // the title previously lacked its closing parenthesis
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("SIDER Side Effect resource ({$file}.tsv.gz)")
             ->setRetrievedDate(parent::getDate(filemtime($lfile)))
             ->setFormat("text/tab-separated-value")
             ->setFormat("application/gzip")
             ->setPublisher("http://sideeffects.embl.de/")
             ->setHomepage("http://sideeffects.embl.de/")
             ->setRights("use-share-modify")
             ->setLicense("http://creativecommons.org/licenses/by-nc-sa/3.0/")
             ->setDataset("http://identifiers.org/sider.effect/");

         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = parent::getDate(filemtime($odir . $ofile));

         // the URI host was misspelled "download.bio2df.org"; it now matches
         // the homepage host used in the same description
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sider/sider.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());

         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr($output_format, "nt") !== false) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }

         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
     }

     // write the accumulated dataset description
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }