Exemplo n.º 1
0
 /**
  * Convert the HGNC complete set to RDF and write the dataset release file.
  *
  * Steps: download the source file when requested (or when the local copy is
  * missing), run process() from the local file into "hgnc.<output_format>" in
  * the output directory, then describe the source and the generated file as
  * DataResource records and write them to the Bio2RDF release file.
  *
  * Parameters read: indir, outdir, download_url, download, output_format,
  * bio2rdf_release.
  */
 function Run()
 {
     $file = "hgnc_complete_set.txt.gz";
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     $format = parent::getParameterValue('output_format');
     $lfile = $ldir . $file;

     // A missing local copy forces a download even when none was requested.
     if (!file_exists($lfile) && parent::getParameterValue('download') == false) {
         trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
         parent::setParameterValue('download', true);
     }

     // Here 'download_url' is used as the complete remote URL (no file name is appended).
     if (parent::getParameterValue('download') == true) {
         echo "downloading {$file} ... ";
         Utils::DownloadSingle($rdir, $lfile);
     }

     // The output is written compressed when the requested format mentions "gz".
     $gz = strstr($format, "gz") !== false;
     parent::setWriteFile($odir . "hgnc." . $format, $gz);
     // NOTE(review): the second argument is presumably a "gzipped input" flag - confirm.
     parent::setReadFile($lfile, true);

     echo "processing {$file}... ";
     $this->process();
     echo "done!" . PHP_EOL;

     // Close the RDF output exactly once (the original closed it a second time
     // after building the release metadata).
     parent::getWriteFile()->close();
     echo PHP_EOL;

     // generate the dataset release file
     echo "generating dataset release file... ";

     // "H" gives a zero-padded hour and gmdate() makes the literal "Z" truthful.
     $stamp_format = "Y-m-d\\TH:i:s\\Z";

     $source_file = (new DataResource($this))
         ->setURI($rdir)
         ->setTitle('HUGO Gene Nomenclature Committee (HGNC)')
         ->setRetrievedDate(gmdate($stamp_format, filemtime($lfile)))
         ->setFormat('text/tab-separated-value')
         ->setFormat('application/gzip') // the source is a .gz file, not a zip archive
         ->setPublisher('http://www.genenames.org/')
         ->setHomepage('http://www.genenames.org/data/gdlw_columndef.html')
         ->setRights('use')
         ->setRights('attribution')
         ->setLicense('http://www.genenames.org/about/overview')
         ->setDataset(parent::getDatasetURI());

     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = gmdate($stamp_format);

     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/hgnc/hgnc.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/") // was missing ".org"
         ->setDataset(parent::getDatasetURI());

     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr($format, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }

     // Write both resource descriptions to the release file in the output directory.
     $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
     $this->getWriteFile()->write($source_file->toRDF() . $output_file->toRDF());
     $this->getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Exemplo n.º 2
0
 /**
  * Convert the NCBI HomoloGene data file to RDF and write the dataset release file.
  *
  * Steps: download "homologene.data" when requested (or when the local copy is
  * missing), run process() from it into "homologene.<output_format>" in the
  * output directory, then describe the source and the generated file as
  * DataResource records and write them to the Bio2RDF release file.
  *
  * Parameters read: indir, outdir, download_url, download, output_format,
  * bio2rdf_release.
  */
 function Run()
 {
     $file = "homologene.data";
     $ldir = $this->GetParameterValue('indir');
     $odir = $this->GetParameterValue('outdir');
     $rdir = $this->GetParameterValue('download_url');
     $format = parent::getParameterValue('output_format');
     $lfile = $ldir . $file;
     $rfile = $rdir . $file;

     // A missing local copy forces a download.
     if (!file_exists($lfile)) {
         trigger_error($file . " not found. Will attempt to download.", E_USER_NOTICE);
         parent::setParameterValue('download', true);
     }

     //download
     if ($this->GetParameterValue('download') == true) {
         echo "downloading {$file} ... ";
         utils::downloadSingle($rfile, $lfile);
     }

     $ofile = 'homologene.' . $format;
     // The output is written compressed when the requested format mentions "gz".
     $gz = strstr($format, "gz") !== false;

     parent::setReadFile($lfile);
     parent::setWriteFile($odir . $ofile, $gz);

     echo "processing {$file}... ";
     $this->process();
     echo "done!" . PHP_EOL;
     parent::getWriteFile()->close();

     // generate the dataset release file
     // "H" gives a zero-padded hour and gmdate() makes the literal "Z" truthful.
     $stamp_format = "Y-m-d\\TH:i:s\\Z";

     $source_file = (new DataResource($this))
         ->setURI($rfile)
         ->setTitle("NCBI Homologene")
         ->setRetrievedDate(gmdate($stamp_format, filemtime($lfile)))
         ->setFormat("text/tab-separated-value")
         ->setPublisher("http://www.ncbi.nlm.nih.gov")
         ->setHomepage("http://www.ncbi.nlm.nih.gov/homologene")
         ->setRights("use-share-modify")
         ->setLicense("http://www.ncbi.nlm.nih.gov/About/disclaimer.html")
         ->setDataset("http://identifiers.org/homologene/");

     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = gmdate($stamp_format);

     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/homologene/homologene.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());

     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr($format, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }

     // Write both resource descriptions to the release file in the output directory.
     $dataset_description = $source_file->toRDF() . $output_file->toRDF();
     echo "Generating dataset description... ";
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Exemplo n.º 3
0
 /**
  * Convert the iProClass table to RDF and write the dataset release file.
  *
  * Steps: download "iproclass.tb.gz" when requested (or when the local copy is
  * missing), run process() over it, then describe the source and the generated
  * file as DataResource records and write them to the Bio2RDF release file.
  *
  * Parameters read: indir, outdir, download_url, download, output_format,
  * bio2rdf_release.
  */
 public function Run()
 {
     $file = "iproclass.tb.gz";
     $ldir = $this->GetParameterValue('indir');
     $odir = $this->GetParameterValue('outdir');
     $rdir = $this->GetParameterValue('download_url');
     $lfile = $ldir . $file;
     $rfile = $rdir . $file;

     // A missing local copy forces a download.
     if (!file_exists($lfile)) {
         trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
         parent::setParameterValue('download', true);
     }

     //download all files
     if ($this->GetParameterValue('download') == true) {
         echo "downloading {$file}... ";
         utils::DownloadSingle($rfile, $lfile);
     }

     // The published file name and its compression are fixed here, independent
     // of the 'output_format' parameter.
     $ofile = "iproclass.nq";
     $gz = true;

     // NOTE(review): the second argument is presumably a "gzipped input" flag - confirm.
     parent::setReadFile($lfile, true);

     echo "processing {$file}... ";
     $this->process();
     echo "done!" . PHP_EOL;

     // NOTE(review): this method never calls setWriteFile() before this point;
     // presumably process() opens the write file itself - confirm, otherwise
     // getWriteFile() has nothing to close.
     parent::getWriteFile()->close();

     echo "generating dataset release file... ";
     // "H" gives a zero-padded hour and gmdate() makes the literal "Z" truthful.
     $stamp_format = "Y-m-d\\TH:i:s\\Z";

     $source_file = (new DataResource($this))
         ->setURI($rfile)
         ->setTitle("iProClass")
         ->setRetrievedDate(gmdate($stamp_format, filemtime($lfile)))
         ->setFormat("text/tab-separated-value")
         ->setFormat("application/gzip")
         ->setPublisher("http://pir.georgetown.edu")
         ->setHomepage("http://pir.georgetown.edu/iproclass")
         ->setRights("use-share-modify")
         ->setLicense("http://pir.georgetown.edu/pirwww/about/linkpir.shtml")
         ->setDataset("http://identifiers.org/iproclass/");

     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = gmdate($stamp_format);

     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/iproclass/iproclass.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());

     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     // NOTE(review): the serialisation is still taken from 'output_format' even
     // though $ofile is hard-coded to ".nq" - confirm which one is intended.
     if (strstr(parent::getParameterValue('output_format'), "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }

     // Write both resource descriptions to the release file in the output directory.
     $dataset_description = $source_file->toRDF() . $output_file->toRDF();
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Exemplo n.º 4
0
 /**
  * Fetch the selected OMIM entries through the OMIM API, convert them to RDF
  * and write the dataset release file.
  *
  * The 'files' parameter selects the work: "all", a hyphenated range
  * ("100050-100100") or a comma-separated list of MIM numbers. Each entry is
  * cached as "<mim>.json.gz" in the input directory and downloaded when the
  * cache file is missing or 'download' is set.
  *
  * Parameters read: indir, outdir, omim_api_key, omim_api_key_file,
  * omim_api_url, files, download, output_format, bio2rdf_release.
  *
  * @return bool Always TRUE.
  */
 function Run()
 {
     // directory shortcuts
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');

     // Fall back to the key file when no API key was passed as a parameter.
     if (parent::getParameterValue('omim_api_key') == '') {
         $key_file = parent::getParameterValue('omim_api_key_file');
         if (file_exists($key_file)) {
             $key = trim(file_get_contents($key_file));
             if ($key) {
                 parent::setParameterValue('omim_api_key', $key);
             } else {
                 trigger_error("No API key found in the specified omim key file {$key_file}", E_USER_WARNING);
             }
         } else {
             trigger_error("No OMIM key has been provided either by commmand line or in the expected omim key file {$key_file}", E_USER_WARNING);
         }
     }

     // get the list of mim2gene entries
     $entries = $this->GetListOfEntries($ldir);

     // get the work specified
     $list = trim(parent::getParameterValue('files'));
     if ($list != 'all') {
         // Start from an empty selection so that a range or list matching
         // nothing yields an empty array instead of an undefined variable.
         $myentries = array();
         if (($pos = strpos($list, "-")) !== FALSE) {
             // hyphenated range: keep entries whose id lies within [start, end]
             $start_range = trim(substr($list, 0, $pos));
             $end_range = trim(substr($list, $pos + 1));
             foreach ($entries as $e => $type) {
                 if ($e >= $start_range && $e <= $end_range) {
                     $myentries[$e] = $type;
                 }
             }
             $entries = $myentries;
         } else {
             // comma separated list: tolerate whitespace around each id
             foreach (explode(",", $list) as $e) {
                 $myentries[trim($e)] = '';
             }
             $entries = array_intersect_key($entries, $myentries);
         }
     }

     // set the write file
     $format = parent::getParameterValue('output_format');
     $gz = strstr($format, ".gz") !== FALSE;
     $outfile = 'omim.' . $format;
     parent::setWriteFile($odir . $outfile, $gz);

     // declare the mapping method types
     $this->get_method_type(null, true);

     // iterate over the entries
     $i = 0;
     $total = count($entries);
     foreach ($entries as $omim_id => $type) {
         echo "processing " . ++$i . " of {$total} - omim# ";
         $download_file = $ldir . $omim_id . ".json.gz";
         $gzfile = "compress.zlib://{$download_file}";

         // download if the file doesn't exist or we are told to
         if (!file_exists($download_file) || parent::getParameterValue('download') == true) {
             // download using the api
             $url = parent::getParameterValue('omim_api_url') . '&apiKey=' . parent::getParameterValue('omim_api_key') . '&mimNumber=' . $omim_id;
             $buf = file_get_contents($url);
             if ($buf !== false && strlen($buf) != 0) {
                 file_put_contents($download_file, $buf);
                 usleep(500000);
                 // limit of 4 requests per second
             }
         }

         // Nothing to parse when the download failed and no cached copy exists.
         if (!file_exists($download_file)) {
             echo PHP_EOL;
             trigger_error("Unable to retrieve OMIM entry {$omim_id}; skipping", E_USER_WARNING);
             continue;
         }

         // load entry, parse and write to file
         $entry = json_decode(file_get_contents($gzfile), true);
         if (!isset($entry["omim"]["entryList"][0]["entry"]['mimNumber'])) {
             echo PHP_EOL;
             trigger_error("Unable to decode OMIM entry {$omim_id} from {$download_file}; skipping", E_USER_WARNING);
             continue;
         }
         $omim_id = trim((string) $entry["omim"]["entryList"][0]["entry"]['mimNumber']);
         echo $omim_id;
         $this->ParseEntry($entry, $type);
         parent::writeRDFBufferToWriteFile();
         echo PHP_EOL;
     }
     parent::writeRDFBufferToWriteFile();
     parent::getWriteFile()->close();

     // generate the dataset description file
     // "H" gives a zero-padded hour and gmdate() makes the literal "Z" truthful.
     $stamp_format = "Y-m-d\\TH:i:s\\Z";

     $source_file = (new DataResource($this))
         ->setURI(parent::getParameterValue('omim_api_url'))
         ->setTitle("OMIM " . parent::getDatasetVersion())
         ->setRetrievedDate(gmdate($stamp_format))
         ->setFormat("application/json")
         ->setPublisher("http://omim.org")
         ->setHomepage("http://omim.org")
         ->setRights("use")
         ->setRights("no-commercial")
         ->setRights("registration-required")
         ->setLicense("http://www.omim.org/help/agreement")
         ->setDataset("http://identifiers.org/omim/");

     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = gmdate($stamp_format);

     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outfile}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/omim/omim.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());

     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr($format, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }

     // Write both resource descriptions to the release file in the output directory.
     $dataset_description = $source_file->toRDF() . $output_file->toRDF();
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     return TRUE;
 }
Exemplo n.º 5
0
 /**
  * Convert a selection of dbSNP records to RDF and write the dataset release file.
  *
  * The first item of the comma-separated 'files' parameter selects the SNPs:
  * "all", "clinical", "omim" (ids taken from snp_omimvar.txt), "pharmgkb"
  * (ids taken from the PharmGKB rsid archive); anything else is treated as an
  * explicit list of SNP ids. Each SNP is cached as "<snp>.xml.gz" in the input
  * directory and downloaded when the cache file is missing or 'download' is set.
  *
  * Parameters read: indir, outdir, files, download, download_url,
  * output_format, bio2rdf_release.
  */
 function run()
 {
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');

     // Work out which SNP ids to process.
     $snps = explode(",", parent::getParameterValue('files'));
     if ($snps[0] == 'all') {
         $snps = $this->getSNPs();
     } elseif ($snps[0] == 'clinical') {
         $snps = $this->getSNPs(true);
     } elseif ($snps[0] == 'omim') {
         $lfile = $ldir . 'snp_omimvar.txt';
         if (!file_exists($lfile) || parent::getParameterValue('download') == true) {
             utils::DownloadSingle('ftp://ftp.ncbi.nlm.nih.gov/snp/Entrez/snp_omimvar.txt', $lfile);
         }
         $snps = $this->processOMIMVar($lfile);
     } elseif ($snps[0] == 'pharmgkb') {
         $lfile = $ldir . 'pharmgkb.snp.zip';
         if (!file_exists($lfile) || parent::getParameterValue('download') == true) {
             utils::DownloadSingle('http://www.pharmgkb.org/download.do?objId=rsid.zip&dlCls=common', $lfile);
         }
         $snps = $this->processPharmGKBSnps($lfile);
     }

     // Keep the bare file name apart from the local path: the name is also
     // published in the download URI, where the local directory must not appear.
     $format = parent::getParameterValue('output_format');
     $outname = "dbsnp." . $format;
     $gz = strstr($format, ".gz") !== FALSE;
     parent::setWriteFile($odir . $outname, $gz);

     $base_url = parent::getParameterValue('download_url');
     $force_download = parent::getParameterValue('download') == true;
     // Fallback for the source URI when there is nothing to process.
     $rfile = $base_url;

     $n = count($snps);
     $z = 0;
     foreach ($snps as $i => $snp) {
         $file = $snp . '.xml.gz';
         $infile = $ldir . $file;
         $rfile = $base_url . $snp;

         // Decide per file: a missing cache file must not switch on downloading
         // for every SNP that follows.
         if ($force_download || !file_exists($infile)) {
             trigger_error("Downloading {$file}", E_USER_NOTICE);
             $ret = utils::downloadSingle($rfile, "compress.zlib://" . $infile, true);
             if ($ret === false) {
                 continue;
             }
         }

         // process
         echo "Processing {$snp} (" . ($i + 1) . "/{$n})" . PHP_EOL;
         $this->parse($infile);
         parent::writeRDFBufferToWriteFile();
         // clear() runs on the first processed SNP and then after every 10000th.
         if ($z++ % 10000 == 0) {
             parent::clear();
         }
     }
     parent::getWriteFile()->close();

     // generate the dataset description file
     // "H" gives a zero-padded hour and gmdate() makes the literal "Z" truthful.
     $stamp_format = "Y-m-d\\TH:i:s\\Z";

     // As in the original, the source URI is the URL of the last SNP iterated.
     $source_file = (new DataResource($this))
         ->setURI($rfile)
         ->setTitle("dbSNP " . parent::getDatasetVersion())
         ->setRetrievedDate(gmdate($stamp_format))
         ->setFormat("application/xml")
         ->setPublisher("http://www.ncbi.nlm.nih.gov")
         ->setHomepage("http://www.ncbi.nlm.nih.gov/SNP/")
         ->setRights("use-share-modify")
         ->setLicense("http://www.ncbi.nlm.nih.gov/About/disclaimer.html")
         ->setDataset("http://identifiers.org/dbsnp/");

     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = gmdate($stamp_format);

     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outname}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/dbsnp/dbsnp.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());

     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr($format, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }

     // Write both resource descriptions to the release file in the output directory.
     $dataset_description = $source_file->toRDF() . $output_file->toRDF();
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
 }