Exemplo n.º 1
0
 /**
  * Fetch the InterPro XML dump (when missing or when a download is forced),
  * convert it to RDF and write the Bio2RDF dataset description file.
  *
  * The script terminates via exit when the source file cannot be downloaded
  * or cannot be stored locally.
  *
  * @return bool true once the data file and the description file are written
  */
 function Run()
 {
     // directory shortcuts
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     // remote location and local copy of the source file
     $rfile = trim(parent::getParameterValue('download_url'));
     $file = "interpro.xml.gz";
     $lfile = $ldir . $file;
     if (!file_exists($lfile) || parent::getParameterValue("download") == "true") {
         echo "Downloading {$lfile}" . PHP_EOL;
         $ret = file_get_contents($rfile);
         if ($ret === FALSE) {
             trigger_error("unable to download {$rfile}");
             exit;
         }
         // stop here rather than letting the XML loader fail on a missing/partial file
         if (file_put_contents($lfile, $ret) === FALSE) {
             trigger_error("unable to write {$lfile}");
             exit;
         }
     }
     echo "Loading XML file...";
     $cxml = new CXML($lfile);
     $cxml->Parse();
     $xml = $cxml->GetXMLRoot();
     echo "Done" . PHP_EOL;
     // set the write file; the output is gzipped when the format contains ".gz"
     $output_format = parent::getParameterValue('output_format');
     $gz = strstr($output_format, ".gz") !== FALSE;
     $outfile = "interpro." . $output_format;
     parent::setWriteFile($odir . $outfile, $gz);
     echo "Parsing interpro xml file" . PHP_EOL;
     $this->parse($xml);
     parent::writeRDFBufferToWriteFile();
     parent::getWriteFile()->close();
     echo "Done!" . PHP_EOL;
     // the dataset description is written with the dataset URI as graph URI
     parent::setGraphURI(parent::getDatasetURI());
     // dataset description
     $source_version = parent::getDatasetVersion();
     // gmdate + "H": the literal "Z" suffix denotes UTC and requires a two-digit hour
     $retrieved = gmdate("Y-m-d\\TH:i:s\\Z", filemtime($lfile));
     $source_file = (new DataResource($this))
         ->setURI($rfile)
         ->setTitle("InterPro v{$source_version}")
         ->setRetrievedDate($retrieved)
         ->setFormat("application/xml")
         ->setFormat("application/gzip")
         ->setPublisher("http://www.ebi.ac.uk/")
         ->setHomepage("http://www.ebi.ac.uk/interpro/")
         ->setRights("InterPro - Integrated Resource Of Protein Domains And Functional Sites. Copyright (C) 2001 The InterPro Consortium")
         ->setLicense("http://www.ebi.ac.uk/interpro/faqs.html")
         ->setDataset("http://identifiers.org/interpro/");
     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = gmdate("Y-m-d\\TH:i:s\\Z");
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outfile}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} v{$source_version}")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/interpro/interpro.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     // any format name containing "nt" is declared as n-triples, everything else as n-quads
     if (strstr($output_format, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description = $source_file->toRDF() . $output_file->toRDF();
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     return true;
 }
Exemplo n.º 2
0
 /**
  * Convert every "*.xml.gz" file in the input directory via process_file()
  * and then write the Bio2RDF dataset description file.
  *
  * When the 'id_list' parameter is non-empty it is stored, flipped into a
  * lookup table, in $this->id_list; otherwise $this->id_list is null.
  */
 function process_dir()
 {
     $this->setCheckPoint('dataset');
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $this->id_list = null;
     if (parent::getParameterValue('id_list') != '') {
         $this->id_list = array_flip(explode(",", trim(parent::getParameterValue("id_list"))));
     }
     $graph_uri = parent::getGraphURI();
     $dataset_description = '';
     $output_format = parent::getParameterValue('output_format');
     $gz = strstr($output_format, ".gz") !== FALSE;
     // set graph URI to dataset graph
     // NOTE(review): loose "== true" is also satisfied by any non-empty string - confirm what getParameterValue returns
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }
     $files = glob($ldir . "*.xml.gz");
     if ($files === FALSE) {
         // glob() reports errors with false, which cannot be iterated
         $files = array();
     }
     $total = count($files);
     foreach ($files as $i => $file) {
         echo "Processing {$file} (" . ($i + 1) . "/" . $total . ") ...";
         $this->process_file($file);
         parent::clear();
         echo "done!" . PHP_EOL;
     }
     // the input directory's modification time stands in for the retrieval date;
     // gmdate + "H": the literal "Z" suffix denotes UTC and requires a two-digit hour
     $retrieved = gmdate("Y-m-d\\TH:i:s\\Z", filemtime($ldir));
     $source_file = (new DataResource($this))
         ->setURI("http://www.ncbi.nlm.nih.gov/pubmed")
         ->setTitle("NCBI PubMed")
         ->setRetrievedDate($retrieved)
         ->setFormat("text/xml")
         ->setPublisher("http://ncbi.nlm.nih.gov/")
         ->setHomepage("http://www.ncbi.nlm.nih.gov/pubmed/")
         ->setRights("use-share-modify")
         ->setLicense("http://www.nlm.nih.gov/databases/license/license.html")
         ->setDataset("http://identifiers.org/pubmed/");
     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = gmdate("Y-m-d\\TH:i:s\\Z");
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/pubmed/pubmed.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     // any format name containing "nt" is declared as n-triples, everything else as n-quads
     if (strstr($output_format, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
     // set graph URI back to default
     parent::setGraphURI($graph_uri);
     // write the dataset description
     $this->setWriteFile($odir . $this->getBio2RDFReleaseFile());
     $this->getWriteFile()->write($dataset_description);
     $this->getWriteFile()->close();
 }
Exemplo n.º 3
0
 /**
  * Parse a directory of ClinicalTrials.gov record files.
  *
  * Every non-empty "NCT*" file in the input directory (optionally restricted
  * to the comma-separated 'id_list' parameter) is passed to process_file();
  * afterwards the dataset description is written to the release file.
  *
  * @return false|void false when the release file cannot be opened (only
  *                    reachable if the E_USER_ERROR is handled); nothing otherwise
  */
 function parse_dir()
 {
     $this->setCheckPoint('dataset');
     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     // gmdate + "H": the literal "Z" suffix denotes UTC and requires a two-digit hour
     $date = gmdate("Y-m-d\\TH:i:s\\Z");
     $outdir = parent::getParameterValue("outdir");
     $dataset_file = $outdir . parent::getBio2RDFReleaseFile();
     // opening in "w" mode creates/truncates the release file; this method never
     // writes through $fp itself, it only closes the handle at the end
     $fp = fopen($dataset_file, "w");
     if ($fp === FALSE) {
         trigger_error("Unable to open {$dataset_file}", E_USER_ERROR);
         return false;
     }
     // flipped id list gives constant-time membership tests inside the loop
     $id_list = parent::getParameterValue('id_list');
     $wanted = array_flip(explode(",", $id_list));
     $indir = parent::getParameterValue('indir');
     echo "Processing {$indir}\n";
     $output_format = parent::getParameterValue('output_format');
     $outfile = "clinicaltrials." . $output_format;
     $gz = strstr($output_format, ".gz") !== FALSE;
     parent::setWriteFile($outdir . $outfile, $gz);
     $files = glob($indir . "NCT*");
     if ($files === FALSE) {
         // glob() reports errors with false, which cannot be iterated
         $files = array();
     }
     foreach ($files as $i => $file) {
         // parent::clear() is invoked on the first file and every 10000th file after it
         if ($i % 10000 == 0) {
             parent::clear();
         }
         $trial_id = basename($file, '.xml');
         if ($id_list == '' || isset($wanted[$trial_id])) {
             if (filesize($file) != 0) {
                 echo "Processing {$trial_id}" . PHP_EOL;
                 $this->process_file($file);
             } else {
                 echo "Processing {$trial_id} -> Empty!" . PHP_EOL;
             }
         }
     }
     echo "Finished." . PHP_EOL;
     parent::getWriteFile()->close();
     // make the dataset description
     parent::setGraphURI(parent::getDatasetURI());
     $rfile = "http://clinicaltrials.gov/ct2/show/NCT_ID?resultsxml=true";
     $source_version = parent::getDatasetVersion();
     // retrieval date comes from the last input file; when nothing matched,
     // fall back to the input directory instead of reading an undefined variable
     $date_source = empty($files) ? $indir : end($files);
     $retrieved = gmdate("Y-m-d\\TH:i:s\\Z", filemtime($date_source));
     // dataset description
     $source_file = (new DataResource($this))
         ->setURI($rfile)
         ->setTitle("Clinicaltrials")
         ->setRetrievedDate($retrieved)
         ->setFormat("application/xml")
         ->setPublisher("http://clinicaltrials.gov/")
         ->setHomepage("http://clinicaltrials.gov/")
         ->setRights("use")
         ->setRights("by-attribution")
         ->setLicense("http://clinicaltrials.gov/ct2/about-site/terms-conditions")
         ->setDataset("http://identifiers.org/clinicaltrials/");
     parent::writeToReleaseFile($source_file->toRDF());
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outfile}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} v{$source_version}")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/clinicaltrials/clinicaltrials.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     // any format name containing "nt" is declared as n-triples, everything else as n-quads
     if (strstr($output_format, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     parent::writeToReleaseFile($output_file->toRDF());
     parent::closeReleaseFile();
     // release the handle opened on the dataset description file
     fclose($fp);
 }
Exemplo n.º 4
0
 /**
  * Convert the selected CTD files to RDF and write the dataset description.
  *
  * For each entry of the 'files' parameter the local gzipped copy is
  * downloaded when missing, the method named "CTD_<file>" is invoked to do
  * the conversion, and a source/output description pair is collected.
  */
 function process()
 {
     // get the file list
     if (parent::getParameterValue('files') == 'all') {
         $files = explode("|", parent::getParameterList('files'));
         // the first entry of the parameter list is dropped (presumably the "all" keyword - confirm)
         array_shift($files);
     } else {
         $files = explode(",", parent::getParameterValue('files'));
     }
     $dataset_description = '';
     // set directory values
     $ldir = parent::getParameterValue('indir');
     $rdir = parent::getParameterValue('download_url');
     $odir = parent::getParameterValue('outdir');
     $graph_uri = parent::getGraphURI();
     // NOTE(review): loose "== true" is also satisfied by any non-empty string - confirm what getParameterValue returns
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }
     // values that do not change from one file to the next
     $gz_suffix = ".gz";
     $out_suffix = parent::getParameterValue('output_format');
     $gz = strstr($out_suffix, "gz") !== FALSE;
     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     // remote files that are not published as ".tsv.gz"
     $remote_suffixes = array(
         'chem_gene_ixn_types' => '.tsv',
         'exposure_ontology' => '.obo',
     );
     // identifiers.org dataset for the files that have one
     $identifier_datasets = array(
         "chemicals" => "http://identifiers.org/ctd.chemical/",
         "diseases" => "http://identifiers.org/ctd.disease/",
         "genes" => "http://identifiers.org/ctd.gene/",
     );
     foreach ($files as $file) {
         $suffix = isset($remote_suffixes[$file]) ? $remote_suffixes[$file] : ".tsv.gz";
         // the local copy is always stored gzipped
         $lfile = $ldir . $file . $gz_suffix;
         $rfile = $rdir . 'CTD_' . $file . $suffix;
         if (!file_exists($lfile)) {
             trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
             if ($suffix == ".tsv.gz") {
                 Utils::DownloadSingle($rfile, $lfile);
             } else {
                 // uncompressed remote file: write through the zlib stream wrapper so the local copy is gzipped
                 Utils::DownloadSingle($rfile, "compress.zlib://" . $lfile);
             }
         }
         $ofile = "ctd_" . $file . "." . $out_suffix;
         echo "Processing " . $file . " ...";
         parent::setWriteFile($odir . $ofile, $gz);
         // set read file
         parent::setReadFile($lfile, TRUE);
         // dispatch to the converter method named after the file
         $fnx = "CTD_" . $file;
         $this->{$fnx}();
         // close write file
         parent::getWriteFile()->close();
         parent::clear();
         echo "done!" . PHP_EOL;
         // generate the dataset release file
         echo "Generating dataset description... ";
         $dataset = isset($identifier_datasets[$file]) ? $identifier_datasets[$file] : null;
         // gmdate + "H": the literal "Z" suffix denotes UTC and requires a two-digit hour
         $retrieved = gmdate("Y-m-d\\TH:i:s\\Z", filemtime($lfile));
         // dataset description
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("Comparative Toxicogenomics Database ({$file}{$gz_suffix})")
             ->setRetrievedDate($retrieved)
             ->setFormat("text/tab-separated-value")
             ->setFormat("application/gzip")
             ->setPublisher("http://ctdbase.org/")
             ->setHomepage("http://ctdbase.org/")
             ->setRights("use")
             ->setRights("by-attribution")
             ->setRights("no-commercial")
             ->setLicense("http://ctdbase.org/about/legal.jsp")
             ->setDataset($dataset);
         $date = gmdate("Y-m-d\\TH:i:s\\Z");
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ctd/ctd.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         // any format name containing "nt" is declared as n-triples, everything else as n-quads
         if (strstr($out_suffix, "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
     }
     // restore the graph URI, then write the collected descriptions
     parent::setGraphURI($graph_uri);
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Exemplo n.º 5
0
 /**
  * Convert the selected WormBase files to RDF and write the dataset description.
  *
  * Each entry of the 'files' parameter is mapped to a remote path and a local
  * file name, downloaded when missing (or when 'download' is set), converted
  * by the method of the same name, and described in the release file.
  * Entries without a known mapping are skipped with a warning.
  */
 public function run()
 {
     if (parent::getParameterValue('files') == 'all') {
         $files = explode("|", parent::getParameterList('files'));
         // the first entry of the parameter list is dropped (presumably the "all" keyword - confirm)
         array_shift($files);
     } else {
         $files = explode(",", parent::getParameterValue('files'));
     }
     $release = parent::getParameterValue('release');
     // the ontology association files are addressed with a fixed release tag
     $releaseb = "WS249";
     $remote_files = array(
         "geneIDs" => "species/c_elegans/annotation/geneIDs/c_elegans.PRJNA13758." . $release . ".geneIDs.txt.gz",
         "functional_descriptions" => "species/c_elegans/annotation/functional_descriptions/c_elegans.PRJNA13758." . $release . ".functional_descriptions.txt.gz",
         "gene_interactions" => "species/c_elegans/annotation/gene_interactions/c_elegans.PRJNA13758." . $release . ".gene_interactions.txt.gz",
         "gene_associations" => "releases/current-production-release/ONTOLOGY/gene_association." . $releaseb . ".wb",
         "phenotype_associations" => "releases/current-production-release/ONTOLOGY/phenotype_association." . $releaseb . ".wb",
     );
     $local_files = array(
         "geneIDs" => "wormbase." . $release . ".genes.txt.gz",
         "functional_descriptions" => "wormbase." . $release . ".functional_descriptions.txt.gz",
         "gene_interactions" => "wormbase." . $release . ".gene_interactions.txt.gz",
         "gene_associations" => "wormbase." . $release . ".gene_association.wb",
         "phenotype_associations" => "wormbase." . $release . ".phenotype_associations.wb",
     );
     $idir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     $dataset_description = '';
     $graph_uri = parent::getGraphURI();
     // NOTE(review): loose "== true" is also satisfied by any non-empty string - confirm what getParameterValue returns
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }
     // values that do not change from one file to the next
     $suffix = parent::getParameterValue('output_format');
     $gz = strstr($suffix, "gz") !== FALSE;
     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     foreach ($files as $file) {
         // without a mapping there is neither a path to read nor a URL to fetch
         if (!isset($local_files[$file]) || !isset($remote_files[$file])) {
             trigger_error("Unknown file '{$file}'; skipping." . PHP_EOL, E_USER_WARNING);
             continue;
         }
         $lfile = $idir . $local_files[$file];
         $rfile = $rdir . $remote_files[$file];
         if (!file_exists($lfile) or parent::getParameterValue('download') == true) {
             trigger_error($lfile . " not found. Will attempt to download." . PHP_EOL, E_USER_WARNING);
             echo "Downloading {$rfile}... ";
             Utils::DownloadSingle($rfile, $lfile);
             echo "done!" . PHP_EOL;
         }
         // read through gzip only when the local file name contains "gz"
         parent::setReadFile($lfile, strstr($lfile, "gz") !== FALSE);
         $ofile = "wormbase." . $file . "." . $suffix;
         parent::setWriteFile($odir . $ofile, $gz);
         echo "Processing {$file}... ";
         // dispatch to the converter method named after the file
         $fnx = $file;
         $this->{$fnx}();
         echo "done!" . PHP_EOL;
         parent::getWriteFile()->close();
         // generate the dataset release file
         echo "Generating dataset description for {$ofile}... ";
         // gmdate + "H": the literal "Z" suffix denotes UTC and requires a two-digit hour
         $retrieved = gmdate("Y-m-d\\TH:i:s\\Z", filemtime($lfile));
         // dataset description
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("WormBase Release " . $release . " subset ({$file})")
             ->setRetrievedDate($retrieved)
             ->setFormat("text/tab-separated-value")
             ->setFormat("application/gzip")
             ->setPublisher("http://wormbase.org/")
             ->setHomepage("http://wormbase.org/")
             ->setRights("use")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://www.wormbase.org/about/policies")
             ->setDataset("http://identifiers.org/wormbase/");
         $date = gmdate("Y-m-d\\TH:i:s\\Z");
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/wormbase/wormbase.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         // any format name containing "nt" is declared as n-triples, everything else as n-quads
         if (strstr($suffix, "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
         echo "done!" . PHP_EOL;
     }
     // restore the graph URI, then write the collected descriptions
     parent::setGraphURI($graph_uri);
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
 }
Exemplo n.º 6
0
 /**
  * Parse the local copy of the PubChem substances directory.
  *
  * Every entry of "<indir>/substances/" is converted by
  * parse_substance_file() into "<outdir>/substances/", after which the
  * dataset description is written to the release file in that same output
  * directory. The script exits when the input directory cannot be opened.
  **/
 function parse_substances()
 {
     $ignore = array(".", "..");
     $input_dir = $this->getParameterValue('indir') . "/substances/";
     $this->CreateDirectory($this->getParameterValue('outdir') . "/substances/");
     parent::setDatasetURI("bio2rdf_dataset:bio2rdf-" . $this->getPcsPrefix() . "-" . date("Ymd"));
     $graph_uri = parent::getGraphURI();
     // set graph URI to dataset uri
     // NOTE(review): loose "== true" is also satisfied by any non-empty string - confirm what getParameterValue returns
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }
     $dataset_description = '';
     $handle = opendir($input_dir);
     if (!$handle) {
         echo "unable to read directory contents: " . $input_dir . "\n";
         exit;
     }
     // values that do not change from one file to the next
     $suffix = parent::getParameterValue('output_format');
     $gz = strstr($suffix, "gz") !== FALSE;
     $output_dir = realpath($this->getParameterValue('outdir')) . "/substances/";
     while (false !== ($file = readdir($handle))) {
         if (in_array($file, $ignore)) {
             continue;
         }
         echo "Processing file: " . $input_dir . $file . PHP_EOL;
         // output keeps the input's base name, with ".xml.gz" replaced by the output format
         $outfile = $output_dir . basename($file, ".xml.gz") . "." . $suffix;
         echo "... into " . $outfile . PHP_EOL;
         parent::setCheckpoint('file');
         $this->setWriteFile($outfile, $gz);
         $this->parse_substance_file($input_dir, $file);
         $this->getWriteFile()->close();
     }
     closedir($handle);
     // the input directory's modification time stands in for the retrieval date;
     // gmdate + "H": the literal "Z" suffix denotes UTC and requires a two-digit hour
     $retrieved = gmdate("Y-m-d\\TH:i:s\\Z", filemtime($input_dir));
     $source_file = (new DataResource($this))
         ->setURI("http://www.ncbi.nlm.nih.gov/pcsubstance")
         ->setTitle("PubChem Substance")
         ->setRetrievedDate($retrieved)
         ->setFormat("text/xml")
         ->setFormat("application/zip")
         ->setPublisher("http://ncbi.nlm.nih.gov/")
         ->setHomepage("http://pubchem.ncbi.nlm.nih.gov/")
         ->setRights("use")
         ->setRights("restricted-by-source-license")
         ->setLicense("ftp://ftp.ncbi.nlm.nih.gov/pubchem/README")
         ->setDataset("http://identifiers.org/pubchem.substance/");
     $prefix = $this->getPcsPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = gmdate("Y-m-d\\TH:i:s\\Z");
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/pubchem/pubchem.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     // any format name containing "nt" is declared as n-triples, everything else as n-quads
     if (strstr($suffix, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
     // set graph URI back to default
     parent::setGraphURI($graph_uri);
     // write the dataset description
     $this->setWriteFile($this->getParameterValue('outdir') . "/substances/" . $this->getBio2RDFReleaseFile());
     $this->getWriteFile()->write($dataset_description);
     $this->getWriteFile()->close();
 }
Exemplo n.º 7
0
 /**
  * Download and convert the selected NCBI Gene modules, then write the
  * dataset description.
  *
  * The 'files' parameter is either "all" or a comma-separated list of keys
  * of the package map; unknown keys are ignored. All downloads happen in a
  * first pass, conversion and description generation in a second pass.
  */
 function process()
 {
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     // which files are to be converted?
     $files = trim($this->GetParameterValue('files'));
     if ($files == 'all') {
         $files = $this->getPackageMap();
     } else {
         // keep only the requested modules that exist in the package map
         $sel_arr = explode(",", $files);
         $pm = $this->getPackageMap();
         $files = array();
         foreach ($sel_arr as $a) {
             if (array_key_exists($a, $pm)) {
                 $files[$a] = $pm[$a];
             }
         }
     }
     // NOTE(review): loose "== true" is also satisfied by any non-empty string - confirm what getParameterValue returns
     if ($this->getParameterValue('limit_organisms') == true) {
         $this->taxids = array_flip(explode(",", $this->getParameterValue('organisms')));
     }
     // set dataset graph to be dataset URI
     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }
     $dataset_description = '';
     // first pass: make sure every module has a local gzipped copy
     foreach ($files as $module => $rfilename) {
         $lfile = $ldir . $module . ".gz";
         $rfile = $rdir . $rfilename;
         if (!file_exists($lfile) || parent::getParameterValue('download') == true) {
             trigger_error("{$lfile} not found. Will attempt to download.", E_USER_NOTICE);
             $myfile = $lfile;
             // these two modules are written through the zlib stream wrapper so the local copy is gzipped
             if ($module == "gene2sts" || $module == "gene2unigene") {
                 $myfile = "compress.zlib://" . $lfile;
             }
             echo "downloading {$module} ...";
             utils::DownloadSingle($rfile, $myfile);
             echo "done" . PHP_EOL;
         }
     }
     // values that do not change from one module to the next
     $output_format = parent::getParameterValue('output_format');
     $gz = strstr($output_format, "gz") !== FALSE;
     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     // second pass: convert each module and describe it
     foreach ($files as $module => $rfilename) {
         $lfile = $ldir . $module . ".gz";
         $rfile = $rdir . $rfilename;
         $ofile = $module . "." . $output_format;
         echo "Processing {$module} ... ";
         parent::setReadFile($lfile, true);
         parent::setWriteFile($odir . $ofile, $gz);
         // dispatch to the method named after the module; gene2refseq shares gene2accession's converter
         $fnx = $module;
         if ($module == 'gene2refseq') {
             $fnx = 'gene2accession';
         }
         $this->{$fnx}();
         parent::clear();
         echo 'done!' . PHP_EOL;
         parent::getReadFile()->close();
         parent::getWriteFile()->close();
         // generate the dataset release file
         // gmdate + "H": the literal "Z" suffix denotes UTC and requires a two-digit hour
         $retrieved = gmdate("Y-m-d\\TH:i:s\\Z", filemtime($lfile));
         // dataset description
         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("NCBI Gene ({$module})")
             ->setRetrievedDate($retrieved)
             ->setFormat("text/tab-separated-value")
             ->setFormat("application/gzip")
             ->setPublisher("http://www.ncbi.nlm.nih.gov")
             ->setHomepage("http://www.ncbi.nlm.nih.gov/gene")
             ->setRights("use-share-modify")
             ->setLicense("http://www.ncbi.nlm.nih.gov/About/disclaimer.html")
             ->setDataset("http://identifiers.org/ncbigene/");
         $date = gmdate("Y-m-d\\TH:i:s\\Z");
         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ncbigene/ncbigene.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         // any format name containing "nt" is declared as n-triples, everything else as n-quads
         if (strstr($output_format, "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
     }
     // set graph URI back to default value
     parent::setGraphURI($graph_uri);
     // write dataset description to file
     echo "Generating dataset description... ";
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Exemplo n.º 8
0
 /**
  * Download SABIO-RK reactions as BioPAX, convert them to Bio2RDF RDF and
  * write the dataset description.
  *
  * The 'files' parameter selects the reactions: "all", a comma-separated
  * list of reaction ids, a "from-to" range, or a single id. The script exits
  * when the reaction id list cannot be downloaded or parsed; an individual
  * reaction that cannot be downloaded or read back is skipped.
  */
 function Run()
 {
     $idir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $files = parent::getParameterValue('files');
     // explicit selection of reaction ids; stays null when every reaction is wanted
     $myfiles = null;
     if ($files != 'all') {
         // check if comma-separated, or hyphen-range
         $list = explode(",", $files);
         if (count($list) == 1) {
             // try hyphen separated
             $range = explode("-", $files);
             if (count($range) == 2) {
                 for ($i = $range[0]; $i <= $range[1]; $i++) {
                     $myfiles[] = $i;
                 }
             } else {
                 // must be a single entry
                 $myfiles[] = $files;
             }
         } else {
             $myfiles = $list;
         }
     }
     $rest_uri = 'http://sabiork.h-its.org/sabioRestWebServices/';
     $getReactionIds_url = $rest_uri . "suggestions/SABIOReactionIDs";
     $reaction_list_file = $idir . "reactions.xml";
     if (!file_exists($reaction_list_file) || parent::getParameterValue('download') == 'true') {
         $xml = file_get_contents($getReactionIds_url);
         // test the download result itself; a failed download must not be written out as the id list
         if (FALSE === $xml) {
             trigger_error("unable to download {$getReactionIds_url}", E_USER_WARNING);
             exit;
         }
         $f = new FileFactory($reaction_list_file);
         $f->Write($xml);
         $f->Close();
     }
     $xml = simplexml_load_file($reaction_list_file);
     if ($xml === FALSE) {
         // nothing can be iterated without a parsed id list
         trigger_error("unable to parse {$reaction_list_file}", E_USER_WARNING);
         exit;
     }
     $total = count($xml->SABIOReactionID);
     if ($myfiles !== null) {
         $total = count($myfiles);
     }
     $i = 0;
     parent::setCheckpoint('dataset');
     $graph_uri = parent::getGraphURI();
     // NOTE(review): loose "== true" is also satisfied by any non-empty string - confirm what getParameterValue returns
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }
     $suffix = parent::getParameterValue('output_format');
     $ofile = "sabiork." . $suffix;
     $gz = strstr($suffix, "gz") !== FALSE;
     parent::setWriteFile($odir . $ofile, $gz);
     foreach ($xml->SABIOReactionID as $reaction) {
         parent::setCheckpoint('file');
         // work with the element's text rather than the SimpleXMLElement object
         $rid = (string) $reaction;
         if ($myfiles !== null && !in_array($rid, $myfiles)) {
             continue;
         }
         $i++;
         echo "{$i} / {$total} : reaction {$rid}" . PHP_EOL;
         $reaction_file = $idir . "reaction_" . $rid . ".owl.gz";
         if (!file_exists($reaction_file) || $this->GetParameterValue('download') == 'true') {
             $url = $rest_uri . 'searchKineticLaws/biopax?q=SabioReactionID:' . $rid;
             $data = file_get_contents($url);
             if ($data === FALSE) {
                 continue;
             }
             // store the BioPAX document gzipped
             $f = new FileFactory($reaction_file, true);
             $f->Write($data);
             $f->Close();
         }
         $buf = file_get_contents("compress.zlib://" . $reaction_file);
         if ($buf === FALSE) {
             // skip unreadable files the same way failed downloads are skipped
             trigger_error("unable to read {$reaction_file}", E_USER_WARNING);
             continue;
         }
         // send for parsing
         $p = new BioPAX2Bio2RDF($this);
         $p->SetBuffer($buf)->SetBioPAXVersion(3)->SetBaseNamespace("http://sabio.h-its.org/biopax#")->SetBio2RDFNamespace("http://bio2rdf.org/sabiork:")->SetDatasetURI($this->GetDatasetURI());
         $rdf = $p->Parse();
         parent::getWriteFile()->Write($rdf);
     }
     parent::getWriteFile()->Close();
     // generate dataset description
     echo "Generating dataset description... ";
     // the output file's modification time stands in for the retrieval date;
     // gmdate + "H": the literal "Z" suffix denotes UTC and requires a two-digit hour
     $retrieved = gmdate("Y-m-d\\TH:i:s\\Z", filemtime($odir . $ofile));
     $source_file = (new DataResource($this))
         ->setURI("http://sabiork.h-its.org/sabioRestWebServices/searchKineticLaws/biopax")
         ->setTitle("SABIO-RK Biochemical Reaction Kinetics Database")
         ->setRetrievedDate($retrieved)
         ->setFormat("text/xml")
         ->setPublisher("http://sabio.villa-bosch.de/")
         ->setHomepage("http://sabio.villa-bosch.de/")
         ->setRights("use-share-modify")
         ->setRights("no-commercial")
         ->setLicense("http://sabio.villa-bosch.de/layouts/content/termscondition.gsp")
         ->setDataset("http://identifiers.org/sabiork.reaction/");
     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = gmdate("Y-m-d\\TH:i:s\\Z");
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sabiork/sabiork.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     // any format name containing "nt" is declared as n-triples, everything else as n-quads
     if (strstr($suffix, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description = $source_file->toRDF() . $output_file->toRDF();
     // write dataset description to file
     parent::setGraphURI($graph_uri);
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Exemplo n.º 9
0
 /**
  * Converts the requested GenAge archives to RDF and writes the dataset release file.
  *
  * For each requested file key the zip archive is downloaded when it is not
  * present locally, its CSV entry is handed to the parser method named after
  * the key, and a source/output description is collected for the release file.
  *
  * @return bool|null FALSE when a file key is unknown or the CSV entry cannot
  *                   be read from its archive; no value otherwise.
  */
 function process()
 {
     if (parent::getParameterValue('files') == 'all') {
         $files = explode("|", parent::getParameterList('files'));
         array_shift($files);
     } else {
         $files = explode(",", parent::getParameterValue('files'));
     }
     // file key => remote archive name, and file key => CSV entry inside that archive
     $remote_files = array("human" => "human_genes.zip", "models" => "models_genes.zip");
     $zip_entries = array("human" => "genage_human.csv", "models" => "genage_models.csv");
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     // the output format does not change between files, so derive the gzip flag once
     $suffix = parent::getParameterValue('output_format');
     $gz = strstr($suffix, "gz") !== FALSE;
     $dataset_description = '';
     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }
     foreach ($files as $file) {
         // reject keys that have no archive/entry mapping instead of running on undefined values
         if (!isset($remote_files[$file]) || !isset($zip_entries[$file])) {
             trigger_error("Unknown GenAge file '{$file}'", E_USER_WARNING);
             return FALSE;
         }
         $lfile = $ldir . $remote_files[$file];
         $rfile = $rdir . $remote_files[$file];
         if (!file_exists($lfile)) {
             trigger_error($lfile . " not found. Will attempt to download." . PHP_EOL, E_USER_WARNING);
             echo "Downloading {$rfile}... ";
             Utils::DownloadSingle($rfile, $lfile);
             echo "done!" . PHP_EOL;
         }
         $ofile = "genage_" . $file . '.' . $suffix;
         $zin = new ZipArchive();
         // ZipArchive::open() returns TRUE on success and an integer error code on failure, never FALSE
         if ($zin->open($lfile) !== TRUE) {
             trigger_error("Unable to open {$lfile}");
             exit;
         }
         $zipentry = $zip_entries[$file];
         if (($fp = $zin->getStream($zipentry)) === FALSE) {
             trigger_error("Unable to get {$zipentry} in ziparchive {$lfile}");
             $zin->close();
             return FALSE;
         }
         parent::SetReadFile($lfile);
         parent::GetReadFile()->SetFilePointer($fp);
         // set the write file, parse, write and close
         parent::setWriteFile($odir . $ofile, $gz);
         echo "Processing {$lfile}... ";
         // the parser method carries the same name as the file key
         $fnx = $file;
         $this->{$fnx}();
         echo "done!" . PHP_EOL;
         parent::getWriteFile()->close();
         $zin->close();
         // generate the dataset release file
         echo "Generating dataset description for {$ofile}... ";
         // dataset description
         $source_file = (new DataResource($this))->setURI($rfile)->setTitle("Human Ageing Genomic Resources GenAge database (" . $remote_files[$file] . ")")->setRetrievedDate(date("Y-m-d\\TG:i:s\\Z", filemtime($lfile)))->setFormat("text/comma-separated-value")->setFormat("application/gzip")->setPublisher("http://genomics.senescence.info/")->setHomepage("http://genomics.senescence.info/genes/")->setRights("use")->setLicense("http://genomics.senescence.info/legal.html")->setDataset("http://identifiers.org/genage/");
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = date("Y-m-d\\TG:i:s\\Z");
         $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/genage/genage.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr($suffix, "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
         echo "done!" . PHP_EOL;
     }
     // restore the graph URI that was active before the run, then write the release file
     parent::setGraphURI($graph_uri);
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
 }
Exemplo n.º 10
0
 /**
  * Downloads, converts and describes the requested Pathway Commons BioPAX sources.
  *
  * Each source is fetched as a gzipped OWL file when it is missing locally or
  * a download is requested, decompressed into memory, converted by
  * BioPAX2Bio2RDF and written to its own output file. One source description
  * per file and a single output description are written to the release file.
  */
 function Run()
 {
     // get the work
     if ($this->GetParameterValue('files') == 'all') {
         $sources = explode("|", parent::getParameterList('files'));
         array_shift($sources);
     } else {
         // comma separated list
         $sources = explode(",", parent::getParameterValue('files'));
     }
     // source key => remote (URL-encoded) archive name
     $download_files = array("homo-sapiens" => "Pathway%20Commons%202%20homo%20sapiens.BIOPAX.owl.gz", "hprd" => "Pathway%20Commons%202%20HPRD.BIOPAX.owl.gz", "humancyc" => "Pathway%20Commons%202%20HumanCyc.BIOPAX.owl.gz", "nci-nature" => "Pathway%20Commons%202%20NCI_Nature.BIOPAX.owl.gz", "panther-pathway" => "Pathway%20Commons%202%20PANTHER%20Pathway.BIOPAX.owl.gz", "phosphositeplus" => "Pathway%20Commons%202%20PhosphoSitePlus.BIOPAX.owl.gz", "reactome" => "Pathway%20Commons%202%20Reactome.BIOPAX.owl.gz");
     // these are needed after the loop as well, so they must not depend on the loop running
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     $suffix = parent::getParameterValue('output_format');
     $gz = strstr($suffix, "gz") !== FALSE;
     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }
     $dataset_description = '';
     $source_file = null;
     // iterate over the requested data
     foreach ($sources as $source) {
         if (!isset($download_files[$source])) {
             trigger_error("Unknown Pathway Commons source '{$source}'; skipping", E_USER_WARNING);
             continue;
         }
         echo "processing {$source}... ";
         // set the remote and input files
         $zfile = $source . ".owl.gz";
         $rfile = $rdir . $download_files[$source];
         $lfile = $ldir . $zfile;
         // download if the file doesn't exist locally or we are told to
         if (!file_exists($lfile) || $this->GetParameterValue('download') == 'true') {
             echo "downloading... ";
             $ret = file_get_contents($rfile);
             // never overwrite the local copy with the result of a failed download
             if ($ret === FALSE) {
                 trigger_error("Unable to download {$rfile}", E_USER_ERROR);
                 exit;
             }
             file_put_contents($lfile, $ret);
         }
         // decompress the file into a buffer
         echo 'extracting... ';
         if (($fpin = gzopen($lfile, "r")) === FALSE) {
             trigger_error("Unable to open {$lfile}", E_USER_ERROR);
             exit;
         }
         $data = '';
         while (!gzeof($fpin)) {
             $buffer = gzgets($fpin, 4096);
             // gzgets() returns FALSE on a read error; stop instead of looping forever
             if ($buffer === FALSE) {
                 break;
             }
             $data .= $buffer;
         }
         gzclose($fpin);
         // set the output file
         $outfile = $source . '.' . $suffix;
         parent::setWriteFile($odir . $outfile, $gz);
         // send for parsing
         $p = new BioPAX2Bio2RDF($this);
         $p->SetBuffer($data)->SetBioPAXVersion(3)->SetBaseNamespace("http://purl.org/pc2/3/")->SetBio2RDFNamespace("http://bio2rdf.org/pathwaycommons:")->SetDatasetURI(parent::getDatasetURI());
         $rdf = $p->Parse();
         parent::addRDF($rdf);
         // write to output
         parent::writeRDFBufferToWriteFile();
         parent::getWriteFile()->Close();
         echo "done!" . PHP_EOL;
         //generate dataset description
         echo "Generating dataset description for {$zfile}... ";
         $source_file = (new DataResource($this))->setURI($rfile)->setTitle("Pathway Commons")->setRetrievedDate(date("Y-m-d\\TG:i:s\\Z", filemtime($lfile)))->setFormat("rdf/xml")->setPublisher("http://www.pathwaycommons.org/")->setHomepage("http://www.pathwaycommons.org/")->setRights("use")->setRights("restricted-by-source-license")->setLicense("http://www.pathwaycommons.org/pc2/home.html#data_sources")->setDataset("http://identifiers.org/pathwaycommons/");
         $dataset_description .= $source_file->toRDF();
         echo "done!" . PHP_EOL;
     }
     echo "Generating dataset description for Bio2RDF Pathways Commons dataset... ";
     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = date("Y-m-d\\TG:i:s\\Z");
     $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})");
     // the source link points at the last processed source; omit it when nothing was processed
     if ($source_file !== null) {
         $output_file->setSource($source_file->getURI());
     }
     $output_file->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/pathwaycommons/pathwaycommons.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr($suffix, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description .= $output_file->toRDF();
     //write dataset description to file
     parent::setGraphURI($graph_uri);
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Exemplo n.º 11
0
 /**
  * Converts the requested BioModels entries to RDF and writes the dataset release file.
  *
  * The 'files' parameter selects the work: 'all' or 'curated' fetch the model
  * identifiers from the BioModels web service (cached as JSON in the input
  * directory), "a-b" expands to a numeric range, and anything else is read as
  * a comma separated list of numbers. Each model is downloaded as BioPAX 3
  * when needed, converted by BioPAX2Bio2RDF and appended to a single output file.
  */
 function Run()
 {
     // directory shortcuts
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     // get the work specified
     $list = trim(parent::getParameterValue('files'));
     $entries = array();
     // list keyword => array(local cache file, web service method returning the identifiers)
     $services = array('all' => array("all_models.json", "getAllModelsId"), 'curated' => array("curated_models.json", "getAllCuratedModelsId"));
     if (isset($services[$list])) {
         $file = $ldir . $services[$list][0];
         $method = $services[$list][1];
         if (!file_exists($file)) {
             try {
                 $x = @new SoapClient("http://www.ebi.ac.uk/biomodels-main/services/BioModelsWebServices?wsdl");
                 $entries = $x->{$method}();
             } catch (Exception $e) {
                 // without a client there is nothing to process; stop instead of calling a method on an undefined variable
                 trigger_error("Unable to retrieve the model list: " . $e->getMessage(), E_USER_ERROR);
                 exit;
             }
             file_put_contents($file, json_encode($entries));
         } else {
             $entries = json_decode(file_get_contents($file));
         }
         if (empty($entries)) {
             trigger_error("No model identifiers available from {$file}", E_USER_ERROR);
             exit;
         }
     } else {
         // check if a hyphenated list was provided
         if (($pos = strpos($list, "-")) !== FALSE) {
             $start_range = (int) substr($list, 0, $pos);
             $end_range = (int) substr($list, $pos + 1);
             for ($i = $start_range; $i <= $end_range; $i++) {
                 $entries[] = "BIOMD" . str_pad($i, 10, "0", STR_PAD_LEFT);
             }
         } else {
             // for comma separated list
             foreach (explode(",", $list) as $e) {
                 // surrounding whitespace would otherwise end up inside the padded identifier
                 $entries[] = "BIOMD" . str_pad(trim($e), 10, "0", STR_PAD_LEFT);
             }
         }
     }
     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }
     // set the write file
     $suffix = parent::getParameterValue('output_format');
     $outfile = 'biomodels' . '.' . $suffix;
     $gz = strstr($suffix, "gz") !== FALSE;
     $dataset_description = '';
     $source_file = null;
     parent::setWriteFile($odir . $outfile, $gz);
     // iterate over the entries
     $i = 0;
     $total = count($entries);
     foreach ($entries as $id) {
         echo "processing " . ++$i . " of {$total} - biomodel# " . $id;
         $download_file = $ldir . $id . ".owl.gz";
         $url = parent::getParameterValue('download_url') . "publ/{$id}/{$id}-biopax3.owl";
         // download if the file doesn't exist or we are told to
         if (!file_exists($download_file) || $this->GetParameterValue('download') == 'true') {
             // download
             echo " - downloading";
             $ret = utils::downloadsingle($url, 'compress.zlib://' . $download_file, true);
             if ($ret === false) {
                 echo "\nTrying non-curated model";
                 $url = parent::getParametervalue('download_url') . "uncura_publ/{$id}/{$id}-biopax3.owl";
                 $ret = utils::downloadsingle($url, 'compress.zlib://' . $download_file, true);
                 if ($ret === false) {
                     continue;
                 }
             }
             echo " - downloaded";
         }
         // load entry, parse and write to file
         echo " - parsing... ";
         $buf = file_get_contents("compress.zlib://" . $download_file);
         if ($buf === FALSE) {
             trigger_error("Unable to read {$download_file}", E_USER_WARNING);
             continue;
         }
         $converter = new BioPAX2Bio2RDF($this);
         $converter->SetBuffer($buf)->SetBioPAXVersion(3)->SetBaseNamespace("http://identifiers.org/biomodels.db/{$id}/")->SetBio2RDFNamespace("http://bio2rdf.org/biomodels:" . $id . "_")->SetDatasetURI($this->GetDatasetURI());
         $rdf = $converter->Parse();
         parent::addRDF($rdf);
         parent::writeRDFBufferToWriteFile();
         //generate dataset description
         $source_file = (new DataResource($this))->setURI($url)->setTitle("EBI BioModels Database - BioModel # {$id}")->setRetrievedDate(date("Y-m-d\\TG:i:s\\Z", filemtime($download_file)))->setFormat("rdf/xml")->setPublisher("http://www.ebi.ac.uk/")->setHomepage("http://www.ebi.ac.uk/biomodels-main/")->setRights("use-share-modify")->setLicense("http://www.ebi.ac.uk/biomodels-main/termsofuse")->setDataset("http://identifiers.org/biomodels.db/");
         $dataset_description .= $source_file->toRDF();
         echo "done!" . PHP_EOL;
     }
     //foreach
     parent::getWriteFile()->close();
     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = date("Y-m-d\\TG:i:s\\Z");
     $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})");
     // every entry may have been skipped, in which case there is no source to link to
     if ($source_file !== null) {
         $output_file->setSource($source_file->getURI());
     }
     $output_file->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/biomodels/biomodels.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr($suffix, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description .= $output_file->toRDF();
     //write dataset description to file
     parent::setGraphURI($graph_uri);
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Exemplo n.º 12
0
 /**
  * Converts the requested Gene Ontology Annotation files to RDF and writes the dataset release file.
  *
  * For each requested file the gzipped association file is downloaded when
  * it is missing or a download is requested, parsed via process(), and a
  * source/output description pair is collected for the release file.
  */
 function run()
 {
     if (parent::getParameterValue('files') == 'all') {
         $files = explode("|", $this->GetParameterList('files'));
         array_shift($files);
     } else {
         $files = explode(",", parent::getParameterValue('files'));
     }
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     // the output format is the same for every file, so derive the gzip flag once
     $output_format = parent::getParameterValue('output_format');
     $gz = strstr($output_format, ".gz") === FALSE ? false : true;
     $dataset_description = '';
     foreach ($files as $file) {
         // NOTE(review): the loose comparisons below assume 'download' is a boolean-like value - confirm
         $download = parent::getParameterValue('download');
         $lfile = $ldir . "goa_" . $file . ".gz";
         if (!file_exists($lfile) && $download == false) {
             trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
             $download = true;
         }
         //download file
         $rfile = $rdir . strtoupper($file) . "/gene_association.goa_" . $file . ".gz";
         if ($download == true) {
             echo "downloading {$file} ... ";
             // a failed download is only fatal for this file when no local copy is left to parse
             if (utils::DownloadSingle($rfile, $lfile) === FALSE && !file_exists($lfile)) {
                 trigger_error("Unable to download {$rfile}", E_USER_WARNING);
                 continue;
             }
         }
         $ofile = "goa_" . $file . "." . $output_format;
         parent::setReadFile($lfile, TRUE);
         parent::setWriteFile($odir . $ofile, $gz);
         echo "processing {$file} ... ";
         $this->process($file);
         echo "done!";
         parent::clear();
         //close write file
         parent::getWriteFile()->close();
         echo PHP_EOL;
         // dataset description
         $graph_uri = parent::getGraphURI();
         if (parent::getParameterValue('dataset_graph') == true) {
             parent::setGraphURI(parent::getDatasetURI());
         }
         $source_file = (new DataResource($this))->setURI($rfile)->setTitle("Gene Ontology Annotation file {$file} ({$rfile})")->setRetrievedDate(date("Y-m-d\\TG:i:s\\Z", filemtime($lfile)))->setFormat("text/tab-separated-value")->setFormat("application/gzip")->setPublisher("http://www.ebi.ac.uk/")->setHomepage("http://www.ebi.ac.uk/GOA/")->setRights("use")->setLicense("http://www.ebi.ac.uk/GOA/goaHelp.html")->setDataset("http://identifiers.org/goa/");
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = date("Y-m-d\\TG:i:s\\Z");
         // the creator must reference this (GOA) script, not the iRefIndex one it was copied from
         $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/goa/goa.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr($output_format, "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
         parent::setGraphURI($graph_uri);
     }
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Exemplo n.º 13
0
 /**
  * Converts the requested iRefIndex MITAB archives to RDF and writes the dataset release file.
  *
  * Each archive is downloaded when it is missing or a download is requested;
  * its first entry is streamed to Parse() and a source/output description
  * pair is collected for the release file.
  *
  * @return bool TRUE on success; FALSE when a download fails or the archive
  *              entry cannot be read. Unreadable archives and parse errors
  *              terminate the script.
  */
 function Run()
 {
     // get the file list
     if (parent::getParameterValue('files') == 'all') {
         $files = array('all');
     } else {
         $files = explode(",", parent::getParameterValue('files'));
     }
     $ldir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $rdir = parent::getParameterValue('download_url');
     $output_format = parent::getParameterValue('output_format');
     $gz = strstr($output_format, ".gz") === FALSE ? false : true;
     $dataset_description = '';
     foreach ($files as $file) {
         $download = parent::getParameterValue('download');
         $version = parent::getParameterValue("version");
         $zip_file = ucfirst($file) . ".mitab." . $version . ".txt.zip";
         $lfile = $ldir . $zip_file;
         $ofile = "irefindex-" . $file . "." . $output_format;
         if (!file_exists($lfile)) {
             trigger_error($lfile . " not found. Will attempt to download.", E_USER_NOTICE);
             $download = true;
         }
         $rfile = $rdir . $zip_file;
         if ($download == true) {
             echo "downloading {$rfile}" . PHP_EOL;
             if (FALSE === Utils::DownloadSingle($rfile, $lfile)) {
                 trigger_error("Error in Download");
                 return FALSE;
             }
         }
         $zin = new ZipArchive();
         // ZipArchive::open() returns TRUE on success and an integer error code on failure, never FALSE
         if ($zin->open($lfile) !== TRUE) {
             trigger_error("Unable to open {$lfile}");
             exit;
         }
         if ($zin->numFiles < 1) {
             trigger_error("Unable to find any file in ziparchive {$lfile}");
             $zin->close();
             return FALSE;
         }
         if ($zin->numFiles > 1) {
             trigger_error("Found more than one file ... using first file");
         }
         $f = $zin->statIndex(0);
         $base_file = $f['name'];
         if (($fp = $zin->getStream($base_file)) === FALSE) {
             trigger_error("Unable to get {$base_file} in ziparchive {$lfile}");
             $zin->close();
             return FALSE;
         }
         parent::setReadFile($lfile);
         parent::getReadFile()->setFilePointer($fp);
         echo "Processing " . $file . " ...";
         // compress the output only when the output format asks for it, matching the metadata below
         parent::setWriteFile($odir . $ofile, $gz);
         if ($this->Parse() === FALSE) {
             trigger_error("Parsing Error");
             exit;
         }
         parent::writeRDFBufferToWriteFile();
         parent::getWriteFile()->close();
         $zin->close();
         echo "Done!" . PHP_EOL;
         $graph_uri = parent::getGraphURI();
         if (parent::getParameterValue('dataset_graph') == true) {
             parent::setGraphURI(parent::getDatasetURI());
         }
         // dataset description
         $source_file = (new DataResource($this))->setURI($rfile)->setTitle("iRefIndex ({$zip_file})")->setRetrievedDate(date("Y-m-d\\TG:i:s\\Z", filemtime($lfile)))->setFormat("text/tab-separated-value")->setFormat("application/zip")->setPublisher("http://irefindex.uio.no")->setHomepage("http://irefindex.uio.no")->setRights("use")->setRights("by-attribution")->setRights("no-commercial")->setLicense("http://irefindex.uio.no/wiki/README_MITAB2.6_for_iRefIndex#License")->setDataset("http://identifiers.org/irefindex/");
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = date("Y-m-d\\TG:i:s\\Z");
         $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/irefindex/irefindex.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr($output_format, "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         $dataset_description .= $source_file->toRDF() . $output_file->toRDF();
         parent::setGraphURI($graph_uri);
     }
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     return TRUE;
 }
Exemplo n.º 14
0
 /**
  * Converts the requested SGD files to RDF and writes the dataset release file.
  *
  * Each requested file key is mapped to its remote path, downloaded when the
  * local copy is missing, and parsed by the method named after the key.
  * Depending on the 'one_file' parameter the RDF goes to one combined output
  * file or to one output file per key; the release file receives a source
  * description per key plus the matching output description(s).
  */
 function process()
 {
     if (parent::getParameterValue('files') == 'all') {
         $files = explode("|", parent::getParameterList('files'));
         array_shift($files);
     } else {
         $files = explode(",", parent::getParameterValue('files'));
     }
     $ldir = parent::getParameterValue('indir');
     $rdir = parent::getParameterValue('download_url');
     $odir = parent::getParameterValue('outdir');
     // file key => path below the download URL
     $rfiles = array("dbxref" => "curation/chromosomal_feature/dbxref.tab", "features" => "curation/chromosomal_feature/SGD_features.tab", "domains" => "curation/calculated_protein_info/domains/domains.tab", "protein" => "curation/calculated_protein_info/protein_properties.tab", "goa" => "curation/literature/gene_association.sgd.gz", "goslim" => "curation/literature/go_slim_mapping.tab", "complex" => "curation/literature/go_protein_complex_slim.tab", "interaction" => "curation/literature/interaction_data.tab", "phenotype" => "curation/literature/phenotype_data.tab", "pathways" => "curation/literature/biochemical_pathways.tab", "mapping" => "mapping");
     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }
     $output_format = parent::getParameterValue('output_format');
     $gz = strstr($output_format, "gz") !== FALSE;
     if (parent::getParameterValue('one_file') == true) {
         $ofile = "sgd." . $output_format;
         parent::setWriteFile($odir . $ofile, $gz);
     }
     $dataset_description = '';
     $source_file = null;
     foreach ($files as $file) {
         if (!isset($rfiles[$file])) {
             trigger_error("Unknown SGD file '{$file}'; skipping", E_USER_WARNING);
             continue;
         }
         // Anything that is not a plain .tab file is stored locally as .tab.gz. This keeps the
         // file names the previous accidental assignment (`$ext = "gz"`) produced, including
         // for the extension-less "mapping" entry.
         $ext = pathinfo($rfiles[$file], PATHINFO_EXTENSION);
         $input_gz = $ext != "tab";
         $lfile = "sgd_" . $file . ($input_gz ? ".tab.gz" : ".tab");
         $rfile = $rdir . $rfiles[$file];
         if (!file_exists($ldir . $lfile) && parent::getParameterValue('download') == false && $file != 'mapping') {
             trigger_error($ldir . $lfile . " not found. Will attempt to download.", E_USER_NOTICE);
             Utils::DownloadSingle($rfile, $ldir . $lfile);
         }
         if (parent::getParameterValue('one_file') == false) {
             $ofile = "sgd_" . $file . '.' . $output_format;
             parent::setWriteFile($odir . $ofile, $gz);
         }
         //parse file
         // the read mode follows the input file's own compression, not the output format
         parent::setReadFile($ldir . $lfile, $input_gz);
         // the parser method carries the same name as the file key
         $fnx = $file;
         echo "Processing {$file}... ";
         $this->{$fnx}();
         echo PHP_EOL . "done!";
         //write RDF to file
         parent::writeRDFBufferToWriteFile();
         //close write file
         if (parent::getParameterValue('one_file') == false) {
             parent::getWriteFile()->close();
         }
         echo PHP_EOL;
         // generate the dataset release file
         // dataset description
         $source_file = (new DataResource($this))->setURI($rfile)->setTitle("Saccharomyces Genome Database ({$file})")->setRetrievedDate(date("Y-m-d\\TG:i:s\\Z", filemtime($ldir . $lfile)))->setFormat("text/tab-separated-value")->setFormat("application/gzip")->setPublisher("http://www.yeastgenome.org/")->setHomepage("http://www.yeastgenome.org/")->setRights("use")->setLicense("http://www.stanford.edu/site/terms.html")->setDataset("http://identifiers.org/sgd/");
         $dataset_description .= $source_file->toRDF();
         if (parent::getParameterValue('one_file') == false) {
             $prefix = parent::getPrefix();
             $bVersion = parent::getParameterValue('bio2rdf_release');
             $date = date("Y-m-d\\TG:i:s\\Z");
             $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}")->setSource($source_file->getURI())->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sgd/sgd.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI());
             if ($gz) {
                 $output_file->setFormat("application/gzip");
             }
             if (strstr($output_format, "nt")) {
                 $output_file->setFormat("application/n-triples");
             } else {
                 $output_file->setFormat("application/n-quads");
             }
             $dataset_description .= $output_file->toRDF();
         }
     }
     //foreach
     //set graph URI back to default
     parent::setGraphURI($graph_uri);
     if (parent::getParameterValue('one_file') == true) {
         $prefix = parent::getPrefix();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $date = date("Y-m-d\\TG:i:s\\Z");
         $output_file = (new DataResource($this))->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} - {$file}");
         // every key may have been skipped, in which case there is no source to link to
         if ($source_file !== null) {
             $output_file->setSource($source_file->getURI());
         }
         $output_file->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sgd/sgd.php")->setCreateDate($date)->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")->setPublisher("http://bio2rdf.org")->setRights("use-share-modify")->setRights("by-attribution")->setRights("restricted-by-source-license")->setLicense("http://creativecommons.org/licenses/by/3.0/")->setDataset(parent::getDatasetURI());
         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if (strstr($output_format, "nt")) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }
         // each source description was already appended inside the loop; add only the output description
         $dataset_description .= $output_file->toRDF();
     }
     //write dataset description to file
     echo "Generating dataset description... " . PHP_EOL;
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Exemplo n.º 15
0
 /**
  * Convert the requested DrugBank source files to RDF and record their provenance.
  *
  * For every entry of the `files` parameter this method:
  *  1. downloads the archive when it is missing locally or when the `download`
  *     parameter is switched on,
  *  2. opens the output file and hands the archive to the matching
  *     `parse_<entry>` method,
  *  3. appends a source/output DataResource description to the release file.
  *
  * Entries other than `drugbank` have no known archive name and are skipped
  * with a warning instead of being processed with undefined or stale values.
  *
  * @return void
  */
 function Run()
 {
     $indir = parent::getParameterValue('indir');
     $outdir = parent::getParameterValue('outdir');
     $download_url = parent::getParameterValue('download_url');
     $output_format = parent::getParameterValue('output_format');

     // The flag is compared as text elsewhere in this code base ("true"); a loose
     // `== true` would treat any non-empty string, including "false", as a
     // request to download again.
     $force_download = filter_var(parent::getParameterValue('download'), FILTER_VALIDATE_BOOLEAN);

     // Output characteristics depend only on the configured format, so they are
     // derived once instead of once (or twice) per file.
     $gz = strstr($output_format, ".gz") !== FALSE;
     $is_ntriples = strstr($output_format, "nt") !== FALSE;

     if (parent::getParameterValue('files') == 'all') {
         $files = explode("|", parent::getParameterList('files'));
         // the first entry of the list is dropped (it is not a file name)
         array_shift($files);
     } else {
         $files = explode("|", parent::getParameterValue('files'));
     }
     if (parent::getParameterValue("id_list")) {
         // flipped so that identifiers become array keys
         $this->id_list = array_flip(explode(",", parent::getParameterValue('id_list')));
     }

     foreach ($files as $f) {
         if ($f !== 'drugbank') {
             trigger_error("Unknown file '{$f}' requested; skipping", E_USER_WARNING);
             continue;
         }
         $file = 'drugbank.xml.zip';
         $lname = 'drugbank';

         $fnx = 'parse_' . $f;
         $rfile = $download_url . $file;
         $lfile = $indir . $file;
         $cfile = $lname . "." . $output_format;

         // download
         if (!file_exists($lfile) || $force_download) {
             utils::downloadSingle($rfile, $lfile);
         }

         // setup the write
         parent::setWriteFile($outdir . $cfile, $gz);
         echo $outdir . $cfile;
         if (file_exists($lfile)) {
             // call the parser
             echo "processing {$file} ..." . PHP_EOL;
             $this->{$fnx}($indir, $file);
             echo "done" . PHP_EOL;
             parent::clear();
         }
         parent::getWriteFile()->close();

         // dataset description: written against the dataset graph, after which
         // the previous graph URI is restored
         $ouri = parent::getGraphURI();
         parent::setGraphURI(parent::getDatasetURI());
         $source_version = parent::getDatasetVersion();
         $bVersion = parent::getParameterValue('bio2rdf_release');
         $prefix = parent::getPrefix();
         $date = date("Y-m-d\\TH:i:sP");

         $source_file = (new DataResource($this))
             ->setURI($rfile)
             ->setTitle("DrugBank ({$file})")
             ->setRetrievedDate(date("Y-m-d\\TH:i:sP", filemtime($lfile)))
             ->setFormat("application/xml")
             ->setFormat("application/zip")
             ->setPublisher("http://drugbank.ca")
             ->setHomepage("http://drugbank.ca")
             ->setRights("use")
             ->setRights("by-attribution")
             ->setRights("no-commercial")
             ->setLicense("http://www.drugbank.ca/about")
             ->setDataset("http://identifiers.org/drugbank/");

         $output_file = (new DataResource($this))
             ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$cfile}")
             ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} v{$source_version}")
             ->setSource($source_file->getURI())
             ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/drugbank/drugbank.php")
             ->setCreateDate($date)
             ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
             ->setPublisher("http://bio2rdf.org")
             ->setRights("use-share-modify")
             ->setRights("by-attribution")
             ->setRights("restricted-by-source-license")
             ->setLicense("http://creativecommons.org/licenses/by/3.0/")
             ->setDataset(parent::getDatasetURI());

         if ($gz) {
             $output_file->setFormat("application/gzip");
         }
         if ($is_ntriples) {
             $output_file->setFormat("application/n-triples");
         } else {
             $output_file->setFormat("application/n-quads");
         }

         parent::writeToReleaseFile($source_file->toRDF() . $output_file->toRDF());
         parent::setGraphURI($ouri);
     }
     parent::closeReleaseFile();
 }
Exemplo n.º 16
0
 /**
  * Convert the OBO flat file currently opened as the read file into RDF
  * statements and write them to the current write file.
  *
  * The input is consumed line by line. A `[Term]` or `[Typedef]` line switches
  * the parser into the corresponding stanza mode (tracked by whether `$term` or
  * `$typedef` is set); every line seen before the first stanza is treated as a
  * header line and attached to the ontology URI. Each remaining line is split
  * into a tag (`$a[0]`) and a value (`$a[1]`) and dispatched on the tag.
  *
  * Two buffers are filled in parallel: `$buf` receives every generated
  * statement, `$min` only a reduced subset. When the `detail` parameter is
  * `min` or `min+`, `$min` is written, otherwise `$buf`.
  *
  * @param string $abbv Ontology abbreviation; it is lower-cased, and `doid` is
  *                     rewritten to `do`, before being used in the ontology and
  *                     dataset URIs.
  *
  * @return void Nothing is returned; output goes to the write file.
  */
 function OBO2RDF($abbv)
 {
     $abbv = strtolower($abbv);
     if ($abbv == "doid") {
         $abbv = "do";
     }
     // 'min' and 'min+' both select the reduced buffer; 'min+' additionally
     // includes the blank-node expansions of intersection_of / relationship.
     $minimal = parent::getParameterValue('detail') == 'min' ? true : false;
     $minimalp = parent::getParameterValue('detail') == 'min+' ? true : false;
     $version = parent::getParameterValue("bio2rdf_release");
     // $tid: id of the stanza being read; $is_a / $is_deprecated: whether the
     // current stanza declared a parent or was marked obsolete.
     $tid = '';
     $first = true;
     $is_a = false;
     $is_deprecated = false;
     $min = $buf = '';
     $ouri = "http://bio2rdf.org/lsr:" . $abbv;
     $dataset_uri = $abbv . "_resource:bio2rdf.dataset.{$abbv}.R" . $version;
     parent::setGraphURI($dataset_uri);
     $buf = parent::triplify($ouri, "rdf:type", "owl:Ontology");
     // Fully qualified graph URI, used for the hand-built blank-node statements below.
     $graph_uri = '<' . parent::getRegistry()->getFQURI(parent::getGraphURI()) . '>';
     // Counter for blank-node labels (_:bN); pre-incremented before each use.
     $bid = 1;
     // The loop ends as soon as read() returns a falsy value.
     while ($l = parent::getReadFile()->read()) {
         $lt = trim($l);
         // skip blank lines
         if (strlen($lt) == 0) {
             continue;
         }
         // skip lines that start with '!'
         if ($lt[0] == '!') {
             continue;
         }
         if (strstr($l, "[Term]")) {
             // first node?
             if ($first == true) {
                 // ignore the first case
                 $first = false;
             } else {
                 // The previous stanza had an id but neither an is_a nor an
                 // obsolete flag: attach it to the generic Entity class.
                 // NOTE(review): this only fires when another [Term] follows, so
                 // the last stanza of the file never gets this statement, and
                 // $tid may come from a preceding [Typedef] stanza - confirm.
                 if ($tid != '' && $is_a == false && $is_deprecated == false) {
                     $t = parent::triplify($tid, "rdfs:subClassOf", "obo_vocabulary:Entity");
                     $buf .= $t;
                     $min .= $t;
                 }
             }
             // Enter term mode: $term set, $typedef unset.
             $is_a = false;
             $is_deprecated = false;
             unset($typedef);
             $term = '';
             $tid = '';
             // The buffers are not written here; they are carried into the next iteration.
             continue;
         } else {
             if (strstr($l, "[Typedef]")) {
                 // Enter typedef mode: $typedef set, $term unset.
                 $is_a = false;
                 $is_deprecated = false;
                 unset($term);
                 $tid = '';
                 $typedef = '';
                 continue;
             }
         }
         //echo "LINE: $l".PHP_EOL;
         // to fix error in obo generator
         $lt = str_replace("synonym ", "synonym: ", $lt);
         // drop a trailing "{...}" qualifier block that precedes a " !" comment
         $lt = preg_replace("/\\{.*\\} !/", " !", $lt);
         // split off the trailing " !" comment
         $a = explode(" !", $lt);
         if (isset($a[1])) {
             // NOTE(review): $exc is assigned here but not read anywhere in this function.
             $exc = trim($a[1]);
         }
         // $a[0] = tag, $a[1] = value (absent when the line has no ": ")
         $a = explode(": ", trim($a[0]), 2);
         // let's go
         // A pending intersection_of expansion is closed by the first line with
         // a different tag; it is only emitted in 'min+' mode.
         if (isset($intersection_of)) {
             if ($a[0] != "intersection_of") {
                 //		$intersection_of .= ")].".PHP_EOL;
                 //$buf .= $intersection_of;
                 if ($minimalp) {
                     $min .= $intersection_of;
                 }
                 unset($intersection_of);
             }
         }
         // Same handling for a pending relationship expansion.
         if (isset($relationship)) {
             if ($a[0] != "relationship") {
                 //	$relationship .= ")].".PHP_EOL;
                 //$buf .= $relationship;
                 if ($minimalp) {
                     $min .= $relationship;
                 }
                 unset($relationship);
             }
         }
         if (isset($typedef)) {
             // ---- [Typedef] stanza ----
             if ($a[0] == "id") {
                 // ids without a namespace prefix fall into the "obo" namespace
                 $c = explode(":", $a[1]);
                 if (count($c) == 1) {
                     $ns = "obo";
                     $id = $c[0];
                 } else {
                     $ns = strtolower($c[0]);
                     $id = $c[1];
                 }
                 // "(" becomes "_" and ")" is removed
                 $id = str_replace(array("(", ")"), array("_", ""), $id);
                 $tid = $ns . ":" . $id;
             } else {
                 if ($a[0] == "name") {
                     $buf .= parent::describeClass($tid, addslashes(stripslashes($a[1])));
                 } else {
                     if ($a[0] == "is_a") {
                         // strip an inline "!" comment from the value
                         if (FALSE !== ($pos = strpos($a[1], "!"))) {
                             $a[1] = substr($a[1], 0, $pos - 1);
                         }
                         $buf .= parent::triplify($tid, "rdfs:subPropertyOf", "obo_vocabulary:" . strtolower($a[1]));
                     } else {
                         if ($a[0] == "is_obsolete") {
                             $buf .= parent::triplify($tid, "rdf:type", "owl:DeprecatedClass");
                             $is_deprecated = true;
                         } else {
                             // any other tag becomes a string-valued obo_vocabulary property
                             if ($a[0][0] == "!") {
                                 $a[0] = substr($a[0], 1);
                             }
                             // NOTE(review): $a[1] is read without an isset() check here.
                             $buf .= parent::triplifyString($tid, "obo_vocabulary:{$a['0']}", str_replace('"', '', stripslashes($a[1])));
                         }
                     }
                 }
             }
         } else {
             if (isset($term)) {
                 // ---- [Term] stanza ----
                 if ($a[0] == "is_obsolete" && $a[1] == "true") {
                     $t = parent::triplify($tid, "rdf:type", "owl:DeprecatedClass");
                     $t .= parent::triplify($tid, "rdfs:subClassOf", "owl:DeprecatedClass");
                     $min .= $t;
                     $buf .= $t;
                     $is_deprecated = true;
                 } else {
                     if ($a[0] == "id") {
                         // parseQName fills $ns and $id (presumably by reference - TODO confirm)
                         parent::getRegistry()->parseQName($a[1], $ns, $id);
                         $tid = "{$ns}:{$id}";
                         //					$buf .= parent::describeClass($tid,null,"owl:Class");
                         //					$buf .= parent::triplify($tid,"rdfs:isDefinedBy",$ouri);
                     } else {
                         if ($a[0] == "name") {
                             //					$t = parent::triplifyString($tid,"rdfs:label",str_replace(array("\"", "'"), array("","\\\'"), stripslashes($a[1]))." [$tid]");
                             // double quotes are removed, single quotes are backslash-escaped
                             $label = str_replace(array("\"", "'"), array("", "\\\\'"), stripslashes($a[1]));
                             $t = parent::describeIndividual($tid, $label, "owl:Class");
                             $t .= parent::triplify($tid, "rdfs:isDefinedBy", $ouri);
                             $min .= $t;
                             $buf .= $t;
                         } else {
                             if ($a[0] == "def") {
                                 // NOTE(review): str_replace applies the pairs in order, so the
                                 // backslashes inserted for "'" are removed again by the third
                                 // pair - confirm this is intended.
                                 $t = str_replace(array("'", "\"", "\\", "\\\\'"), array("\\\\'", "", "", ""), $a[1]);
                                 $min .= parent::triplifyString($tid, "dc:description", $t);
                                 $buf .= parent::triplifyString($tid, "dc:description", $t);
                             } else {
                                 if ($a[0] == "property_value") {
                                     // first word is the property, second word its (lower-cased) value
                                     $b = explode(" ", $a[1]);
                                     $buf .= parent::triplifyString($tid, "obo_vocabulary:" . strtolower($b[0]), str_replace("\"", "", strtolower($b[1])));
                                 } else {
                                     if ($a[0] == "xref") {
                                         // Examples of xref values handled below:
                                         // http://upload.wikimedia.org/wikipedia/commons/3/34/Anatomical_Directions_and_Axes.JPG
                                         // Medical Dictionary:http\://www.medterms.com/
                                         // KEGG COMPOUND:C02788 "KEGG COMPOUND"
                                         // id-validation-regexp:\"REACT_[0-9\]\{1\,4}\\.[0-9\]\{1\,3}|[0-9\]+\"
                                         //$a[1] = 'id-validation-regexp:\"REACT_[0-9\]\{1\,4}\\.[0-9\]\{1\,3}|[0-9\]+\"';
                                         if (substr($a[1], 0, 4) == "http") {
                                             // bare URL: link it directly
                                             $buf .= parent::triplify($tid, "rdfs:seeAlso", str_replace(array(" ", '"wiki"', "\\"), array("+", "", ""), $a[1]));
                                         } else {
                                             // "namespace:identifier" form
                                             $b = explode(":", $a[1], 2);
                                             if (substr($b[1], 0, 4) == "http") {
                                                 $buf .= parent::triplify($tid, "rdfs:seeAlso", stripslashes($b[1]));
                                             } else {
                                                 $ns = str_replace(array(" ", "\\"), "", strtolower($b[0]));
                                                 $id = trim($b[1]);
                                                 // there may be a comment to remove
                                                 if (FALSE !== ($pos = strrpos($id, ' "'))) {
                                                     // NOTE(review): $comment is not read afterwards.
                                                     $comment = substr($id, $pos + 1, -1);
                                                     $id = substr($id, 0, $pos);
                                                 }
                                                 $id = stripslashes($id);
                                                 // there may be a source statement to remove
                                                 $id = preg_replace("/{.*\\}/", "", $id);
                                                 if ($ns == "pmid") {
                                                     $ns = "pubmed";
                                                     $y = explode(" ", $id);
                                                     $id = $y[0];
                                                 }
                                                 // This continue (and the one below) skips the write and
                                                 // reset at the end of the loop body; whatever is already
                                                 // in the buffers is carried into the next iteration.
                                                 if ($ns == "xx") {
                                                     continue;
                                                 }
                                                 if ($ns == "icd9cm") {
                                                     $y = explode(" ", $id);
                                                     $id = $y[0];
                                                 }
                                                 if ($ns == "xref; umls_cui") {
                                                     continue;
                                                 }
                                                 if ($ns == "submitter") {
                                                     $ns = "chebi.submitter";
                                                 }
                                                 if ($ns == "wikipedia" || $ns == "mesh") {
                                                     $id = str_replace(" ", "+", $id);
                                                 }
                                                 if ($ns == "id-validation-regexp") {
                                                     $buf .= parent::triplifyString($tid, "obo_vocabulary:{$ns}", addslashes($id));
                                                 } else {
                                                     $buf .= parent::triplify($tid, "obo_vocabulary:x-{$ns}", "{$ns}:" . str_replace(" ", "-", $id));
                                                 }
                                             }
                                         }
                                     } else {
                                         if ($a[0] == "synonym") {
                                             // synonym: "entidades moleculares" RELATED [IUPAC:]
                                             // synonym: "molecular entity" EXACT IUPAC_NAME [IUPAC:]
                                             // synonym: "Chondrococcus macrosporus" RELATED synonym [NCBITaxonRef:Krzemieniewska_and_Krzemieniewski_1926]
                                             //grab string inside double quotes
                                             preg_match('/"(.*)"(.*)/', $a[1], $matches);
                                             if (!empty($matches)) {
                                                 $a[1] = str_replace(array("\\", "\"", "'"), array("", "", "\\\\'"), $matches[1] . $matches[2]);
                                             } else {
                                                 $a[1] = str_replace(array("\"", "'"), array("", "\\\\'"), $a[1]);
                                             }
                                             // Default predicate when no scope keyword is present.
                                             $rel = "SYNONYM";
                                             $list = array("EXACT", "BROAD", "RELATED", "NARROW");
                                             $found = false;
                                             foreach ($list as $keyword) {
                                                 // get everything after the keyword up until the bracket [
                                                 if (FALSE !== ($k_pos = strpos($a[1], $keyword))) {
                                                     // NOTE(review): $str_len and $b_text are computed but not
                                                     // used by any live code below.
                                                     $str_len = strlen($a[1]);
                                                     $keyword_len = strlen($keyword);
                                                     $keyword_end_pos = $k_pos + $keyword_len;
                                                     $b1_pos = strrpos($a[1], "[");
                                                     $b2_pos = strrpos($a[1], "]");
                                                     $b_text = substr($a[1], $b1_pos + 1, $b2_pos - $b1_pos - 1);
                                                     // number of characters between the keyword and the "["
                                                     $diff = $b1_pos - $keyword_end_pos - 1;
                                                     if ($diff != 0) {
                                                         // then there is more stuff here
                                                         $k = substr($a[1], $keyword_end_pos + 1, $diff);
                                                         $rel = trim($k);
                                                     } else {
                                                         // create the long predicate
                                                         $rel = $keyword . "_SYNONYM";
                                                     }
                                                     $found = true;
                                                     // synonym text = everything before the keyword
                                                     $str = substr($a[1], 0, $k_pos - 1);
                                                     break;
                                                 }
                                             }
                                             // check to see if we still haven't found anything
                                             if ($found === false) {
                                                 // we didn't find one of the keywords
                                                 // so take from the start to the bracket
                                                 $b1_pos = strrpos($a[1], "[");
                                                 $str = substr($a[1], 0, $b1_pos - 1);
                                             }
                                             $rel = str_replace(" ", "_", $rel);
                                             // $lit = addslashes($str.($b_text?" [".$b_text."]":""));
                                             // Note: this reuses $l, the variable that holds the current input line.
                                             $l = parent::triplifyString($tid, "obo_vocabulary:" . strtolower($rel), $str);
                                             $buf .= $l;
                                         } else {
                                             if ($a[0] == "alt_id") {
                                                 // link the alternative id back to the primary id
                                                 parent::getRegistry()->parseQname($a[1], $ns, $id);
                                                 if ($id != 'curators') {
                                                     $buf .= parent::triplify("{$ns}:{$id}", "rdfs:seeAlso", stripslashes($tid));
                                                 }
                                             } else {
                                                 if ($a[0] == "is_a") {
                                                     // do subclassing
                                                     parent::getRegistry()->parseQName($a[1], $ns, $id);
                                                     $t = parent::triplify($tid, "rdfs:subClassOf", "{$ns}:{$id}");
                                                     $buf .= $t;
                                                     $min .= $t;
                                                     $is_a = true;
                                                 } else {
                                                     if ($a[0] == "intersection_of") {
                                                         // The first intersection_of line of a run opens the expansion:
                                                         // term -> equivalentClass -> blank node -> intersectionOf -> blank node.
                                                         if (!isset($intersection_of)) {
                                                             // $intersection_of = '<'.parent::getRegistry()->getFQURI($tid).'> <'.parent::getRegistry()->getFQURI('owl:equivalentClass').'> [<'.parent::getRegistry()->getFQURI('rdf:type').'> <'.parent::getRegistry()->getFQURI('owl:Class').'>; <'.parent::getRegistry()->getFQURI('owl:intersectionOf').'> (';
                                                             $intersection_of = '<' . parent::getRegistry()->getFQURI($tid) . '> <' . parent::getRegistry()->getFQURI('owl:equivalentClass') . '> _:b' . ++$bid . " {$graph_uri} ." . PHP_EOL;
                                                             $intersection_of .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('rdf:type') . '> <' . parent::getRegistry()->getFQURI('owl:Class') . "> {$graph_uri} ." . PHP_EOL;
                                                             $intersection_of .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('owl:intersectionOf') . '> _:b' . ++$bid . " {$graph_uri} ." . PHP_EOL;
                                                         }
                                                         /*
                                                         intersection_of: ECO:0000206 ! BLAST evidence
                                                         intersection_of: develops_from VAO:0000092 ! chondrogenic condensation
                                                         intersection_of: OBO_REL:has_part VAO:0000040 ! cartilage tissue
                                                         */
                                                         // one word = a class, two words = "predicate object"; other counts are ignored
                                                         $c = explode(" ", $a[1]);
                                                         if (count($c) == 1) {
                                                             // just a class
                                                             parent::getRegistry()->parseQName($c[0], $ns, $id);
                                                             $intersection_of .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('rdfs:subClassOf') . '> <' . parent::getRegistry()->getFQURI("{$ns}:{$id}") . "> {$graph_uri} ." . PHP_EOL;
                                                             $buf .= parent::triplify($tid, "rdfs:subClassOf", "{$ns}:{$id}");
                                                         } else {
                                                             if (count($c) == 2) {
                                                                 // an expression
                                                                 parent::getRegistry()->parseQName($c[0], $pred_ns, $pred_id);
                                                                 parent::getRegistry()->parseQName($c[1], $obj_ns, $obj_id);
                                                                 $intersection_of .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('owl:onProperty') . '> <' . parent::getRegistry()->getFQURI("obo_vocabulary:" . $pred_id) . "> {$graph_uri} ." . PHP_EOL;
                                                                 $intersection_of .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('owl:someValuesFrom') . '> <' . parent::getRegistry()->getFQURI("{$obj_ns}:{$obj_id}") . ">  {$graph_uri} ." . PHP_EOL;
                                                                 $buf .= parent::triplify($tid, "obo_vocabulary:{$pred_id}", "{$obj_ns}:{$obj_id}");
                                                             }
                                                         }
                                                     } else {
                                                         if ($a[0] == "relationship") {
                                                             // Same expansion as intersection_of, but rooted at rdfs:subClassOf.
                                                             if (!isset($relationship)) {
                                                                 $relationship = '<' . parent::getRegistry()->getFQURI($tid) . '> <' . parent::getRegistry()->getFQURI('rdfs:subClassOf') . '> _:b' . ++$bid . " {$graph_uri} ." . PHP_EOL;
                                                                 $relationship .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('rdf:type') . '> <' . parent::getRegistry()->getFQURI('owl:Class') . "> {$graph_uri} ." . PHP_EOL;
                                                                 $relationship .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('owl:intersectionOf') . '> _:b' . ++$bid . " {$graph_uri} ." . PHP_EOL;
                                                             }
                                                             /*
                                                             relationship: develops_from VAO:0000092 ! chondrogenic condensation
                                                             relationship: OBO_REL:has_part VAO:0000040 ! cartilage tissue
                                                             */
                                                             $c = explode(" ", $a[1]);
                                                             if (count($c) == 1) {
                                                                 // just a class
                                                                 parent::getRegistry()->parseQName($c[0], $ns, $id);
                                                                 // NOTE(review): unlike the intersection_of branch, this appends a
                                                                 // bare URI rather than a complete statement - confirm intent.
                                                                 $relationship .= parent::getRegistry()->getFQURI("{$ns}:{$id}");
                                                                 $buf .= parent::triplify($tid, "rdfs:subClassOf", "{$ns}:{$id}");
                                                             } else {
                                                                 if (count($c) == 2) {
                                                                     // an expression
                                                                     parent::getRegistry()->parseQName($c[0], $pred_ns, $pred_id);
                                                                     parent::getRegistry()->parseQName($c[1], $obj_ns, $obj_id);
                                                                     $relationship .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('owl:onProperty') . '> <' . parent::getRegistry()->getFQURI("obo_vocabulary:" . $pred_id) . ">  {$graph_uri} ." . PHP_EOL;
                                                                     $relationship .= '_:b' . $bid . ' <' . parent::getRegistry()->getFQURI('owl:someValuesFrom') . '> <' . parent::getRegistry()->getFQURI("{$obj_ns}:{$obj_id}") . "> {$graph_uri} ." . PHP_EOL;
                                                                     $buf .= parent::triplify($tid, "obo_vocabulary:{$pred_id}", "{$obj_ns}:{$obj_id}");
                                                                 }
                                                             }
                                                         } else {
                                                             // default handler
                                                             // any other tag with a value becomes a string-valued obo_vocabulary property
                                                             if (isset($a[1])) {
                                                                 $buf .= parent::triplifyString($tid, "obo_vocabulary:{$a['0']}", str_replace(array("\"", "'"), array("", "\\\\'"), stripslashes($a[1])));
                                                             }
                                                         }
                                                     }
                                                 }
                                             }
                                         }
                                     }
                                 }
                             }
                         }
                     }
                 }
             } else {
                 //header
                 //format-version: 1.0
                 // Neither stanza mode is active yet: attach the tag/value pair to the ontology URI.
                 $buf .= parent::triplifyString($ouri, "obo_vocabulary:{$a['0']}", str_replace(array('"', '\\:'), array('\\"', ':'), isset($a[1]) ? $a[1] : ""));
             }
         }
         // Flush what this line produced, then start the next line with empty buffers.
         if ($minimal || $minimalp) {
             parent::getWriteFile()->write($min);
         } else {
             parent::getWriteFile()->write($buf);
         }
         $min = '';
         $buf = '';
         // NOTE(review): $header is not read anywhere in this function.
         $header = '';
     }
     //if(isset($intersection_of))  $buf .= $intersection_of.")].".PHP_EOL;
     //if(isset($relationship))  $buf .= $relationship.")].".PHP_EOL;
     // Final flush for anything left over from a `continue` on the last line(s).
     // NOTE(review): a still-pending $intersection_of / $relationship expansion is
     // not appended to $min here.
     if ($minimal || $minimalp) {
         parent::getWriteFile()->Write($min);
     } else {
         parent::getWriteFile()->write($buf);
     }
 }