Ejemplo n.º 1
0
 /**
  *  Parse the local copy of the PubChem substances directory.
  *
  *  Every entry of <indir>/substances/ (other than "." and "..") is handed to
  *  parse_substance_file(), with output written to
  *  <outdir>/substances/<basename without .xml.gz>.<output_format>.
  *  Afterwards a dataset description (one DataResource for the source and one
  *  for the generated output) is written to the file named by
  *  getBio2RDFReleaseFile() in the same output directory.
  *
  *  If the 'dataset_graph' parameter is set, the dataset URI is used as the
  *  graph URI while the files are parsed; the previous graph URI is restored
  *  before the dataset description is written.
  *
  *  Terminates the script via exit when the input directory cannot be opened.
  **/
 function parse_substances()
 {
     $ignore = array(".", "..");
     $input_dir = $this->getParameterValue('indir') . "/substances/";
     $this->CreateDirectory($this->getParameterValue('outdir') . "/substances/");
     parent::setDatasetURI("bio2rdf_dataset:bio2rdf-" . $this->getPcsPrefix() . "-" . date("Ymd"));
     $graph_uri = parent::getGraphURI();
     //set graph URI to dataset uri
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }

     // The output format does not change between files, so resolve it (and
     // whether it implies gzip compression) once, up front. Doing this before
     // the loop also keeps the dataset description correct when the input
     // directory turns out to be empty.
     $output_format = parent::getParameterValue('output_format');
     $gz = (strpos($output_format, "gz") !== false);

     $handle = opendir($input_dir);
     if (!$handle) {
         echo "unable to read directory contents: " . $input_dir . "\n";
         exit;
     }

     $output_prefix = realpath($this->getParameterValue('outdir')) . "/substances/";
     while (false !== ($file = readdir($handle))) {
         if (in_array($file, $ignore, true)) {
             continue;
         }
         echo "Processing file: " . $input_dir . $file . PHP_EOL;
         $outfile = $output_prefix . basename($file, ".xml.gz") . "." . $output_format;
         echo "... into " . $outfile . PHP_EOL;
         parent::setCheckpoint('file');
         $this->setWriteFile($outfile, $gz);
         $this->parse_substance_file($input_dir, $file);
         $this->getWriteFile()->close();
     }
     closedir($handle);

     // ISO 8601 / xsd:dateTime in UTC: "H" gives a zero-padded hour ("G" does
     // not), and gmdate() makes the literal "Z" suffix truthful.
     $timestamp_format = "Y-m-d\\TH:i:s\\Z";

     // describe the upstream source the files were retrieved from
     $source_file = (new DataResource($this))
         ->setURI("http://www.ncbi.nlm.nih.gov/pcsubstance")
         ->setTitle("PubChem Substance")
         ->setRetrievedDate(gmdate($timestamp_format, filemtime($input_dir)))
         ->setFormat("text/xml")
         ->setFormat("application/zip")
         ->setPublisher("http://ncbi.nlm.nih.gov/")
         ->setHomepage("http://pubchem.ncbi.nlm.nih.gov/")
         ->setRights("use")
         ->setRights("restricted-by-source-license")
         ->setLicense("ftp://ftp.ncbi.nlm.nih.gov/pubchem/README")
         ->setDataset("http://identifiers.org/pubchem.substance/");

     // describe the RDF output generated by this run
     $prefix = $this->getPcsPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = gmdate($timestamp_format);
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/pubchem/pubchem.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strpos($output_format, "nt") !== false) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description = $source_file->toRDF() . $output_file->toRDF();

     //set graph URI back to default
     parent::setGraphURI($graph_uri);

     // write the dataset description
     $this->setWriteFile($this->getParameterValue('outdir') . "/substances/" . $this->getBio2RDFReleaseFile());
     $this->getWriteFile()->write($dataset_description);
     $this->getWriteFile()->close();
 }