Beispiel #1
0
 /**
  * Fetch SABIO-RK reactions (downloading when required), convert each one from
  * BioPAX to RDF, and finally write the dataset description file.
  *
  * Parameters read: 'indir', 'outdir', 'files', 'download', 'dataset_graph',
  * 'output_format' and 'bio2rdf_release'. 'files' is either 'all', a
  * comma-separated list of reaction ids, a hyphen-separated range ("10-20")
  * or a single id.
  *
  * @return bool|null false when the reaction list cannot be downloaded or
  *                   parsed; no value otherwise
  */
 function Run()
 {
     $idir = parent::getParameterValue('indir');
     $odir = parent::getParameterValue('outdir');
     $files = parent::getParameterValue('files');
     // read once so both the list download and the per-reaction download agree
     $download = parent::getParameterValue('download') == 'true';

     // Work out which reaction ids to process; null means "every reaction".
     $myfiles = null;
     if ($files != 'all') {
         // check if comma-separated, or hyphen-range
         $list = explode(",", $files);
         if (count($list) == 1) {
             // try hyphen separated
             $range = explode("-", $files);
             if (count($range) == 2) {
                 // start from an empty selection so that an inverted range
                 // (e.g. "5-3") selects nothing instead of silently selecting
                 // every reaction
                 $myfiles = array();
                 for ($i = $range[0]; $i <= $range[1]; $i++) {
                     $myfiles[] = $i;
                 }
             } else {
                 // must be a single entry
                 $myfiles = array($files);
             }
         } else {
             $myfiles = $list;
         }
     }

     $rest_uri = 'http://sabiork.h-its.org/sabioRestWebServices/';
     $getReactionIds_url = $rest_uri . "suggestions/SABIOReactionIDs";
     $reaction_list_file = $idir . "reactions.xml";
     if (!file_exists($reaction_list_file) || $download) {
         $xml = file_get_contents($getReactionIds_url);
         // file_get_contents() returns false on failure; the check must be on
         // the downloaded data, not on the target file name
         if ($xml === false) {
             trigger_error("Unable to download the reaction list from {$getReactionIds_url}", E_USER_ERROR);
             return false;
         }
         $f = new FileFactory($reaction_list_file);
         $f->Write($xml);
         $f->Close();
     }

     $xml = simplexml_load_file($reaction_list_file);
     if ($xml === false) {
         // without a parsed list there is nothing to iterate over
         trigger_error("Unable to parse the reaction list {$reaction_list_file}", E_USER_ERROR);
         return false;
     }

     $total = count($xml->SABIOReactionID);
     if ($myfiles !== null) {
         $total = count($myfiles);
     }
     $i = 0;

     parent::setCheckpoint('dataset');
     // remember the current graph so it can be restored before the dataset
     // description is written
     $graph_uri = parent::getGraphURI();
     if (parent::getParameterValue('dataset_graph') == true) {
         parent::setGraphURI(parent::getDatasetURI());
     }

     $suffix = parent::getParameterValue('output_format');
     $ofile = "sabiork." . $suffix;
     $gz = strstr($suffix, "gz") ? true : false;
     parent::setWriteFile($odir . $ofile, $gz);

     foreach ($xml->SABIOReactionID as $rid) {
         parent::setCheckpoint('file');
         // SimpleXML yields element objects; use the id as a plain string
         $rid = (string) $rid;
         // loose comparison on purpose: a range selection holds integers,
         // a list selection holds strings
         if ($myfiles !== null && !in_array($rid, $myfiles)) {
             continue;
         }
         $i++;
         echo "{$i} / {$total} : reaction {$rid}" . PHP_EOL;

         $reaction_file = $idir . "reaction_" . $rid . ".owl.gz";
         if (!file_exists($reaction_file) || $download) {
             $url = $rest_uri . 'searchKineticLaws/biopax?q=SabioReactionID:' . $rid;
             $data = file_get_contents($url);
             if ($data === FALSE) {
                 // best effort: skip reactions that cannot be downloaded
                 continue;
             }
             $f = new FileFactory($reaction_file, true);
             $f->Write($data);
             $f->Close();
         }

         $buf = file_get_contents("compress.zlib://" . $reaction_file);
         if ($buf === false) {
             // unreadable cache file: skip it rather than hand `false` to the parser
             continue;
         }

         // send for parsing
         $p = new BioPAX2Bio2RDF($this);
         $p->SetBuffer($buf)
             ->SetBioPAXVersion(3)
             ->SetBaseNamespace("http://sabio.h-its.org/biopax#")
             ->SetBio2RDFNamespace("http://bio2rdf.org/sabiork:")
             ->SetDatasetURI($this->GetDatasetURI());
         $rdf = $p->Parse();
         parent::getWriteFile()->Write($rdf);
     }
     parent::getWriteFile()->Close();

     //generate dataset description
     echo "Generating dataset description... ";
     // The trailing "Z" designates UTC, so timestamps are produced with
     // gmdate(); "H" gives the two-digit hour ("G" drops the leading zero).
     $timestamp_format = "Y-m-d\\TH:i:s\\Z";
     $source_file = (new DataResource($this))
         ->setURI("http://sabiork.h-its.org/sabioRestWebServices/searchKineticLaws/biopax")
         ->setTitle("SABIO-RK Biochemical Reaction Kinetics Database")
         ->setRetrievedDate(gmdate($timestamp_format, filemtime($odir . $ofile)))
         ->setFormat("text/xml")
         ->setPublisher("http://sabio.villa-bosch.de/")
         ->setHomepage("http://sabio.villa-bosch.de/")
         ->setRights("use-share-modify")
         ->setRights("no-commercial")
         ->setLicense("http://sabio.villa-bosch.de/layouts/content/termscondition.gsp")
         ->setDataset("http://identifiers.org/sabiork.reaction/");

     $prefix = parent::getPrefix();
     $bVersion = parent::getParameterValue('bio2rdf_release');
     $date = gmdate($timestamp_format);
     $output_file = (new DataResource($this))
         ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")
         ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
         ->setSource($source_file->getURI())
         ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sabiork/sabiork.php")
         ->setCreateDate($date)
         ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
         ->setPublisher("http://bio2rdf.org")
         ->setRights("use-share-modify")
         ->setRights("by-attribution")
         ->setRights("restricted-by-source-license")
         ->setLicense("http://creativecommons.org/licenses/by/3.0/")
         ->setDataset(parent::getDatasetURI());

     // NOTE(review): setFormat() may be called twice (gzip + serialisation);
     // presumably formats accumulate rather than overwrite - confirm.
     if ($gz) {
         $output_file->setFormat("application/gzip");
     }
     if (strstr($suffix, "nt")) {
         $output_file->setFormat("application/n-triples");
     } else {
         $output_file->setFormat("application/n-quads");
     }
     $dataset_description = $source_file->toRDF() . $output_file->toRDF();

     //write dataset description to file
     parent::setGraphURI($graph_uri);
     parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
     parent::getWriteFile()->write($dataset_description);
     parent::getWriteFile()->close();
     echo "done!" . PHP_EOL;
 }
Beispiel #2
0
 /**
  * Parse a single PubChem substance file.
  *
  * Opens the file through a CXML reader and, for as long as the reader keeps
  * returning a "PC-Substance" element, sets a 'record' checkpoint and hands
  * the reader to parse_substance_record().
  *
  * @param mixed $indir first argument passed to the CXML constructor
  * @param mixed $file  second argument passed to the CXML constructor
  */
 function parse_substance_file($indir, $file)
 {
     $reader = new CXML($indir, $file);
     for (;;) {
         // stop as soon as the reader no longer reports a substance element
         if (!($reader->Parse("PC-Substance") == TRUE)) {
             break;
         }
         parent::setCheckpoint('record');
         $this->parse_substance_record($reader);
     }
 }
Beispiel #3
0
 /**
  * Convert the frequency file currently open for reading into RDF.
  *
  * Every line has all "%" characters removed and is split on tabs into ten
  * columns: flat STITCH id, stereo STITCH id, CUI, placebo flag, frequency,
  * lower bound, upper bound, concept type, MedDRA concept id and MedDRA
  * concept label. Lines whose concept type is "LLT" are skipped.
  *
  * @return bool|null false (after raising E_USER_ERROR) when a line does not
  *                   have ten columns; no value otherwise
  */
 function freq()
 {
     $expected_columns = 10;
     parent::setCheckpoint('file');
     while (true) {
         $line = parent::getReadFile()->read();
         if (!$line) {
             // a falsy read marks the end of the input
             break;
         }

         $fields = explode("\t", str_replace("%", "", $line));
         $found_columns = count($fields);
         if ($found_columns != $expected_columns) {
             trigger_error("Expecting {$expected_columns}, but found " . $found_columns . " instead... skipping file!", E_USER_ERROR);
             return false;
         }

         // the stereo STITCH id and the CUI (columns 2 and 3) are not used
         list($stitch_flat, , , $placebo, $freq, $freq_lower, $freq_upper, $concept_type, $meddra_concept_id, $meddra_concept_label) = $fields;
         if ($concept_type == "LLT") {
             continue;
         }

         $effect_label = trim($meddra_concept_label);
         // the record id is derived from the raw line
         $id = "stitch_resource:" . md5("se_freq" . $line);
         $drug = "stitch:" . $stitch_flat;
         $label = $effect_label . " frequency for " . $drug;

         // describe the drug-effect-frequency record itself
         $record_rdf = parent::describeIndividual($id, $label, parent::getVoc() . "Drug-Effect-Frequency");
         $record_rdf .= parent::describeClass(parent::getVoc() . "Drug-Effect-Frequency", "SIDER Drug-Effect and Frequency");
         $record_rdf .= parent::triplify($id, parent::getVoc() . "drug", $drug);
         $record_rdf .= parent::triplify($id, parent::getVoc() . "effect", "umls:" . $meddra_concept_id);
         parent::addRDF($record_rdf);

         if ($placebo) {
             parent::addRDF(parent::triplifyString($id, parent::getVoc() . "placebo", "true", "xsd:boolean"));
         }

         // classify the frequency: exact (numeric) or qualitative (text) ...
         $is_exact = is_numeric($freq);
         if ($is_exact) {
             $flabel = $freq . "%";
             $ftype_label = "Exact-Frequency";
             $ftype = parent::getVoc() . $ftype_label;
         } else {
             $flabel = $freq;
             $ftype_label = "Qualitative-Frequency";
             $ftype = parent::getVoc() . $ftype_label;
         }
         // ... and reclassify as a range when the two bounds differ
         if ($freq_lower != $freq_upper) {
             $flabel .= "(" . $freq_lower . "-" . $freq_upper . ")";
             $ftype_label = "Range-Frequency";
             $ftype = parent::getVoc() . $ftype_label;
         }

         $fid = $id . md5($freq_lower . $freq_upper . $meddra_concept_id);
         $frequency_rdf = parent::triplify($id, parent::getVoc() . "frequency", $fid);
         $frequency_rdf .= parent::describeIndividual($fid, $flabel, $ftype);
         $frequency_rdf .= parent::describeClass($ftype, $ftype_label);
         parent::addRDF($frequency_rdf);

         // numeric frequencies are divided by 100; others are kept verbatim
         if ($is_exact) {
             parent::addRDF(parent::triplifyString($fid, parent::getVoc() . "frequency-value", $freq / 100));
         } else {
             parent::addRDF(parent::triplifyString($fid, parent::getVoc() . "frequency-value", $freq));
         }

         $bounds_rdf = parent::triplifyString($fid, parent::getVoc() . "lower-frequency", sprintf("%.3f", $freq_lower));
         $bounds_rdf .= parent::triplifyString($fid, parent::getVoc() . "upper-frequency", sprintf("%.3f", $freq_upper));
         parent::addRDF($bounds_rdf);

         parent::setCheckpoint('record');
     }
     parent::setCheckpoint('file');
 }