/**
 * Convert the FDA National Drug Code directory archive to RDF.
 *
 * Steps, in order:
 *  1. download the zip named by the 'download_url' parameter into 'indir'
 *     when it is missing locally or the 'download' parameter is 'true';
 *  2. open the archive and, for every entry requested through the 'files'
 *     parameter, hand a stream on "<entry>.txt" to the method of this object
 *     that carries the same name as the entry;
 *  3. write the generated RDF to "ndc.<output_format>" in 'outdir';
 *  4. write the description of the source and output files to the Bio2RDF
 *     release file.
 *
 * @return false|null FALSE when an archive member cannot be streamed,
 *                    nothing otherwise.
 */
function Run()
{
    $ldir = $this->GetParameterValue('indir');
    $odir = $this->GetParameterValue('outdir');
    $rfile = $this->GetParameterValue('download_url');
    // the local file name is the last path segment of the download URL
    $lfile = substr($rfile, strrpos($rfile, "/") + 1);

    // check if exists
    if (!file_exists($ldir . $lfile) or parent::getParameterValue('download') == 'true') {
        echo "dowloading {$rfile} ...";
        trigger_error("Will attempt to download ", E_USER_NOTICE);
        Utils::DownloadSingle($rfile, $ldir . $lfile);
        echo "done" . PHP_EOL;
    }

    // make sure we have the zip archive
    $zin = new ZipArchive();
    // ZipArchive::open() yields TRUE on success and an integer error code on
    // failure, so anything other than TRUE has to be treated as an error.
    if ($zin->open($ldir . $lfile) !== true) {
        trigger_error("Unable to open {$ldir}{$lfile}");
        exit;
    }

    // get the work
    if ($this->GetParameterValue('files') == 'all') {
        $files = explode("|", $this->GetParameterList('files'));
        // the first element of the parameter list is dropped
        array_shift($files);
    } else {
        $files = explode("|", $this->GetParameterValue('files'));
    }

    $gz = false;
    if (strstr(parent::getParameterValue('output_format'), "gz")) {
        $gz = true;
    }
    $outfile = "ndc." . parent::getParameterValue('output_format');
    parent::setWriteFile($odir . $outfile, $gz);

    // now go through each item in the zip file and process
    foreach ($files as $file) {
        echo "Processing {$file}... ";
        $fpin = $zin->getStream($file . ".txt");
        if (!$fpin) {
            trigger_error("Unable to get pointer to {$file} in {$ldir}{$lfile}", E_USER_ERROR);
            return FALSE;
        }
        // dispatch to the parser method named after the archive entry
        $this->{$file}($fpin);
        parent::writeRDFBufferToWriteFile();
        echo "done!" . PHP_EOL;
    }
    parent::getWriteFile()->close();

    echo "Generating dataset description for {$outfile}... ";

    // Timestamps carry a literal "Z", so they are produced with gmdate() (UTC)
    // and a zero-padded hour ("H") to form valid xsd:dateTime values.
    $source_file = (new DataResource($this))
        ->setURI($rfile)
        ->setTitle("FDA National Drug Code Directory")
        ->setRetrievedDate(gmdate("Y-m-d\\TH:i:s\\Z", filemtime($ldir . $lfile)))
        ->setFormat("text/tab-separated-value")
        ->setFormat("application/zip")
        ->setPublisher("http://www.fda.gov")
        ->setHomepage("http://www.fda.gov/Drugs/InformationOnDrugs/ucm142438.htm")
        ->setRights("use-share")
        ->setLicense(null)
        ->setDataset("http://identifiers.org/ndc/");

    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = gmdate("Y-m-d\\TH:i:s\\Z");

    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outfile}")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}")
        ->setSource($source_file->getURI())
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ndc/ndc.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());

    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strstr(parent::getParameterValue('output_format'), "nt")) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }

    $dataset_description = $source_file->toRDF() . $output_file->toRDF();

    // write dataset description to file
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Convert the Affymetrix annotation CSV held by the current read file to RDF.
 *
 * The first line is discarded. Lines starting with "#" are "key=value"
 * dataset attributes and are attached to the dataset URI. The first
 * remaining line is the header (41 columns are required), and every later
 * line is one probeset with 41 quoted, comma separated values.
 *
 * For each value, Map() is asked for a registry name for the column. When it
 * provides one, the value is emitted as a cross reference; otherwise the
 * column is handled by the switch on its header label.
 *
 * @param mixed $file Not used by this method; input comes from parent::getReadFile().
 */
function Parse($file)
{
    parent::getReadFile()->read(); // skip the first comment line
    $line = 1; // number of the line read most recently, used in error messages
    $first = true;
    while ($l = parent::getReadFile()->read(500000)) {
        $line++;

        if ($l[0] == "#") {
            // dataset attributes
            $a = explode('=', trim($l));
            // the first two characters of the key are dropped
            $r = $this->getVoc() . substr($a[0], 2);
            if (isset($a[1])) {
                $v = $a[1];
                if ($r == "affymetrix_vocabulary:genome-version-create_date") {
                    // a day of "00" is replaced by "01"
                    $x = explode("-", $a[1]);
                    if (isset($x[2]) && $x[2] == "00") {
                        $x[2] = "01";
                    }
                    $v = implode("-", $x);
                }
                parent::addRDF(
                    parent::triplifyString(parent::getDatasetURI(), $r, $v)
                    . parent::describe($r, "{$r}")
                );
            }
            continue;
        }

        if ($first == true) {
            $first = false;
            // header
            $header = explode(",", str_replace('"', '', trim($l)));
            $n = count($header);
            if ($n != 41) {
                trigger_error("Expecting 41 columns, found {$n} in header on line {$line}!", E_USER_ERROR);
                exit;
            }
            continue;
        }

        // strip the leading quote and the last two characters, then split on ","
        $a = explode('","', substr($l, 1, -2));
        $n = count($a);
        if ($n != 41) {
            trigger_error("Expecting 41 columns, found {$n} on line {$line}!", E_USER_ERROR);
            exit;
        }

        parent::writeRDFBufferToWriteFile();

        $id = $a[0];
        $qname = "affymetrix:{$id}";
        $label = "probeset {$a[0]} on GeneChip {$a[1]} ({$a[2]})";
        parent::addRDF(
            parent::describeIndividual($qname, $label, $this->getVoc() . "Probeset")
            . parent::describeClass($this->getVoc() . "Probeset", "Affymetrix probeset")
        );
        trigger_error($id, E_USER_NOTICE);

        // now process the entries
        foreach ($a as $k => $v) {
            if (trim($v) == '---') {
                continue;
            }

            // multi-valued entries are separated by " /// "
            $b = explode(" /// ", $v);
            $r = $this->Map($k);
            if (isset($r)) {
                foreach ($b as $c) {
                    // only the first " // " separated part of an entry is used
                    $d = explode(" // ", $c);
                    if ($r == 'symbol') {
                        $d[0] = str_replace(" ", "-", $d[0]);
                    }
                    $s = $this->getRegistry()->getPreferredPrefix($r);
                    if ($s == "ec") {
                        // keep what follows the first ":" of the value
                        $e = explode(":", $d[0]);
                        $d[0] = $e[1];
                    }
                    $this->addRDF(
                        parent::triplify($qname, $this->getVoc() . "x-{$s}", "{$s}:" . $d[0])
                        . parent::describeProperty($this->getVoc() . "x-{$s}", "a relation to {$s}")
                    );
                }
            } else {
                // we handle manually
                unset($rel);
                $label = $header[$k];
                switch ($label) {
                    case 'GeneChip Array':
                        $array_id = parent::getRes() . str_replace(" ", "-", $v);
                        parent::addRDF(
                            parent::triplify($qname, $this->getVoc() . "genechip-array", $array_id)
                            . parent::describeIndividual($array_id, "Affymetrix {$v} GeneChip array", $this->getVoc() . "Genechip-Array")
                            . parent::describeClass($this->getVoc() . "Genechip-Array", "Affymetrix GeneChip array")
                        );
                        break;

                    // The three Gene Ontology columns share one body. Each case sets
                    // $rel only when an earlier case has not, then falls through.
                    case 'Gene Ontology Biological Process':
                        if (!isset($rel)) {
                            $rel = 'go-process';
                            $prefix = "go";
                        }
                        // intentional fall-through
                    case 'Gene Ontology Cellular Component':
                        if (!isset($rel)) {
                            $rel = 'go-location';
                            $prefix = "go";
                        }
                        // intentional fall-through
                    case 'Gene Ontology Molecular Function':
                        if (!isset($rel)) {
                            $rel = 'go-function';
                            $prefix = "go";
                        }
                        $b = explode(" /// ", $v);
                        foreach ($b as $c) {
                            $d = explode(" // ", $c);
                            parent::addRDF(
                                $this->triplify($qname, $this->getVoc() . $rel, "{$prefix}:" . $d[0])
                                . $this->describeProperty($this->getVoc() . $rel, "{$rel}")
                            );
                        }
                        break;

                    case 'Transcript Assignments':
                        $b = explode(" /// ", $v);
                        foreach ($b as $c) {
                            $d = explode(" // ", $c);
                            $id = $d[0];
                            $prefix = $d[2];
                            if ($prefix == '---' || $id == '---') {
                                continue;
                            }

                            // Translate the source label found in the entry into the
                            // prefix used for the emitted identifier. Labels that are
                            // not listed are used unchanged.
                            switch ($prefix) {
                                case 'gb':
                                case 'gb_htc':
                                case 'tigr-plantta':
                                case 'primate_viral':
                                    $prefix = 'genbank';
                                    break;
                                case 'ncbibacterial':
                                case 'ncbi_bacterial':
                                case 'newrs.gi':
                                case 'newRS.gi':
                                    $prefix = 'gi';
                                    break;
                                case 'ens':
                                    $prefix = 'ensembl';
                                    break;
                                case 'ncbi_mito':
                                case 'ncbi_organelle':
                                case 'organelle':
                                    $prefix = 'refseq';
                                    break;
                                case 'affx':
                                case 'unknown':
                                case 'prop':
                                    $prefix = 'affymetrix';
                                    break;
                                case 'tigr_2004_08':
                                    $prefix = 'tigr';
                                    break;
                                case 'jgi-bacterial':
                                    $prefix = 'ncbigene';
                                    break;
                                case 'tb':
                                    $prefix = 'tuberculist';
                                    break;
                                case 'pa':
                                    $prefix = 'pseudomonas';
                                    break;
                                case 'gi|53267':
                                    $prefix = 'gi';
                                    $id = '53267';
                                    break;
                                case 'broad-tcup':
                                    // the prefix is kept; the id is cut at its first "-"
                                    $e = explode("-", $id);
                                    $id = $e[0];
                                    break;
                            }

                            parent::addRDF(
                                parent::triplify($qname, $this->getVoc() . "transcript-assignment", "{$prefix}:{$id}")
                                . parent::describeProperty($this->getVoc() . "transcript-assignment", "transcript assignment")
                            );
                        }
                        break;

                    case 'Annotation Transcript Cluster':
                        // Not converted. A disabled earlier version linked the text in
                        // front of "(" to a refseq identifier.
                        break;

                    case 'Annotation Date':
                        // e.g. "Jun 9, 2011"
                        $rel = "annotation-date";
                        preg_match("/^([A-Za-z]+) ([0-9]+), ([0-9]{4})\$/", $v, $m);
                        if (count($m) == 4) {
                            list(, $month_name, $day, $year) = $m;
                            $month = $this->getMonth($month_name);
                            if (!$day || $day == "0") {
                                $day = "01";
                            }
                            $date = $year . "-" . $month . "-" . str_pad($day, 2, "0", STR_PAD_LEFT) . "T00:00:00Z";
                            parent::addRDF(
                                parent::triplifyString($qname, $this->getVoc() . $rel, $date, "xsd:dateTime")
                                . parent::describeProperty($this->getVoc() . $rel, "{$rel}")
                            );
                        } else {
                            trigger_error("could not match date from {$v}", E_USER_ERROR);
                        }
                        break;

                    case 'Species Scientific Name':
                        // deliberately not converted
                        break;

                    case 'Transcript ID(Array Design)':
                        $rel = 'transcript';
                        // intentional fall-through
                    case 'Sequence type':
                    default:
                        if (!isset($rel)) {
                            // derive the relation name from the column header
                            $rel = str_replace(" ", "-", strtolower($label));
                        }
                        $b = explode(" /// ", $v);
                        foreach ($b as $c) {
                            parent::addRDF(
                                parent::triplifyString($qname, $this->getVoc() . $rel, stripslashes($c))
                                . parent::describeProperty($this->getVoc() . $rel, "{$rel}")
                            );
                        }
                        break;
                } // switch
            } // else
        }
        $this->WriteRDFBufferToWriteFile();
    }
}
/**
 * Convert the tab separated file held by the current read file into
 * drug-drug association RDF.
 *
 * The first line is read and discarded. Every following line yields one
 * association numbered by its position. Columns 0 and 1 give the two
 * compound identifiers (the first four characters are dropped and the rest
 * is turned into an integer), columns 2 and 3 their names, column 4 the
 * event identifier, column 5 the event name and column 7 the p-value.
 * Compounds and events are described only the first time they are seen.
 * The RDF buffer is flushed every 10000 rows and once more at the end.
 */
function twosides()
{
    $described = array(); // identifiers that already received a description
    $row_number = 0;

    $this->GetReadFile()->Read();
    while ($row = $this->GetReadFile()->Read()) {
        $cols = explode("\t", $row);

        $row_number++;
        if ($row_number % 10000 == 0) {
            $this->WriteRDFBufferToWriteFile();
        }

        $association = "twosides:{$row_number}";
        $first_drug = "pubchemcompound:" . (int) sprintf("%d", substr($cols[0], 4));
        $first_name = $cols[2];
        $second_drug = "pubchemcompound:" . (int) sprintf("%d", substr($cols[1], 4));
        $second_name = $cols[3];
        $event = "umls:" . $cols[4];
        $event_name = strtolower($cols[5]);
        $association_label = "DDI between {$first_name} and {$second_name} leading to {$event_name}";

        // describe both compounds, first one first, unless already done
        $drugs = array(
            array($first_drug, $first_name),
            array($second_drug, $second_name),
        );
        foreach ($drugs as $drug) {
            if (isset($described[$drug[0]])) {
                continue;
            }
            $chemical_rdf = parent::describeIndividual($drug[0], $drug[1], parent::getVoc() . "Chemical");
            $chemical_rdf .= parent::describeClass(parent::getVoc() . "Chemical", "PharmGKB Chemical");
            parent::addRDF($chemical_rdf);
            $described[$drug[0]] = '';
        }

        if (!isset($described[$event])) {
            $event_rdf = parent::describeIndividual($event, $event_name, parent::getVoc() . "Event");
            $event_rdf .= parent::describeClass(parent::getVoc() . "Event", "PharmGKB side effect event");
            parent::addRDF($event_rdf);
            $described[$event] = '';
        }

        // the association itself, linked to both compounds, the event and the p-value
        $rdf = parent::describeIndividual($association, $association_label, parent::getVoc() . "Drug-Drug-Association");
        $rdf .= parent::describeClass(parent::getVoc() . "Drug-Drug-Association", "PharmGKB Twosides Drug-Drug Association");
        $rdf .= parent::triplify($association, parent::getVoc() . "chemical", $first_drug);
        $rdf .= parent::triplify($association, parent::getVoc() . "chemical", $second_drug);
        $rdf .= parent::triplify($association, parent::getVoc() . "event", $event);
        $rdf .= parent::triplifyString($association, parent::getVoc() . "p-value", $cols[7]);
        parent::addRDF($rdf);
    }

    parent::writeRDFBufferToWriteFile();
}
/**
 * Download the requested BioModels BioPAX files, convert them to RDF and
 * write the dataset description.
 *
 * The 'files' parameter selects the models:
 *  - 'all' / 'curated': the id list is taken from a JSON cache file in
 *    'indir', or fetched from the BioModels SOAP service and then cached;
 *  - "<start>-<end>": every number in the inclusive range;
 *  - otherwise a comma separated list of numbers.
 * Numbers are left-padded with zeros to ten characters and prefixed with "BIOMD".
 *
 * A model whose download fails from both the curated and the non-curated
 * location is skipped.
 *
 * @return false|null FALSE when the id list cannot be obtained, nothing otherwise.
 */
function Run()
{
    // directory shortcuts
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');

    // Returns the ids stored in $cache_file, or asks the web service for them
    // through $operation and stores the answer. Returns FALSE when the web
    // service cannot be used.
    $load_ids = function ($cache_file, $operation) {
        if (file_exists($cache_file)) {
            return json_decode(file_get_contents($cache_file));
        }
        try {
            $client = @new SoapClient("http://www.ebi.ac.uk/biomodels-main/services/BioModelsWebServices?wsdl");
            $ids = $client->{$operation}();
        } catch (Exception $e) {
            echo $e->getMessage();
            return false;
        }
        file_put_contents($cache_file, json_encode($ids));
        return $ids;
    };

    // get the work specified
    $entries = array();
    $list = trim(parent::getParameterValue('files'));
    if ($list == 'all' || $list == 'curated') {
        if ($list == 'all') {
            // call the getAllModelsId webservice
            $entries = $load_ids($ldir . "all_models.json", "getAllModelsId");
        } else {
            // call the getAllCuratedModelsId webservice
            $entries = $load_ids($ldir . "curated_models.json", "getAllCuratedModelsId");
        }
        if ($entries === false || $entries === null) {
            // stop here instead of calling into a client that was never created
            trigger_error("Unable to obtain the list of BioModels identifiers", E_USER_ERROR);
            return false;
        }
    } elseif (($pos = strpos($list, "-")) !== FALSE) {
        // a hyphenated range was provided
        $start_range = substr($list, 0, $pos);
        $end_range = substr($list, $pos + 1);
        for ($i = $start_range; $i <= $end_range; $i++) {
            $entries[] = "BIOMD" . str_pad($i, 10, "0", STR_PAD_LEFT);
        }
    } else {
        // for comma separated list
        foreach (explode(",", $list) as $e) {
            // blanks around an item would otherwise end up inside the identifier
            $entries[] = "BIOMD" . str_pad(trim($e), 10, "0", STR_PAD_LEFT);
        }
    }

    // remember the graph URI so it can be restored before the description is written
    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    // set the write file
    $suffix = parent::getParameterValue('output_format');
    $outfile = 'biomodels' . '.' . $suffix;
    $gz = false;
    if (strstr(parent::getParameterValue('output_format'), "gz")) {
        $gz = true;
    }
    $dataset_description = '';
    parent::setWriteFile($odir . $outfile, $gz);

    // iterate over the entries
    $source_file = null; // description of the model processed last, if any
    $i = 0;
    $total = count($entries);
    foreach ($entries as $id) {
        echo "processing " . ++$i . " of {$total} - biomodel# " . $id;
        $download_file = $ldir . $id . ".owl.gz";
        $url = parent::getParameterValue('download_url') . "publ/{$id}/{$id}-biopax3.owl";

        // download if the file doesn't exist or we are told to
        if (!file_exists($download_file) || $this->GetParameterValue('download') == 'true') {
            echo " - downloading";
            $ret = utils::downloadsingle($url, 'compress.zlib://' . $download_file, true);
            if ($ret === false) {
                echo "\nTrying non-curated model";
                $url = parent::getParametervalue('download_url') . "uncura_publ/{$id}/{$id}-biopax3.owl";
                $ret = utils::downloadsingle($url, 'compress.zlib://' . $download_file, true);
                if ($ret === false) {
                    continue;
                }
            }
            echo " - downloaded";
        }

        // load entry, parse and write to file
        echo " - parsing... ";
        $buf = file_get_contents("compress.zlib://" . $download_file);
        $converter = new BioPAX2Bio2RDF($this);
        $converter->SetBuffer($buf)
            ->SetBioPAXVersion(3)
            ->SetBaseNamespace("http://identifiers.org/biomodels.db/{$id}/")
            ->SetBio2RDFNamespace("http://bio2rdf.org/biomodels:" . $id . "_")
            ->SetDatasetURI($this->GetDatasetURI());
        $rdf = $converter->Parse();
        parent::addRDF($rdf);
        parent::writeRDFBufferToWriteFile();

        // generate dataset description
        // Timestamps carry a literal "Z", so they are produced with gmdate()
        // (UTC) and a zero-padded hour ("H") to form valid xsd:dateTime values.
        $source_file = (new DataResource($this))
            ->setURI($url)
            ->setTitle("EBI BioModels Database - BioModel # {$id}")
            ->setRetrievedDate(gmdate("Y-m-d\\TH:i:s\\Z", filemtime($download_file)))
            ->setFormat("rdf/xml")
            ->setPublisher("http://www.ebi.ac.uk/")
            ->setHomepage("http://www.ebi.ac.uk/biomodels-main/")
            ->setRights("use-share-modify")
            ->setLicense("http://www.ebi.ac.uk/biomodels-main/termsofuse")
            ->setDataset("http://identifiers.org/biomodels.db/");
        $dataset_description .= $source_file->toRDF();
        echo "done!" . PHP_EOL;
    } // foreach
    parent::getWriteFile()->close();

    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = gmdate("Y-m-d\\TH:i:s\\Z");

    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})");
    if ($source_file !== null) {
        // As before, only the model processed last is recorded as source. When
        // no model was processed there is nothing to reference.
        $output_file->setSource($source_file->getURI());
    }
    $output_file
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/biomodels/biomodels.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());

    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strstr(parent::getParameterValue('output_format'), "nt")) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }
    $dataset_description .= $output_file->toRDF();

    // write dataset description to file
    parent::setGraphURI($graph_uri);
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Download the requested Pathway Commons BioPAX archives, convert each one
 * to its own RDF file and write the dataset description.
 *
 * The 'files' parameter is either 'all' (every entry of the parameter list
 * except the first) or a comma separated list of source names. A source
 * without a known download file, or whose download fails, is reported and
 * skipped.
 */
function Run()
{
    // get the work
    if ($this->GetParameterValue('files') == 'all') {
        $sources = explode("|", parent::getParameterList('files'));
        array_shift($sources);
    } else {
        // comma separated list
        $sources = explode(",", parent::getParameterValue('files'));
    }

    // remote file name for every supported source
    $download_files = array(
        "homo-sapiens" => "Pathway%20Commons%202%20homo%20sapiens.BIOPAX.owl.gz",
        "hprd" => "Pathway%20Commons%202%20HPRD.BIOPAX.owl.gz",
        "humancyc" => "Pathway%20Commons%202%20HumanCyc.BIOPAX.owl.gz",
        "nci-nature" => "Pathway%20Commons%202%20NCI_Nature.BIOPAX.owl.gz",
        "panther-pathway" => "Pathway%20Commons%202%20PANTHER%20Pathway.BIOPAX.owl.gz",
        "phosphositeplus" => "Pathway%20Commons%202%20PhosphoSitePlus.BIOPAX.owl.gz",
        "reactome" => "Pathway%20Commons%202%20Reactome.BIOPAX.owl.gz",
    );

    // remember the graph URI so it can be restored before the description is written
    $graph_uri = parent::getGraphURI();
    if (parent::getParameterValue('dataset_graph') == true) {
        parent::setGraphURI(parent::getDatasetURI());
    }

    // These do not depend on the source. They are read up front so that they
    // are defined for the final description even when no source is processed.
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $rdir = parent::getParameterValue('download_url');
    $suffix = parent::getParameterValue('output_format');
    $gz = false;
    if (strstr($suffix, "gz")) {
        $gz = true;
    }

    $dataset_description = '';
    $source_file = null; // description of the source processed last, if any

    // iterate over the requested data
    foreach ($sources as $source) {
        $source = trim($source);
        if (!isset($download_files[$source])) {
            trigger_error("No download file is known for source '{$source}'; skipping", E_USER_WARNING);
            continue;
        }
        echo "processing {$source}... ";

        // set the remote and input files
        $zfile = $source . ".owl.gz";
        $rfile = $rdir . $download_files[$source];
        $lfile = $ldir . $zfile;

        // download if the file doesn't exist locally or we are told to
        if (!file_exists($lfile) || $this->GetParameterValue('download') == 'true') {
            echo "downloading... ";
            $remote = file_get_contents($rfile);
            if ($remote === false) {
                // do not leave an empty local file behind that later runs would reuse
                trigger_error("Unable to download {$rfile}; skipping {$source}", E_USER_WARNING);
                continue;
            }
            file_put_contents($lfile, $remote);
        }

        // extract the file out of the archive and load into a buffer
        echo 'extracting... ';
        if (($fpin = gzopen($lfile, "r")) === FALSE) {
            trigger_error("Unable to open {$lfile}", E_USER_ERROR);
            exit;
        }
        $data = '';
        while (!gzeof($fpin)) {
            $buffer = gzgets($fpin, 4096);
            if ($buffer === false) {
                // read error: stop instead of looping without progress
                break;
            }
            $data .= $buffer;
        }
        gzclose($fpin);

        // set the output file
        $outfile = $source . '.' . $suffix;
        parent::setWriteFile($odir . $outfile, $gz);

        // send for parsing
        $p = new BioPAX2Bio2RDF($this);
        $p->SetBuffer($data)
            ->SetBioPAXVersion(3)
            ->SetBaseNamespace("http://purl.org/pc2/3/")
            ->SetBio2RDFNamespace("http://bio2rdf.org/pathwaycommons:")
            ->SetDatasetURI(parent::getDatasetURI());
        $rdf = $p->Parse();
        parent::addRDF($rdf);

        // write to output
        parent::writeRDFBufferToWriteFile();
        parent::getWriteFile()->Close();
        echo "done!" . PHP_EOL;

        // generate dataset description
        echo "Generating dataset description for {$zfile}... ";
        // Timestamps carry a literal "Z", so they are produced with gmdate()
        // (UTC) and a zero-padded hour ("H") to form valid xsd:dateTime values.
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("Pathway Commons")
            ->setRetrievedDate(gmdate("Y-m-d\\TH:i:s\\Z", filemtime($lfile)))
            ->setFormat("rdf/xml")
            ->setPublisher("http://www.pathwaycommons.org/")
            ->setHomepage("http://www.pathwaycommons.org/")
            ->setRights("use")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://www.pathwaycommons.org/pc2/home.html#data_sources")
            ->setDataset("http://identifiers.org/pathwaycommons/");
        $dataset_description .= $source_file->toRDF();
        echo "done!" . PHP_EOL;
    }

    echo "Generating dataset description for Bio2RDF Pathways Commons dataset... ";
    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = gmdate("Y-m-d\\TH:i:s\\Z");

    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})");
    if ($source_file !== null) {
        // As before, only the source processed last is recorded. When no
        // source was processed there is nothing to reference.
        $output_file->setSource($source_file->getURI());
    }
    $output_file
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/pathwaycommons/pathwaycommons.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());

    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strstr(parent::getParameterValue('output_format'), "nt")) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }
    $dataset_description .= $output_file->toRDF();

    // write dataset description to file
    parent::setGraphURI($graph_uri);
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/** * process a results xml file from the download directory **/ function process_file($infile) { $indir = parent::getParameterValue('indir'); $xml = new CXML($infile); $this->setCheckPoint('file'); while ($xml->Parse("clinical_study") == TRUE) { $this->setCheckPoint('record'); $this->root = $root = $xml->GetXMLRoot(); $this->nct_id = $nct_id = $this->getString("//id_info/nct_id"); $this->study_id = $study_id = parent::getNamespace() . "{$nct_id}"; ### declare $label = $this->getString("//brief_title"); if (!$label) { $label = $this->getString("//official_title"); } if (!$label) { $label = "Clinical trial #" . $nct_id; } parent::addRDF(parent::describeIndividual($study_id, $label, parent::getVoc() . "Clinical-Study") . parent::describeClass(parent::getVoc() . "Clinical-Study", "Clinical Study")); ########################################################################################## #required header ########################################################################################## parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "download-date", $this->getString('//required_header/download_date')) . parent::triplify($study_id, parent::getVoc() . "url", $this->getString('//required_header/url'))); ########################################################################################## #identifiers ########################################################################################## parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "nct-id", $this->getString('//id_info/nct_id'), "xsd:string") . parent::triplifyString($study_id, parent::getVoc() . "org-study-id", $this->getString('//id_info/org_study_id'), "xsd:string")); $sids = $root->xpath('//id_info/secondary_id'); if (isset($sids)) { foreach ($sids as $id) { parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . 
"secondary-id", (string) $id, "xsd:string")); } } $nctaliases = $root->xpath('//id_info/nct-alias'); if (isset($nctaliases)) { foreach ($nctaliases as $id) { parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "nct-alias", (string) $id, "xsd:string")); } } ########################################################################################## #titles ########################################################################################## parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "brief-title", $this->getString("//brief_title")) . parent::triplifyString($study_id, parent::getVoc() . "official-title", $this->getString("//official_title"))); ################################################################################### #brief summary ################################################################################### $brief_summary = str_replace(array("\r", "\n", "\t"), array("
", "
", "	"), $this->getString('//brief_summary/textblock')); parent::addRDF(parent::triplifyString($study_id, $this->getVoc() . "brief-summary", $brief_summary)); #################################################################################### # detailed description #################################################################################### $d = str_replace(array("\r", "\n", "\t"), array("
", "
", "	"), $this->getString('//detailed_description/textblock')); parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "detailed-description", $d)); ######################################################################################### #acronym ######################################################################################### parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "acronym", $this->getString("//acronym"))); ######################################################################################## #sponsors ######################################################################################## try { $sponsors = array("lead_sponsor", "collaborator"); foreach ($sponsors as $sponsor) { $a = @array_shift($root->xpath('//sponsors/' . $sponsor)); if ($a == null) { break; } $agency = $this->getString("//agency", $a); $agency_id = parent::getRes() . md5($agency); $agency_class = $this->getString("//agency_class", $a); $agency_class_id = parent::getRes() . md5($agency_class); parent::addRDF(parent::describeIndividual($agency_id, $agency, parent::getVoc() . "Organization") . parent::describeClass(parent::getVoc() . "Organization", "Organization") . parent::triplify($study_id, parent::getVoc() . str_replace("_", "-", $sponsor), $agency_id) . parent::describeIndividual($agency_class_id, $agency_class, parent::getVoc() . "Organization") . parent::describeClass(parent::getVoc() . "Organization", "Organization") . parent::triplify($agency_id, parent::getVoc() . "organization", $agency_class_id)); } } catch (Exception $e) { echo "There was an error in the lead sponsor element: {$e}\n"; } ################################################################################# # source ################################################################################# $source = $this->getString('//source'); if ($source) { $source_id = parent::getRes() . 
md5($source); parent::addRDF(parent::describeIndividual($source_id, $source, parent::getVoc() . "Organization") . parent::triplify($study_id, parent::getVoc() . "source", $source_id)); } ###################################################################################### # oversight ###################################################################################### try { $oversight = @array_shift($root->xpath('//oversight_info')); $oversight_id = parent::getRes() . md5($oversight->asXML()); $authority = $this->getString('//authority', $oversight); $authority_id = parent::getRes() . md5($authority); parent::addRDF(parent::describeIndividual($oversight_id, $authority, parent::getVoc() . "Organization") . parent::triplify($study_id, $this->getVoc() . "oversight", $oversight_id) . parent::triplify($study_id, $this->getVoc() . "authority", $authority_id) . parent::triplifyString($oversight_id, parent::getVoc() . "has-dmc", $this->getString('//has_dmc', $oversight))); } catch (Exception $e) { echo "There was an error in the oversight info element: {$e}\n"; } ################################################################################# # overall status ################################################################################# $overall_status = $this->getString('//overall_status'); if ($overall_status) { $status_id = parent::getRes() . md5($overall_status); parent::addRDF(parent::describeIndividual($status_id, $overall_status, parent::getVoc() . "Status") . parent::describeClass(parent::getVoc() . "Status", "Status") . parent::triplify($study_id, parent::getVoc() . "overall-status", $status_id)); } ######################################################################################### #why stopped ######################################################################################### parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . 
"why-stopped", $this->getString("//why_stopped"))); ################################################################################## # dates ################################################################################## $dates = array("start_date", "end_date", "completion_date", "primary_completion_date", "verification_date", "lastchanged_date", "firstreceived_date", "firstreceived_results_date"); foreach ($dates as $date) { $d = $this->getString('//' . $date); if ($d) { $datetime = $this->getDatetimeFromDate($d); if (isset($datetime)) { parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . str_replace("_", "-", $date), $datetime)); } else { trigger_error("unable to parse date: {$d}", E_USER_ERROR); } } } #################################################################################### # phase #################################################################################### $phase = $this->getString('//phase'); if ($phase && $phase != "N/A") { $phase_id = $this->getRes() . md5($phase); parent::addRDF(parent::describeIndividual($phase_id, $phase, parent::getVoc() . "Phase", $phase) . parent::describeClass(parent::getVoc() . "Phase", $phase) . parent::triplify($study_id, parent::getVoc() . "phase", $phase_id)); } ################################################################################### # study type #################################################################################### $study_type = $this->getString('//study_type'); if ($study_type) { $study_type_id = $this->getRes() . md5($study_type); parent::addRDF(parent::describeClass($study_type_id, $study_type, parent::getVoc() . "Study-Type") . parent::describeClass(parent::getVoc() . "Study-Type", "Study Type") . parent::triplify($study_id, parent::getVoc() . 
"study-type", $study_type_id)); } ############################################################################### # study design ############################################################################### $study_design = $this->getString('//study_design'); if ($study_design) { $study_design_id = parent::getRes() . md5($study_id . $study_design); parent::addRDF(parent::describeIndividual($study_design_id, "{$study_id} study design", parent::getVoc() . "Study-Design") . parent::describeClass(parent::getVoc() . "Study-Design", "Study Design") . parent::triplify($study_id, parent::getVoc() . "study-design", $study_design_id)); // Intervention Model: Parallel Assignment, Masking: Double-Blind, Primary Purpose: Treatment foreach (explode(", ", $study_design) as $i => $b) { $c = explode(": ", $b); if (isset($c[1])) { $sdp = $study_design_id . "-" . ($i + 1); $key = parent::getRes() . md5($c[0]); $value = parent::getRes() . md5($c[1]); parent::addRDF(parent::describeIndividual($sdp, $b, parent::getVoc() . "Study-Design-Parameter") . parent::describeClass(parent::getVoc() . "Study-Design-Parameter", "Study Design Parameter") . parent::triplify($sdp, parent::getVoc() . "key", $key) . parent::describeClass($key, $c[0]) . parent::triplify($sdp, parent::getVoc() . "value", $value) . parent::describeClass($value, $c[1]) . parent::triplify($study_design_id, parent::getVoc() . "study-design-parameter", $sdp)); } } } #################################################################################### # target duration #################################################################################### parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . 
"target-duration", $this->getString('//target_duration'))); ################################################################################ # outcomes ############################################################################### $outcomes = array("primary_outcome", "secondary_outcome", "other_outcome"); foreach ($outcomes as $outcome) { $o = $root->xpath('//' . $outcome); if ($o) { $os = $o; if (!is_array($o)) { $os = array($o); } foreach ($os as $o) { try { $po_id = parent::getRes() . md5($nct_id . $o->asXML()); $po_type = parent::getVoc() . str_replace("_", "-", $outcome); $measure = $this->getString('//measure', $o); $time_frame = $this->getString('//time_frame', $o); $safety_issue = $this->getString('//saftey_issue', $o); $description = $this->getString('//description', $o); parent::addRDF(parent::describeIndividual($po_id, $measure . " " . $time_frame, ucfirst($po_type)) . parent::describeClass(ucfirst($po_type), str_replace("_", " ", ucfirst($outcome))) . parent::triplifyString($po_id, "dc:description", $description) . parent::triplifyString($po_id, parent::getVoc() . "measure", $measure) . parent::triplifyString($po_id, parent::getVoc() . "time-frame", $time_frame) . parent::triplifyString($po_id, parent::getVoc() . "safety-issue", $safety_issue) . parent::triplify($study_id, parent::getVoc() . $po_type, $po_id)); } catch (Exception $e) { echo "There was an error parsing the primary outcome element: {$e} \n"; } } } } ############################################################################## #number of arms ############################################################################## try { parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . 
"number-of-arms", $this->getString('//number_of_arms'))); } catch (Exception $e) { echo "There was an exception parsing the number of arms element: {$e}\n"; } ############################################################################## #number of groups ############################################################################## try { parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "number-of-arms", $this->getString('//number_of_groups'))); } catch (Exception $e) { echo "There was an exception parsing the number of groups: {$e}\n"; } ############################################################################## #enrollment ############################################################################## try { $e = $root->xpath('//enrollment'); if ($e) { $type = strtolower((string) $e[0]->attributes()->type); $value = $this->getString('//enrollment'); parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . ($type ? $type . "-" : "") . "enrollment", $value)); } } catch (Exception $e) { echo "There was an exception parsing the enrollment element: {$e}\n"; } ############################################################################### #condition ############################################################################### try { $conditions = $root->xpath('//condition'); foreach ($conditions as $condition) { $mesh_label_id = parent::getRes() . md5($condition); parent::addRDF(parent::triplify($study_id, parent::getVoc() . "condition", $mesh_label_id) . parent::describeClass($mesh_label_id, $condition, parent::getVoc() . "Condition") . parent::describeClass(parent::getVoc() . 
"Condition", "Condition")); } } catch (Exception $e) { echo "There was an exception parsing condition element: {$e}\n"; } ################################################################################ # arm_group ################################################################################ try { $arm_groups = $root->xpath('//arm_group'); foreach ($arm_groups as $arm_group) { $arm_group_id = $this->getString('./arm_group_label', $arm_group); $arm_group_id = md5($arm_group_id); $arm_group_uri = parent::getRes() . $this->nct_id . "/arm-group/" . $arm_group_id; $arm_group_label = $this->nct_id . " arm group " . $arm_group_id; $arm_group_type = ucfirst(str_replace(" ", "_", $this->getString('./arm_group_type', $arm_group))); if (!$arm_group_type) { $arm_group_type = "Clinical-Arm"; } $description = $this->getString('./description', $arm_group); parent::addRDF(parent::describeIndividual($arm_group_uri, $arm_group_label, parent::getVoc() . $arm_group_type) . parent::describeClass(parent::getVoc() . $arm_group_type, ucfirst(str_replace("_", " ", $arm_group_type))) . parent::triplifyString($arm_group_uri, parent::getVoc() . "description", $description) . parent::describeIndividual($arm_group_uri, $arm_group, parent::getVoc() . "Arm-Group") . parent::describeClass(parent::getVoc() . "Arm-Group", "Arm Group") . parent::triplify($study_id, parent::getVoc() . "arm-group", $arm_group_uri)); } } catch (Exception $e) { echo "There was an exception in arm groups: {$e}\n"; } ############################################################################## #intervention ############################################################################## try { $interventions = $root->xpath('//intervention'); foreach ($interventions as $intervention) { $intervention_id = parent::getRes() . 
md5($intervention->asXML()); $intervention_name = $this->getString('./intervention_name', $intervention); $intervention_type = $this->getString('./intervention_type', $intervention); $intervention_type_uri = parent::getVoc() . ucfirst(str_replace(" ", "_", $intervention_type)); $intervention_desc = $this->getString('./description', $intervention); $intervention_on = $this->getString('./other_name', $intervention); parent::addRDF(parent::describeIndividual($intervention_id, $intervention_name, $intervention_type_uri) . parent::describeClass($intervention_type_uri, $intervention_type) . parent::triplifyString($intervention_id, parent::getVoc() . "intervention-name", $intervention_name) . parent::triplifyString($intervention_id, parent::getVoc() . "intervention-desc", $intervention_desc) . parent::triplifyString($intervention_id, parent::getVoc() . "other-name", $intervention_on) . parent::triplify($study_id, parent::getvoc() . "intervention", $intervention_id)); $agl = $intervention->xpath("./arm_group_label"); foreach ($agl as $a) { $arm_group_id = md5($a); $ag = parent::getRes() . $this->nct_id . "/arm-group/" . $arm_group_id; parent::addRDF(parent::describeIndividual($ag, $a, parent::getVoc() . "Arm-Group") . parent::describeClass(parent::getVoc() . "Arm-Group", "Arm Group") . parent::triplify($intervention_id, parent::getVoc() . "arm-group", $ag)); } } } catch (Exception $e) { echo "There was an error in interventions {$e}\n"; } ############################################################################### #eligibility ################################################################################ try { $eligibility = @array_shift($root->xpath('//eligibility')); if ($eligibility !== null) { $eligibility_label = "eligibility for " . $study_id; $eligibility_id = parent::getRes() . md5($eligibility->asXML()); parent::addRDF(parent::describeIndividual($eligibility_id, $eligibility_label, parent::getVoc() . "Eligibility") . parent::describeClass(parent::getVoc() . 
"Eligibility", "Eligibility") . parent::triplify($study_id, parent::getVoc() . "eligibility", $eligibility_id)); if ($criteria = @array_shift($eligibility->xpath('./criteria'))) { $text = @array_shift($criteria->xpath('./textblock')); parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . "text", $text)); $c = preg_split("/(Inclusion Criteria\\:|Exclusion Criteria\\:)/", $text); //inclusion if (isset($c[1])) { $d = explode(" - ", $c[1]); // the lists are separated by a hyphen foreach ($d as $inclusion) { $inc = trim($inclusion); if ($inc != '') { $inc_id = parent::getRes() . md5($inc); parent::addRDF(parent::describeIndividual($inc_id, $inc, parent::getVoc() . "Inclusion-Criteria") . parent::describeClass(parent::getVoc() . "Inclusion-Criteria", "Inclusion Criteria") . parent::triplify($eligibility_id, parent::getVoc() . "inclusion-criteria", $inc_id)); } } } //exclusion if (isset($c[2])) { $d = explode(" - ", $c[1]); foreach ($d as $exclusion) { $exc = trim($exclusion); if ($exc != '') { $exc_id = parent::getRes() . md5($exc); parent::addRDF(parent::describeIndividual($exc_id, $exc, parent::getVoc() . "Exclusion-Criteria") . parent::describeClass(parent::getVoc() . "Exclusion-Criteria", "Exclusion Criteria") . parent::triplify($eligibility_id, parent::getVoc() . "exclusion-criteria", $exc_id)); } } } } parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . "gender", $this->getString('./gender', $eligibility))); parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . "healthy-volunteers", $this->getString('./healthy_volunteers', $eligibility))); $attributes = array('minimum_age', 'maximum_age'); foreach ($attributes as $a) { $s = $this->getString('./' . $a, $eligibility); if ($s != 'N/A') { $age = trim(str_replace("Years", "", $s)); parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . 
str_replace("_", "-", $a), $age)); } } $attributes = array("study_pop" => "study-population", "sampling_method" => "sampling-method"); foreach ($attributes as $a => $r) { $e = @array_shift($eligibility->xpath('./' . $a)); if ($s = $this->getString('./' . $a, $eligibility)) { parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . $r, $this->getString('./textblock', $e))); } } } } catch (Exception $e) { echo "There was an error in eligibility: {$e}\n"; } ###################################################################################### #biospec ##################################################################################### parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "biospec-retention", $this->getString('//biospec_retention'))); try { $b = @array_shift($root->xpath('//biospec_descr')); if ($b) { parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "biospec_descr", $this->getString('./textblock', $b))); } } catch (Exception $e) { echo "There was an error in biospec_descr: {$e}\n"; } ################################################################### # contacts ################################################################### $contacts = array("overall_official", "overall_contact", "overall_contact_backup"); try { foreach ($contacts as $c) { $d = @array_shift($root->xpath('//' . $c)); if ($d) { parent::addRDF(parent::triplify($study_id, parent::getVoc() . str_replace("_", "-", $c), $this->makeContact($d))); } } } catch (Exception $e) { echo "There was an error parsing overall contact: {$e}" . "\n"; } ############################################################## # location of facility doing the testing ############################################################## try { $location = @array_shift($root->xpath('//location')); if ($location) { $location_uri = parent::getRes() . 
md5($location->asXML()); $name = $this->getString('//facility/name', $location); $address = @array_shift($location->xpath('//facility/address')); $contact = @array_shift($location->xpath('//contact')); $backups = @array_shift($location->xpath('//contact_backup')); $investigators = @array_shift($location->xpath('//investigator')); parent::addRDF(parent::describeIndividual($location_uri, $name, parent::getVoc() . "Location") . parent::describeClass(parent::getVoc() . "Location", "Location") . parent::triplifyString($location_uri, parent::getVoc() . "status", $this->getString('//status', $location)) . parent::triplify($study_id, parent::getVoc() . "location", $location_uri) . parent::triplify($location_uri, parent::getVoc() . "address", $this->makeAddress($address)) . ($contact != null ? parent::triplify($location_uri, parent::getVoc() . "contact", $this->makeContact($contact)) : "")); if ($backups) { foreach ($backups as $backup) { parent::addRDF(parent::triplify($location_uri, parent::getVoc() . "contact-backup", $this->makeContact($backup))); } } if ($investigators) { foreach ($investigators as $investigator) { parent::addRDF(parent::triplify($location_uri, parent::getVoc() . "investigator", $this->makeContact($investigator))); } } } } catch (Exception $e) { echo "There was an error parsing location: {$e}" . "\n"; } ###################################################################### #countries ###################################################################### try { $a = array("location_countries", "removed_countries"); foreach ($a as $country) { $lc = @array_shift($root->xpath('//' . $country)); if ($lc) { $label = $this->getString('//country', $lc); $cid = parent::getRes() . md5($label); parent::addRDF(parent::describeIndividual($cid, $label, parent::getVoc() . "Country") . parent::describeClass(parent::getVoc() . "Country", "Country") . parent::triplify($study_id, parent::getVoc() . 
"country", $cid)); } } } catch (Exception $e) { echo "There was an error parsing country: {$e}" . "\n"; } ###################################################################### #reference ###################################################################### try { $a = array("reference", "result_reference"); foreach ($a as $ref_type) { $references = $root->xpath('//' . $ref_type); foreach ($references as $reference) { $p = $this->getString('./PMID', $reference); if ($p) { $pmid = "pubmed:{$p}"; parent::addRDF(parent::describeIndividual($pmid, $p, parent::getVoc() . "Reference") . parent::describeClass(parent::getVoc() . "Reference", "Reference") . parent::triplifyString($pmid, parent::getVoc() . "citation", $this->getString('./citation', $reference)) . parent::triplify($study_id, parent::getVoc() . str_replace("_", "-", $ref_type), $pmid)); } } } } catch (Exception $e) { echo "There was an error parsing references element: {$e}\n"; } ####################################################################### #link ####################################################################### try { $links = $root->xpath('//link'); foreach ($links as $i => $link) { $url = $this->getString('./url', $link); $url = preg_replace("/>.*\$/", "", $url); $lid = parent::getRes() . md5($url); parent::addRDF(parent::describeIndividual($lid, $this->getString('./description', $link), parent::getVoc() . "Link") . parent::describeClass(parent::getVoc() . "Link", "Link") . parent::triplify($lid, parent::getVoc() . "url", $url) . parent::triplify($study_id, parent::getVoc() . "link", $lid)); } } catch (Exception $e) { echo "There was an error parsing link element: {$e}\n"; } ############################################################################ #responsible party ############################################################################ try { $rp = @array_shift($root->xpath('//responsible_party')); if ($rp) { $rp_id = parent::getRes() . 
md5($rp->asXML()); $label = $this->getString('./name_title', $rp); if (!$label) { $label = $this->getString('./organization', $rp); } else { $label .= ", " . $this->getString('./organization', $rp); } if (!$label) { $label = $this->getString('./party_type', $rp); } $org_id = parent::getRes() . md5($this->getString('./organization', $rp)); parent::addRDF(parent::describeIndividual($rp_id, $label, parent::getVoc() . "Responsible-Party") . parent::describeClass(parent::getVoc() . "Responsible-Party", "Responsible Party") . parent::triplify($study_id, parent::getVoc() . "responsible-party", $rp_id) . parent::triplify($rp_id, parent::getVoc() . "organization", $org_id) . parent::describeIndividual($org_id, $this->getString('./organization', $rp), parent::getVoc() . "Organization") . parent::describeClass(parent::getVoc() . "Organization", "Organization") . parent::triplifyString($rp_id, parent::getVoc() . "name-title", $this->getString('./name_title', $rp)) . parent::triplifyString($rp_id, parent::getVoc() . "party-type", $this->getString('./party_type', $rp)) . parent::triplifyString($rp_id, parent::getVoc() . "investigator-affiliation", $this->getString('./investigator_affiliation', $rp)) . parent::triplifyString($rp_id, parent::getVoc() . "investigator-full-name", $this->getString('./investigator_full_name', $rp)) . parent::triplifyString($rp_id, parent::getVoc() . "investigator-title", $this->getString('./investigator_title', $rp))); } } catch (Exception $e) { echo "There was an error parsing the responsible_party element: {$e}\n"; } ############################################################################## # keywords ############################################################################## try { $keywords = $root->xpath('//keyword'); foreach ($keywords as $keyword) { parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . 
"keyword", (string) $keyword)); } } catch (Exception $e) { echo "There was an error parsing the keywords element: {$e}"; } # mesh terms # note: mesh terms are assigned using an imperfect algorithm try { $mesh_terms = $root->xpath('//condition_browse/mesh_term'); foreach ($mesh_terms as $mesh_term) { $term = (string) $mesh_term; $mesh_id = parent::getRes() . md5($term); parent::addRDF(parent::triplify($study_id, parent::getVoc() . "condition-mesh", $mesh_id)); parent::addRDF(parent::triplifyString($mesh_id, "rdfs:label", $term)); } } catch (Exception $e) { echo "There was an error in mesh_terms: {$e}\n"; } ################################################################################ # regulated by fda? is section 801? has expanded access? ################################################################################ try { parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "is-fda-regulated", $this->getString('is_fda_regulated')) . parent::triplifyString($study_id, parent::getVoc() . "is-section-801", $this->getString('is_section_801')) . parent::triplifyString($study_id, parent::getVoc() . "has-expanded-access", $this->getString('has_expanded_access'))); } catch (Exception $e) { echo "There was an error parsing the is_fda_regulated element: {$e}\n"; } ############################################################################### # mesh terms for the intervention browse ############################################################################### try { $a = array("condition_browse", "intervention_browse"); foreach ($a as $browse_type) { $terms = $root->xpath("//{$browse_type}/mesh_term"); foreach ($terms as $term) { $term_label = (string) $term; $term_id = parent::getRes() . md5($term); parent::addRDF(parent::describeIndividual($term_id, $term_label, parent::getVoc() . "Term") . parent::describeClass(parent::getVoc() . "Term", "Term") . parent::triplify($study_id, parent::getVoc() . 
str_replace("_", "-", $browse_type), $term_id)); } } } catch (Exception $e) { echo "There was an error parsing {$browse_type}/mesh_term element: {$e}\n"; } ################################################################################ # clinical results ################################################################################ try { $cr = @array_shift($root->xpath('//clinical_results')); if ($cr) { $cr_id = parent::getRes() . md5($study_id . $cr->asXML()); parent::addRDF(parent::describeIndividual($cr_id, "clinical results for {$study_id}", parent::getVoc() . "Clinical-Result") . parent::describeClass(parent::getVoc() . "Clinical-Result", "Clinical Result") . parent::triplifyString($cr_id, parent::getVoc() . "description", $this->getString('./desc', $cr)) . parent::triplifyString($cr_id, parent::getVoc() . "restrictive-agreement", $this->getString('./restrictive_agreement', $cr)) . parent::triplifyString($cr_id, parent::getVoc() . "limitations-and-caveats", $this->getString('./limitations_and_caveats', $cr)) . parent::triplify($study_id, parent::getVoc() . "clinical-result", $cr_id)); } } catch (Exception $e) { echo "There was an error parsing clinical results: {$e}\n"; } ################################################################################ # Participant Flow ################################################################################ try { $pc = 1; $mc = 1; $wc = 1; $pf = @array_shift($root->xpath('//clinical_results/participant_flow')); if ($pf) { $pf_id = parent::getRes() . md5($pf->asXML()); parent::addRDF(parent::describeIndividual($pf_id, "participant flow for {$study_id}", parent::getVoc() . "Participant-Flow") . parent::describeClass(parent::getVoc() . "Participant-Flow", "Participant-Flow") . parent::triplify($study_id, parent::getVoc() . "participant-flow", $pf_id) . parent::triplifyString($pf_id, parent::getVoc() . "recruitment-details", $this->getString('./recruitment_details', $pf)) . 
parent::triplifyString($pf_id, parent::getVoc() . "pre-assignment-details", $this->getString('./pre_assignment_details', $pf))); $groups = @array_shift($pf->xpath('./group_list')); foreach ($groups as $group) { parent::addRDF(parent::triplify($pf_id, parent::getVoc() . "group", $this->makeGroup($group))); } //period_list $periods = @array_shift($pf->xpath('./period_list')); foreach ($periods as $period) { $period_id = parent::getRes() . $nct_id . "/period/" . $pc++; $period_title = $this->getString('./title', $period); parent::addRDF(parent::describeIndividual($period_id, $period_title . " for {$nct_id}", parent::getVoc() . "Period") . parent::describeClass(parent::getVoc() . "Period", "Period") . parent::triplify($pf_id, parent::getVoc() . "period", $period_id)); // milestones $milestones = @array_shift($period->xpath('./milestone_list')); if ($milestones) { foreach ($milestones as $milestone) { $milestone_id = parent::getRes() . $nct_id . "/milestone/" . $mc++; $label = $this->getString('./title', $milestone); parent::addRDF(parent::describeIndividual($milestone_id, $label, parent::getVoc() . "Milestone") . parent::describeClass(parent::getVoc() . "Milestone", "Milestone") . parent::triplify($period_id, parent::getVoc() . "milestone", $milestone_id)); // participants $p = 1; $ps_list = @array_shift($milestone->xpath('./participants_list')); foreach ($ps_list as $ps) { $ps_id = $milestone_id . "/p/" . $p++; $group_id = parent::getRes() . $this->nct_id . "/group/" . $ps->attributes()->group_id; $count = (string) $ps->attributes()->count; parent::addRDF(parent::describeIndividual($ps_id, "participant counts in " . $ps->attributes()->group_id . " for milestone {$mc} of {$nct_id}", parent::getVoc() . "Participant-Count") . parent::describeClass(parent::getVoc() . "Participant-Count", "Participant Count") . parent::triplify($ps_id, parent::getVoc() . "group", $group_id) . parent::triplifyString($ps_id, parent::getVoc() . "count", $count) . 
parent::triplify($milestone_id, parent::getVoc() . "participant-counts", $ps_id)); } } } // milestones $withdraws = @array_shift($period->xpath('./drop_withdraw_reason_list')); if ($withdraws) { foreach ($withdraws as $withdraw) { $wid = parent::getRes() . $this->nct_id . "/withdraw/" . $wc++; $label = $this->getString('./title', $withdraw); parent::addRDF(parent::describeIndividual($wid, $label, parent::getVoc() . "Withdraw-Reason") . parent::describeClass(parent::getVoc() . "Withdraw-Reason", "Withdraw Reason")); // participants $ps_list = @array_shift($withdraw->xpath('./participants_list')); foreach ($ps_list as $ps) { $group_id = parent::getRes() . $nct_id . "/group/" . $ps->attributes()->group_id; $count = (string) $ps->attributes()->count; parent::addRDF(parent::triplify($wid, parent::getVoc() . "group", $group_id) . parent::triplifyString($wid, parent::getVoc() . "count", $count)); } } } } } } catch (Exception $e) { echo "There was an error parsing participant flow element: {$e}\n"; } ################################################################################ # baseline ################################################################################ try { $baseline = @array_shift($root->xpath('//baseline')); if ($baseline) { $b_id = $this->nct_id . "/baseline"; $b_uri = parent::getRes() . $b_id; // group list $groups = @array_shift($baseline->xpath('./group_list')); foreach ($groups as $group) { parent::addRDF(parent::describeIndividual($b_uri, "baseline for {$nct_id}", parent::getVoc() . "Baseline") . parent::describeClass(parent::getVoc() . "Baseline", "Baseline") . parent::triplify($b_uri, parent::getVoc() . "group", $this->makeGroup($group)) . parent::triplify($study_id, parent::getVoc() . "baseline", $b_uri)); } // measure list $measures = @array_shift($baseline->xpath('./measure_list')); foreach ($measures as $measure) { parent::addRDF(parent::triplify($b_uri, parent::getVoc() . 
"measure", $this->makeMeasure($measure))); } } } catch (Exception $e) { echo "Error in parsing baseline" . PHP_EOL; } ################################################################################ # outcomes ################################################################################ try { $outcomes = @array_shift($root->xpath('//outcome_list')); if ($outcomes) { foreach ($outcomes as $i => $outcome) { $outcome_id = $this->nct_id . "/outcome/" . ($i + 1); $outcome_uri = parent::getRes() . $outcome_id; $outcome_label = $this->getString("./title", $outcome); if (!$outcome_label) { $outcome_label = "outcome for " . $this->nct_id; } parent::addRDF(parent::describeIndividual($outcome_uri, $outcome_label, parent::getVoc() . "Outcome", $this->getString("./description", $outcome)) . parent::describeClass(parent::getVoc() . "Outcome", "Outcome") . parent::triplify($study_id, parent::getVoc() . "outcome", $outcome_uri) . parent::triplifyString($outcome_uri, parent::getVoc() . "type", $this->getString("./type", $outcome)) . parent::triplifyString($outcome_uri, parent::getVoc() . "time-frame", $this->getString("./time_frame", $outcome)) . parent::triplifyString($outcome_uri, parent::getVoc() . "safety-issue", $this->getString("./safety_issue", $outcome)) . parent::triplifyString($outcome_uri, parent::getVoc() . "posting-date", $this->getString("./posting-date", $outcome)) . parent::triplifyString($outcome_uri, parent::getVoc() . "population", $this->getString("./population", $outcome))); $groups = @array_shift($outcome->xpath('./group_list')); if ($groups) { foreach ($groups as $group) { parent::addRDF(parent::triplify($outcome_uri, parent::getVoc() . "group", $this->makeGroup($group))); } } // measure list $measures = @array_shift($outcome->xpath('./measure_list')); if ($measures) { foreach ($measures as $measure) { parent::addRDF(parent::triplify($outcome_uri, parent::getVoc() . 
"measure", $this->makeMeasure($measure))); } } // analysis list $analyses = @array_shift($outcome->xpath('./analysis_list')); if ($analyses) { foreach ($analyses as $analysis) { parent::addRDF(parent::triplify($outcome_uri, parent::getVoc() . "analysis", $this->makeAnalysis($analysis))); } } } } } catch (Exception $e) { echo "Error in parsing outcomes" . PHP_EOL; } ################################################################################ # events ################################################################################ try { $c_ev = $c_c = 1; $reported_events = @array_shift($root->xpath('//reported_events')); if ($reported_events) { $rp_id = parent::getRes() . md5($reported_events->asXML()); $groups = @array_shift($reported_events->xpath('./group_list')); parent::addRDF(parent::describeIndividual($rp_id, "Reported events for {$nct_id}", parent::getVoc() . "Reported-Events") . parent::describeClass(parent::getVoc() . "Reported-Events", "Reported Events") . parent::triplify($study_id, parent::getVoc() . "reported-events", $rp_id)); foreach ($groups as $group) { parent::addRDF(parent::triplify($rp_id, parent::getVoc() . "group", $this->makeGroup($group))); } // events $event_list = array("serious_events" => "Serious Event", "other_events" => "Other Event"); foreach ($event_list as $ev => $ev_label) { $et = @array_shift($reported_events->xpath('./' . $ev)); if (!$et) { continue; } $ev_uri = parent::getVoc() . str_replace(" ", "-", $ev_label); $categories = @array_shift($et->xpath('./category_list')); foreach ($categories as $category) { $major_title = $this->getString('./title', $category); $major_title_uri = parent::getRes() . md5($major_title); $events = @array_shift($category->xpath('./event_list')); foreach ($events as $event) { $e_uri = parent::getRes() . $this->nct_id . "/{$ev}/" . $c_ev++; $subtitle = (string) $this->getString('./sub_title', $event) . " for " . $this->nct_id; $subtitle_uri = parent::getRes() . 
md5($subtitle); parent::addRDF(parent::describeIndividual($e_uri, $subtitle, $ev_uri) . parent::describeClass($ev_uri, $ev_label) . parent::triplify($e_uri, parent::getVoc() . "sub-title", $subtitle_uri) . parent::describeIndividual($subtitle_uri, $subtitle, parent::getVoc() . "Event") . parent::describeClass(parent::getVoc() . "Event", "Event") . parent::triplify($e_uri, parent::getVoc() . "major-title", $major_title_uri) . parent::describeClass($major_title_uri, $major_title) . parent::triplify($rp_id, parent::getVoc() . str_replace("_", "-", $ev), $e_uri)); $counts = $event->xpath('./counts'); foreach ($counts as $c) { $group_id = $c->attributes()->group_id; $group_uri = parent::getRes() . $nct_id . "/group/" . $group_id; $c_uri = $e_uri . "/count/" . $c_c++; parent::addRDF(parent::describeIndividual($c_uri, $subtitle . " for " . $group_id . " in " . $this->nct_id, parent::getVoc() . "Event-Count") . parent::describeClass(parent::getVoc() . "Event-Count", "Event Count") . parent::triplify($c_uri, parent::getVoc() . "group", $group_uri) . parent::triplify($e_uri, parent::getVoc() . "count", $c_uri) . parent::triplifyString($c_uri, parent::getVoc() . "default-vocabulary", $this->getString('./default_vocab', $et)) . parent::triplifyString($c_uri, parent::getVoc() . "frequency-threshold", $this->getString('./frequency_threshold', $et)) . parent::triplifyString($c_uri, parent::getVoc() . "default-assessment", $this->getString('./default_assessment', $et)) . parent::triplifyString($c_uri, parent::getVoc() . "number-events", $c->attributes()->events) . parent::triplifyString($c_uri, parent::getVoc() . "subjects-affected", $c->attributes()->subjects_affected) . parent::triplifyString($c_uri, parent::getVoc() . "subjects-at-risk", $c->attributes()->subjects_at_risk)); } } } } } } catch (Exception $e) { echo "Error in parsing reported events" . PHP_EOL; } parent::writeRDFBufferToWriteFile(); } $this->setCheckPoint('record'); $this->setCheckPoint('dataset'); }
/**
 * Converts the selected dbSNP records to RDF and writes the dataset description.
 *
 * The 'files' parameter is a comma-separated list. When its first element is
 * 'all', 'clinical', 'omim' or 'pharmgkb' the SNP list is obtained from the
 * corresponding helper; otherwise the elements themselves are used as the
 * SNP identifiers.
 *
 * For every SNP the file "<indir><snp>.xml.gz" is parsed. It is downloaded
 * first when it is missing locally or when the 'download' parameter is set.
 * SNPs whose download fails are skipped.
 *
 * Changes relative to the previous implementation:
 *  - a missing file no longer flips the global 'download' parameter, which
 *    used to force a re-download of every subsequent SNP;
 *  - the 'download' parameter is interpreted as a boolean flag, so the
 *    string 'false' no longer counts as true;
 *  - the published download URL is built from the bare output file name
 *    instead of the local output path;
 *  - the source URI is defined even when the SNP list is empty.
 */
function run()
{
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $format = parent::getParameterValue('output_format');
    $base_url = parent::getParameterValue('download_url');

    // A plain `== true` is satisfied by any non-empty string, including
    // 'false'; FILTER_VALIDATE_BOOLEAN accepts true/'true'/'1'/'on'/'yes'.
    $force_download = filter_var(parent::getParameterValue('download'), FILTER_VALIDATE_BOOLEAN);

    // work out which SNPs to process
    $snps = explode(",", parent::getParameterValue('files'));
    switch ($snps[0]) {
        case 'all':
            $snps = $this->getSNPs();
            break;
        case 'clinical':
            $snps = $this->getSNPs(true);
            break;
        case 'omim':
            $lfile = $ldir . 'snp_omimvar.txt';
            if (!file_exists($lfile) || $force_download) {
                utils::DownloadSingle('ftp://ftp.ncbi.nlm.nih.gov/snp/Entrez/snp_omimvar.txt', $lfile);
            }
            $snps = $this->processOMIMVar($lfile);
            break;
        case 'pharmgkb':
            $lfile = $ldir . 'pharmgkb.snp.zip';
            if (!file_exists($lfile) || $force_download) {
                utils::DownloadSingle('http://www.pharmgkb.org/download.do?objId=rsid.zip&dlCls=common', $lfile);
            }
            $snps = $this->processPharmGKBSnps($lfile);
            break;
        default:
            // explicit list of SNP identifiers: use it as given
            break;
    }

    // Keep the bare file name separate from the local path: the name alone
    // is what belongs in the public download URL further down.
    $outfile = "dbsnp." . $format;
    $gz = strstr($format, ".gz") !== false;
    parent::setWriteFile($odir . $outfile, $gz);

    // Pre-set so the dataset description below has a source URI even when
    // there is nothing to process; otherwise it ends up as the last SNP's URL.
    $rfile = $base_url;

    $n = count($snps);
    $z = 0;
    foreach ($snps as $i => $snp) {
        $file = $snp . '.xml.gz';
        $infile = $ldir . $file;
        $rfile = $base_url . $snp;

        // the decision is made per file; it must not leak to later SNPs
        $download = $force_download || !file_exists($infile);
        if ($download) {
            trigger_error("Downloading {$file}", E_USER_NOTICE);
            $ret = utils::downloadSingle($rfile, "compress.zlib://" . $infile, true);
            if ($ret === false) {
                continue;
            }
        }

        // process
        echo "Processing {$snp} (" . ($i + 1) . "/{$n})" . PHP_EOL;
        $this->parse($infile);
        parent::writeRDFBufferToWriteFile();

        // parent::clear() is invoked on the first processed SNP and then
        // after every 10000 processed SNPs
        if ($z++ % 10000 == 0) {
            parent::clear();
        }
    }
    parent::getWriteFile()->close();

    // generate the dataset description file
    $source_file = (new DataResource($this))
        ->setURI($rfile)
        ->setTitle("dbSNP " . parent::getDatasetVersion())
        ->setRetrievedDate(date("Y-m-d\\TG:i:s\\Z"))
        ->setFormat("application/xml")
        ->setPublisher("http://www.ncbi.nlm.nih.gov")
        ->setHomepage("http://www.ncbi.nlm.nih.gov/SNP/")
        ->setRights("use-share-modify")
        ->setLicense("http://www.ncbi.nlm.nih.gov/About/disclaimer.html")
        ->setDataset("http://identifiers.org/dbsnp/");

    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = date("Y-m-d\\TG:i:s\\Z");
    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outfile}")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
        ->setSource($source_file->getURI())
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/dbsnp/dbsnp.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());

    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strstr($format, "nt")) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }

    $dataset_description = $source_file->toRDF() . $output_file->toRDF();
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
}
/**
 * Read a tab-separated listing ("<ns>:<id>\t<name>" per line) from the current
 * read file, describe each entry and parse its downloaded record.
 *
 * Entries are filtered by $this->idlist when that property is set, and the
 * identifier is prefixed with the namespace when $this->org is set. For the
 * "pathway" database the KGML file is fetched and parsed as well.
 *
 * @param string $db database name; used to build the class URI and label
 */
function process($db)
{
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');

    while ($line = parent::getReadFile()->read()) {
        $columns = explode("\t", $line);
        $nsid = $columns[0];
        $name = $columns[1];

        $parts = explode(":", $nsid);
        $ns = $parts[0];
        $id = $parts[1];

        if (isset($this->idlist) and !in_array($id, $this->idlist)) {
            continue;
        }
        if (isset($this->org)) {
            $id = "{$ns}_{$id}";
        }

        $uri = $this->getNamespace() . $id;
        $class_uri = parent::getVoc() . ucfirst($db);
        parent::addRDF(
            parent::describeIndividual($uri, $name, $class_uri)
            . parent::describeClass($class_uri, "KEGG {$db}")
            . parent::triplifyString($uri, parent::getVoc() . "internal-id", $nsid)
        );

        // fetch (when needed) and parse the flat-file record for this entry
        $local = $ldir . $id . ".txt";
        $remote = parent::getParameterValue("download_url") . "get/{$nsid}";
        $must_fetch = !file_exists($local) || parent::getParameterValue('download') == 'true';
        if ($must_fetch) {
            echo "downloading {$nsid} ";
            if (utils::downloadSingle($remote, $local) === false) {
                echo "unable to download {$nsid} ... skipping" . PHP_EOL;
                continue;
            }
            echo "done. ";
        }
        echo "parsing {$nsid} ... ";
        $this->parseEntry($local);
        parent::writeRDFBufferToWriteFile();

        // pathways additionally have a KGML representation
        if ($db === "pathway") {
            $ko = str_replace("map", "ko", $id);
            $local = $ldir . $id . ".kgml";
            $remote = "http://www.kegg.jp/kegg-bin/download?entry={$ko}&format=kgml";
            $must_fetch = !file_exists($local) || parent::getParameterValue('download') == 'true';
            if ($must_fetch) {
                echo "downloading KGML for {$nsid} ";
                if (utils::downloadSingle($remote, $local) === false) {
                    echo "unable to download {$nsid} ... skipping" . PHP_EOL;
                    continue;
                }
                echo "done. ";
            }
            $this->parseKGML($local);
            parent::writeRDFBufferToWriteFile();
        }

        echo "done!" . PHP_EOL;
    }
}
/**
 * Convert the gene expression CSV in the current read file to RDF.
 *
 * The first line is taken as the header and must split into exactly 8
 * comma-separated columns; otherwise a warning is raised and false is
 * returned. Every following line yields one expression record plus an
 * evidence node carrying the dataset counts and the p-value.
 *
 * @return false|null false when the header has an unexpected column count
 */
function gene_expression()
{
    $expected_columns = 8;
    $header = explode(",", parent::getReadFile()->read());
    $n = count($header);
    if ($n != $expected_columns) {
        trigger_error("Found {$n} columns in gene file - expecting {$expected_columns}!", E_USER_WARNING);
        return false;
    }

    while ($line = parent::getReadFile()->read(200000)) {
        $row = str_getcsv($line);
        list(
            $mgi_symbol,
            $mgi_description,
            $geneid,
            $total_datasets,
            $total_ovexp,
            $total_underexp,
            $p_value,
            $expression
        ) = $row;

        // both identifiers are hashes over the same field concatenation
        $seed = $geneid . $total_datasets . $total_ovexp . $total_underexp . $p_value . $expression;
        $id = parent::getRes() . md5($seed);
        $evidence_id = parent::getRes() . md5($seed . "_evidence");

        $label = "Dietary restriction induced {$expression}-expression of {$mgi_symbol}"
            . " based on microarray results from {$total_datasets} datasets, with p-value {$p_value}";
        $type_label = "Gene " . ucfirst($expression) . " Expression";
        $type = parent::getVoc() . str_replace(" ", "-", $type_label);

        $gene = "ncbigene:" . $geneid;
        $voc = parent::getVoc();

        $rdf = parent::describeIndividual($id, $label, $type);
        $rdf .= parent::describeClass($type, $type_label);
        $rdf .= parent::triplify($id, $voc . "gene", $gene);
        $rdf .= parent::triplifyString($gene, $voc . "mgi-gene-symbol", $mgi_symbol);
        $rdf .= parent::triplifyString($gene, $voc . "mgi-gene-description", $mgi_description);
        $rdf .= parent::triplify($id, $voc . "evidence", $evidence_id);
        $rdf .= parent::triplifyString($id, $voc . "perturbation-context", "dietary restriction");
        $rdf .= parent::triplifyString($evidence_id, $voc . "total-number-datasets", $total_datasets);
        $rdf .= parent::triplifyString($evidence_id, $voc . "total-number-datasets-overexpressed", $total_ovexp);
        $rdf .= parent::triplifyString($evidence_id, $voc . "total-number-datasets-underexpressed", $total_underexp);
        $rdf .= parent::triplifyString($evidence_id, $voc . "p-value", $p_value);

        parent::addRDF($rdf);
        parent::writeRDFBufferToWriteFile();
    }
}
/**
 * Download (when needed) and convert the selected OMIM entries to RDF, then
 * write the dataset description file.
 *
 * The API key is taken from the 'omim_api_key' parameter or, when that is
 * empty, from the file named by 'omim_api_key_file'. The 'files' parameter
 * is 'all', a hyphenated range ("100000-100200") or a comma-separated list
 * of MIM numbers.
 *
 * Entries whose JSON cannot be downloaded, read or decoded are reported
 * with a warning and skipped.
 *
 * @return bool TRUE when the run completes
 */
function Run()
{
    // directory shortcuts
    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');

    if (parent::getParameterValue('omim_api_key') == '') {
        $key_file = parent::getParameterValue('omim_api_key_file');
        if (file_exists($key_file)) {
            $key = trim(file_get_contents($key_file));
            if ($key) {
                parent::setParameterValue('omim_api_key', $key);
            } else {
                trigger_error("No API key found in the specified omim key file {$key_file}", E_USER_WARNING);
            }
        } else {
            trigger_error("No OMIM key has been provided either by commmand line or in the expected omim key file {$key_file}", E_USER_WARNING);
        }
    }

    // get the list of mim2gene entries
    $entries = $this->GetListOfEntries($ldir);

    // get the work specified
    $list = trim(parent::getParameterValue('files'));
    if ($list != 'all') {
        // start from an empty selection so that a range or list matching
        // nothing yields an empty array instead of an undefined variable
        $myentries = array();
        if (($pos = strpos($list, "-")) !== FALSE) {
            // hyphenated range: keep the entries whose number lies within it
            // (the already loaded list is reused rather than fetched again)
            $start_range = substr($list, 0, $pos);
            $end_range = substr($list, $pos + 1);
            foreach ($entries as $e => $type) {
                if ($e >= $start_range && $e <= $end_range) {
                    $myentries[$e] = $type;
                }
            }
            $entries = $myentries;
        } else {
            // comma separated list: keep only the requested, known entries
            $b = explode(",", parent::getParameterValue('files'));
            foreach ($b as $e) {
                $myentries[$e] = '';
            }
            $entries = array_intersect_key($entries, $myentries);
        }
    }

    // set the write file
    $gz = strstr(parent::getParameterValue('output_format'), ".gz") === FALSE ? false : true;
    $outfile = 'omim.' . parent::getParameterValue('output_format');
    parent::setWriteFile($odir . $outfile, $gz);

    // declare the mapping method types
    $this->get_method_type(null, true);

    // iterate over the entries
    $i = 0;
    $total = count($entries);
    foreach ($entries as $omim_id => $type) {
        echo "processing " . ++$i . " of {$total} - omim# ";
        $download_file = $ldir . $omim_id . ".json.gz";
        $gzfile = "compress.zlib://{$download_file}";

        // download if the file doesn't exist or we are told to
        if (!file_exists($download_file) || parent::getParameterValue('download') == true) {
            // download using the api
            $url = parent::getParameterValue('omim_api_url')
                . '&apiKey=' . parent::getParameterValue('omim_api_key')
                . '&mimNumber=' . $omim_id;
            $buf = file_get_contents($url);
            if ($buf !== false && $buf !== '') {
                file_put_contents($download_file, $buf);
                usleep(500000); // limit of 4 requests per second
            }
        }

        // load the entry; a failed download or a damaged file must not reach
        // the parser as null
        $raw = file_exists($download_file) ? file_get_contents($gzfile) : false;
        $entry = ($raw === false) ? null : json_decode($raw, true);
        if (!is_array($entry) || !isset($entry["omim"]["entryList"][0]["entry"])) {
            echo $omim_id . PHP_EOL;
            trigger_error("Unable to load OMIM entry {$omim_id} from {$download_file} - skipping", E_USER_WARNING);
            continue;
        }

        // parse and write to file
        $omim_id = trim((string) $entry["omim"]["entryList"][0]["entry"]['mimNumber']);
        echo $omim_id;
        $this->ParseEntry($entry, $type);
        parent::writeRDFBufferToWriteFile();
        echo PHP_EOL;
    }
    parent::writeRDFBufferToWriteFile();
    parent::getWriteFile()->close();

    // generate the dataset description file
    $source_file = (new DataResource($this))
        ->setURI(parent::getParameterValue('omim_api_url'))
        ->setTitle("OMIM " . parent::getDatasetVersion())
        ->setRetrievedDate(date("Y-m-d\\TG:i:s\\Z"))
        ->setFormat("application/json")
        ->setPublisher("http://omim.org")
        ->setHomepage("http://omim.org")
        ->setRights("use")
        ->setRights("no-commercial")
        ->setRights("registration-required")
        ->setLicense("http://www.omim.org/help/agreement")
        ->setDataset("http://identifiers.org/omim/");

    $prefix = parent::getPrefix();
    $bVersion = parent::getParameterValue('bio2rdf_release');
    $date = date("Y-m-d\\TG:i:s\\Z");
    $output_file = (new DataResource($this))
        ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$outfile}")
        ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix} (generated at {$date})")
        ->setSource($source_file->getURI())
        ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/omim/omim.php")
        ->setCreateDate($date)
        ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
        ->setPublisher("http://bio2rdf.org")
        ->setRights("use-share-modify")
        ->setRights("by-attribution")
        ->setRights("restricted-by-source-license")
        ->setLicense("http://creativecommons.org/licenses/by/3.0/")
        ->setDataset(parent::getDatasetURI());
    if ($gz) {
        $output_file->setFormat("application/gzip");
    }
    if (strstr(parent::getParameterValue('output_format'), "nt")) {
        $output_file->setFormat("application/n-triples");
    } else {
        $output_file->setFormat("application/n-quads");
    }

    $dataset_description = $source_file->toRDF() . $output_file->toRDF();
    parent::setWriteFile($odir . parent::getBio2RDFReleaseFile());
    parent::getWriteFile()->write($dataset_description);
    parent::getWriteFile()->close();
    return TRUE;
}
/**
 * Convert the tab-separated interaction table in the current read file to RDF.
 *
 * The first line is the header (its first character is dropped before
 * splitting) and must contain 54 columns. Every following line describes one
 * interaction record; column indexes used below are zero-based.
 *
 * @return false|null FALSE when the header does not have 54 columns
 */
function Parse()
{
    $l = parent::getReadFile()->read(100000);
    $header = explode("\t", trim(substr($l, 1)));
    if (($c = count($header)) != 54) {
        trigger_error("Expecting 54 columns, found {$c}!");
        return FALSE;
    }

    // interaction type classes that already received their label
    $defined = array();

    while ($l = parent::getReadFile()->read(500000)) {
        $a = explode("\t", trim($l));

        // irefindex identifiers
        $rigid = "irefindex." . $a[34];            // checksum for interaction
        $rogida = "irefindex." . $a[32];           // checksum for A
        $rogidb = "irefindex." . $a[33];           // checksum for B
        $irigid = "irefindex.irigid:" . $a[44];    // integer id for interaction
        $irogida = "irefindex.irogid:" . $a[42];   // integer id for A
        $irogidb = "irefindex.irogid:" . $a[43];   // integer id for B
        $crigid = "irefindex.crigid:" . $a[47];    // checksum for canonical interaction
        $icrigid = "irefindex.icrigid:" . $a[50];  // integer id for canonical interaction
        $crogida = "irefindex.crogid:" . $a[45];   // checksum for A's canonical group
        $crogidb = "irefindex.crogid:" . $a[46];   // checksum for B's canonical group
        $icrogida = "irefindex.icrogid:" . $a[48]; // integer for A's canonical group
        $icrogidb = "irefindex.icrogid:" . $a[49]; // integer for B's canonical group

        // 13 contains the original identifier, the rigid, and the edgetype
        $ids = explode("|", $a[13]);
        if (count($ids) != 3) {
            trigger_error("Expecting 3 entries in column 14");
            print_r($ids);
            exit;
        }
        parent::getRegistry()->parseQName($ids[0], $ns, $id);
        if ($id == '-') {
            // this happens with hprd
            $iid = "hprd:" . substr($ids[1], 6);
        } else {
            $iid = $ns . ":" . $id;
        }

        // get the type
        if ($a[52] == "X") {
            $label = "{$a['0']} - {$a['1']} Interaction";
            $type = "Pairwise-Interaction";
        } elseif ($a[52] == "C") {
            $label = $a[53] . " component complex"; // num of participants
            $type = "Multimeric-Complex";
        } elseif ($a[52] == "Y") {
            $label = "{$a['0']} homomeric complex";
            $type = "Homopolymeric-Complex";
        } else {
            // without this guard the label and type of the previous row would
            // be reused (or be undefined on the first row)
            trigger_error("Unknown edge type '{$a[52]}' for {$iid} - skipping", E_USER_WARNING);
            continue;
        }
        parent::addRDF(
            parent::describeIndividual($iid, $label, parent::getVoc() . $type)
            . parent::describeClass(parent::getVoc() . $type, str_replace("-", " ", $type))
        );

        // interaction type[52] by method[6]
        if ($a[6] != '-') {
            $data = $this->ParseStringArray($a[6]);
            $qname = trim($data["ns"]) . ":" . trim($data["id"]);
            if ($qname) {
                parent::addRDF(
                    parent::triplify($iid, parent::getVoc() . "method", $qname)
                    . parent::describeClass($qname, $data['label'])
                );
            }
        }

        parent::addRDF(parent::triplify($iid, "rdfs:seeAlso", "http://wodaklab.org/iRefWeb/interaction/show/" . $a[50]));

        // set the interactors
        for ($i = 0; $i <= 1; $i++) {
            $p = 'a';
            if ($i == 1) {
                $p = 'b';
            }

            $data = $this->ParseStringArray($a[$i]);
            $interactor = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(parent::triplify($iid, parent::getVoc() . "interactor_{$p}", $interactor));

            // biological role
            $role = $a[16 + $i];
            if ($role != '-') {
                $data = $this->ParseStringArray($role);
                $qname = trim($data["ns"]) . ":" . trim($data["id"]);
                if ($qname != "mi:0000") {
                    parent::addRDF(
                        parent::triplify($iid, parent::getVoc() . "interactor_{$p}" . "_biological_role", $qname)
                        . parent::describeClass($qname, $data['label'])
                    );
                }
            }

            // experimental role
            $role = $a[18 + $i];
            if ($role != '-') {
                $data = $this->ParseStringArray($role);
                $qname = trim($data["ns"]) . ":" . trim($data["id"]);
                if ($qname != "mi:0000") {
                    parent::addRDF(
                        parent::triplify($iid, parent::getVoc() . "interactor_{$p}" . "_experimental_role", $qname)
                        . parent::describeClass($qname, $data['label'])
                    );
                }
            }

            // interactor type
            $type = $a[20 + $i];
            if ($type != '-') {
                $data = $this->ParseStringArray($type);
                $qname = trim($data["ns"]) . ":" . trim($data["id"]);
                parent::addRDF(
                    parent::triplify($interactor, "rdf:type", $qname)
                    . parent::describeClass($qname, $data['label'])
                );
            }
        }

        // add the alternatives through the taxon + seq redundant group
        for ($i = 2; $i <= 3; $i++) {
            $taxid = '';
            $rogid = "irefindex." . $a[32 + ($i - 2)];
            parent::addRDF(
                parent::describeIndividual($rogid, "", parent::getVoc() . "Taxon-Sequence-Identical-Group")
                . parent::describeClass(parent::getVoc() . "Taxon-Sequence-Identical-Group", "Taxon + Sequence Identical Group")
            );

            $tax = $a[9 + ($i - 2)];
            if ($tax && $tax != '-' && $tax != '-1') {
                $data = $this->ParseStringArray($tax);
                $taxid = trim($data["ns"]) . ":" . trim($data["id"]);
                parent::addRDF(parent::triplify($rogid, parent::getVoc() . "x-taxonomy", $taxid));
            }

            $list = explode("|", $a[3 + ($i - 2)]);
            foreach ($list as $item) {
                $data = $this->ParseStringArray($item);
                $ns = trim($data["ns"]);
                $id = trim($data["id"]);
                $qname = $ns . ":" . $id;
                if ($ns && $ns != 'rogid' && $ns != 'irogid' and $id != '-') {
                    parent::addRDF(parent::triplify($rogid, parent::getVoc() . "has-member", $qname));
                    if ($taxid && $taxid != '-' && $taxid != '-1') {
                        parent::addRDF(parent::triplify($qname, parent::getVoc() . "x-taxonomy", $taxid));
                    }
                }
            }
        }

        // publications
        $list = explode("|", $a[8]);
        foreach ($list as $item) {
            // skip the "no value" marker and the pubmed:0 placeholder; the
            // former '&&' made the pubmed:0 test a no-op
            if ($item == '-' || $item == 'pubmed:0') {
                continue;
            }
            $data = $this->ParseStringArray($item);
            $qname = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(parent::triplify($iid, parent::getVoc() . "article", $qname));
        }

        // MI interaction type
        if ($a[11] != '-' && $a[11] != 'NA') {
            $data = $this->ParseStringArray($a[11]);
            $qname = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(parent::triplify($iid, "rdf:type", $qname));
            if (!isset($defined[$qname])) {
                $defined[$qname] = '';
                parent::addRDF(parent::triplifyString($qname, "rdfs:label", $data['label']));
            }
        }

        // source
        if ($a[12] != '-') {
            $data = $this->ParseStringArray($a[12]);
            $qname = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(parent::triplify($iid, parent::getVoc() . "source", $qname));
        }

        // confidence
        $list = explode("|", $a[14]);
        foreach ($list as $item) {
            $data = $this->ParseStringArray($item);
            $ns = trim($data["ns"]);
            $id = trim($data["id"]);
            if ($ns == 'lpr') {
                // lowest number of distinct interactions that any one article reported
                parent::addRDF(parent::triplifyString($iid, parent::getVoc() . "minimum-number-interactions-reported", $id));
            } elseif ($ns == "hpr") {
                // higher number of distinct interactions that any one article reports
                parent::addRDF(parent::triplifyString($iid, parent::getVoc() . "maximum-number-interactions-reported", $id));
            } elseif ($ns == 'hp') {
                // total number of unique PMIDs used to support the interaction
                // (this used to be an assignment, so every other score type
                // was reported under this predicate as well)
                parent::addRDF(parent::triplifyString($iid, parent::getVoc() . "number-supporting-articles", $id));
            }
        }

        // expansion method
        if ($a[15]) {
            $id = parent::getRes() . md5($a[15]);
            parent::addRDF(
                parent::describeIndividual($id, $a[15], parent::getVoc() . "Expansion-Method")
                . parent::describeClass(parent::getVoc() . "Expansion-Method", "Expansion Method")
                . parent::triplify($iid, parent::getVoc() . "expansion-method", $id)
            );
        }

        // host organism
        if ($a[28] != '-') {
            $data = $this->ParseStringArray($a[28]);
            $qname = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(parent::triplify($iid, parent::getVoc() . "host-organism", $qname));
        }

        // @todo add to record
        // created 2010/05/18
        $date = str_replace("/", "-", $a[30]) . "T00:00:00Z";
        parent::addRDF(parent::triplifyString($iid, "dc:created", $date, "xsd:dateTime"));

        // taxon-sequence identical interaction group
        parent::addRDF(
            parent::triplify($iid, parent::getVoc() . "taxon-sequence-identical-interaction", $rigid)
            . parent::triplify($rigid, "rdf:type", parent::getVoc() . "Taxon-Sequence-Identical-Interaction")
            . parent::describeClass(parent::getVoc() . "Taxon-Sequence-Identical-Interaction", "Taxon + Sequence Identical Interaction")
            . parent::triplify($rigid, parent::getVoc() . "irigid", $irigid)
            . parent::triplify($rigid, parent::getVoc() . "interactor-a", $rogida)
            . parent::triplify($rogida, parent::getVoc() . "irogid", $irogida)
            . parent::triplify($rigid, parent::getVoc() . "interactor-b", $rogidb)
            . parent::triplify($rogidb, parent::getVoc() . "irogid", $irogidb)
            . parent::triplify($rogida, parent::getVoc() . "canonical-group", $crogida)
            . parent::triplify($rogidb, parent::getVoc() . "canonical-group", $crogidb)
            . parent::triplify($rigid, parent::getVoc() . "taxon-sequence-similar-interaction", $crigid)
            . parent::triplify($crigid, "rdf:type", parent::getVoc() . "Taxon-Sequence-Canonical-Interaction")
            . parent::describeClass(parent::getVoc() . "Taxon-Sequence-Canonical-Interaction", "Taxon + Sequence Canonical Interaction")
            . parent::triplify($crigid, parent::getVoc() . "icrigid", $icrigid)
            . parent::triplify($crigid, parent::getVoc() . "interactor-a-canonical-group", $crogida)
            . parent::triplify($crogida, "rdf:type", parent::getVoc() . "Taxon-Sequence-Similar-Group")
            . parent::triplify($crogida, parent::getVoc() . "icrogid", $icrogida)
            . parent::triplify($crigid, parent::getVoc() . "interactor-b-canonical-group", $crogidb)
            . parent::triplify($crogidb, "rdf:type", parent::getVoc() . "Taxon-Sequence-Similar-Group")
            . parent::triplify($crogidb, parent::getVoc() . "icrogid", $icrogidb)
            . parent::describeClass(parent::getVoc() . "Taxon-Sequence-Similar-Group", "Taxon + Sequence Similar Group")
        );

        parent::writeRDFBufferToWriteFile();
    }
}
/**
 * Convert the tab-separated PSI-BLAST alignment table in the current read
 * file to RDF.
 *
 * Columns (zero-based) as used below: 0 query id, 1-2 query start/stop,
 * 3-4 target start/stop, 5 percent aligned, 6 score, 7 target id, 8 taxon.
 *
 * @return bool TRUE once the whole file has been read
 */
function psiblast()
{
    while ($l = $this->GetReadFile()->Read(2048)) {
        $a = explode("\t", trim($l));
        $id1 = $a[0];
        $id2 = $a[7];
        // The identifier combines both sequence ids. The previous template
        // "{$id1_}{$id2}" referenced the undefined variable $id1_, so the
        // query id was lost and alignments sharing a target collided.
        $id = "aln_{$id1}_{$id2}";

        $this->AddRDF(
            parent::describeIndividual($this->getRes() . $id, "psiblast alignment between {$id1} and {$id2}", $this->getVoc() . "PSI-BLAST-Alignment")
            . parent::describeClass($this->getVoc() . "PSI-BLAST-Alignment", "PSI-Blast Alignment")
            . parent::triplify($this->getRes() . $id, $this->getVoc() . "query", $this->getNamespace() . $id1)
            . parent::triplify($this->getRes() . $id, $this->getVoc() . "target", $this->getNamespace() . $id2)
            . parent::triplifyString($this->getRes() . $id, $this->getVoc() . "query-start", $a[1])
            . parent::triplifyString($this->getRes() . $id, $this->getVoc() . "query-stop", $a[2])
            . parent::triplifyString($this->getRes() . $id, $this->getVoc() . "target-start", $a[3])
            . parent::triplifyString($this->getRes() . $id, $this->getVoc() . "target-stop", $a[4])
            . parent::triplifyString($this->getRes() . $id, $this->getVoc() . "percent-aligned", $a[5])
            . parent::triplifyString($this->getRes() . $id, $this->getVoc() . "score", $a[6])
            . parent::triplifyString($this->getRes() . $id, $this->getVoc() . "is-encoded-by", "taxon:" . $a[8])
            . parent::describeProperty($this->getVoc() . "target-start", "Relationship between an SGD sequence alignment and its target sequence start position")
            . parent::describeProperty($this->getVoc() . "target-stop", "Relationship between an SGD sequence alignment and its target sequence stop position")
            . parent::describeProperty($this->getVoc() . "score", "Relationship between an SGD sequence alignment and its score")
            . parent::describeProperty($this->getVoc() . "percent-aligned", "Relationship between an SGD sequence alignment and its percent-aligned value")
            . parent::describeProperty($this->getVoc() . "is-encoded-by", "Relationship between an SGD sequence alignment and the taxon the aligned sequences are encoded by")
        );
        parent::writeRDFBufferToWriteFile();
    }

    return TRUE;
}
/**
 * Convert one DrugBank drug element to RDF.
 *
 * The element is obtained from $xml->GetXMLRoot(). When $this->id_list is
 * set, only drugs whose primary id is a key of that list are processed, and
 * the key is removed once seen.
 *
 * Empty cas-number / inchikey elements no longer produce dangling "cas:" or
 * "inchikey:" cross references.
 *
 * @param object $xml reader wrapper exposing GetXMLRoot()
 * @return void
 */
function parseDrugEntry(&$xml)
{
    $defined = array(); // packagers already described within this entry
    $counter = 1;       // running number for property identifiers

    $x = $xml->GetXMLRoot();
    $dbid = (string) $x->{"drugbank-id"};
    $did = "drugbank:" . $dbid;
    $name = (string) $x->name;
    $type = ucfirst((string) str_replace(" ", "-", $x->attributes()->type));
    $type_label = ucfirst($x->attributes()->type);
    $description = null;

    if (isset($this->id_list)) {
        if (!isset($this->id_list[$dbid])) {
            return;
        }
        unset($this->id_list[$dbid]);
    }
    echo "Processing {$dbid}" . PHP_EOL;

    if (isset($x->description) && $x->description != '') {
        $description = trim((string) $x->description);
    }

    parent::addRDF(
        parent::describeIndividual($did, $name, parent::getVoc() . "Drug", $name, $description)
        . parent::describeClass(parent::getVoc() . "Drug", "Drug")
        . parent::triplify($did, "owl:sameAs", "http://identifiers.org/drugbank/" . $dbid)
        . parent::triplify($did, "rdfs:seeAlso", "http://www.drugbank.ca/drugs/" . $dbid)
        . parent::triplify($did, "rdf:type", parent::getVoc() . $type)
        . parent::describeClass(parent::getVoc() . $type, $type_label)
    );

    foreach ($x->{'drugbank-id'} as $id) {
        parent::addRDF(parent::triplifyString($did, parent::getVoc() . "drugbank-id", $id));
    }

    // isset() is true for an empty element too, so test the value as well
    if (isset($x->{'cas-number'}) && (string) $x->{'cas-number'} != '') {
        parent::addRDF(parent::triplify($did, parent::getVoc() . "x-cas", "cas:" . $x->{'cas-number'}));
    }

    // free-text sections, each turned into its own described resource
    $literals = array("indication", "pharmacodynamics", "mechanism-of-action", "toxicity", "biotransformation", "absorption", "half-life", "protein-binding", "route-of-elimination", "volume-of-distribution", "clearance");
    foreach ($literals as $l) {
        if (isset($x->{$l}) and $x->{$l} != '') {
            $lid = parent::getRes() . md5($l . $x->{$l});
            parent::addRDF(
                parent::describeIndividual($lid, "{$l} for {$did}", parent::getVoc() . ucfirst($l), "{$l} for {$did}", $x->{$l})
                . parent::describeClass(parent::getVoc() . ucfirst($l), ucfirst(str_replace("-", " ", $l)))
                . parent::triplify($did, parent::getVoc() . $l, $lid)
            );
        }
    }

    // TODO:: Replace the next two lines
    $this->AddList($x, $did, "groups", "group", parent::getVoc() . "group");
    $this->AddList($x, $did, "categories", "category", parent::getVoc() . "category");

    if (isset($x->classification)) {
        foreach ($x->classification->children() as $k => $v) {
            $cid = parent::getRes() . md5($v);
            parent::addRDF(
                parent::describeIndividual($cid, $v, parent::getVoc() . "Drug-Classification-Category")
                . parent::describeClass(parent::getVoc() . "Drug-Classification-Category", "Drug Classification Category")
                . parent::triplify($did, parent::getVoc() . "drug-classification-category", $cid)
            );
        }
    }

    $this->addLinkedResource($x, $did, 'atc-codes', 'atc-code', 'atc');
    $this->addLinkedResource($x, $did, 'ahfs-codes', 'ahfs-code', 'ahfs');

    // taxonomy
    $this->AddText($x, $did, "taxonomy", "kingdom", parent::getVoc() . "kingdom");
    // substructures
    $this->AddText($x, $did, "taxonomy", "substructures", parent::getVoc() . "substructure", "substructure");
    // synonyms
    $this->AddCategory($x, $did, "synonyms", "synonym", parent::getVoc() . "synonym");
    // brand names
    $this->AddCategory($x, $did, "international-brands", "international-brand", parent::getVoc() . "brand");

    // salt
    if (isset($x->salts->salt)) {
        foreach ($x->salts->salt as $s) {
            $sid = parent::getPrefix() . ':' . $s->{'drugbank-id'};
            $salt_rdf = parent::describeIndividual($sid, $s->name, parent::getVoc() . "Salt")
                . parent::describeClass(parent::getVoc() . "Salt", "Salt")
                . parent::triplify($did, parent::getVoc() . "salt", $sid);
            // only emit cross references that actually carry an identifier
            if ((string) $s->{'cas-number'} != '') {
                $salt_rdf .= parent::triplify($sid, parent::getVoc() . "x-cas", "cas:" . $s->{'cas-number'});
            }
            if ((string) $s->{'inchikey'} != '') {
                $salt_rdf .= parent::triplify($sid, parent::getVoc() . "x-inchikey", "inchikey:" . $s->{'inchikey'});
            }
            parent::addRDF($salt_rdf);
        }
    }

    // mixtures: each has a name and an ingredients string such as
    // "dornase alfa + fibrinolysin + gentamicin sulfate"
    if (isset($x->mixtures)) {
        foreach ($x->mixtures->mixture as $item) {
            $mid = parent::getRes() . str_replace(" ", "-", $item->name[0]);
            parent::addRDF(
                parent::triplify($did, parent::getVoc() . "mixture", $mid)
                . parent::describeIndividual($mid, $item->name[0], parent::getVoc() . "Mixture")
                . parent::describeClass(parent::getVoc() . "Mixture", "mixture")
                . parent::triplifyString($mid, $this->getVoc() . "ingredients", "" . $item->ingredients[0])
            );
            $a = explode(" + ", $item->ingredients[0]);
            foreach ($a as $b) {
                $b = trim($b);
                $iid = parent::getRes() . str_replace(" ", "-", $b);
                parent::addRDF(
                    parent::describeClass($iid, $b, parent::getVoc() . "Ingredient")
                    . parent::describeClass(parent::getVoc() . "Ingredient", "Ingredient")
                    . parent::triplify($mid, parent::getVoc() . "ingredient", $iid)
                );
            }
        }
    }

    // packagers: each has a name and optionally a url
    if (isset($x->packagers)) {
        foreach ($x->packagers as $items) {
            if (isset($items->packager)) {
                foreach ($items->packager as $item) {
                    $pid = parent::getRes() . md5($item->name);
                    parent::addRDF(parent::triplify($did, parent::getVoc() . "packager", $pid));
                    // describe each packager only once per drug entry
                    if (!isset($defined[$pid])) {
                        $defined[$pid] = '';
                        parent::addRDF(parent::describe($pid, "" . $item->name[0]));
                        // the second test excludes one known malformed url value
                        if (strstr($item->url, "http://") && $item->url != "http://BASF Corp.") {
                            parent::addRDF($this->triplify($pid, "rdfs:seeAlso", "" . $item->url[0]));
                        }
                    }
                }
            }
        }
    }

    // manufacturers
    $this->AddText($x, $did, "manufacturers", "manufacturer", parent::getVoc() . "manufacturer"); // @TODO RESOURCE

    // prices
    if (isset($x->prices->price)) {
        foreach ($x->prices->price as $product) {
            $pid = parent::getRes() . md5($product->description);
            parent::addRDF(
                parent::describeIndividual($pid, $product->description, parent::getVoc() . "Pharmaceutical", $product->description)
                . parent::describeClass(parent::getVoc() . "Pharmaceutical", "pharmaceutical")
                . parent::triplifyString($pid, parent::getVoc() . "price", "" . $product->cost, "xsd:float")
                . parent::triplify($did, parent::getVoc() . "product", $pid)
            );
            $uid = parent::getVoc() . md5($product->unit);
            parent::addRDF(
                parent::describeIndividual($uid, $product->unit, parent::getVoc() . "Unit", $product->unit)
                . parent::describeClass(parent::getVoc() . "Unit", "unit")
                . parent::triplify($pid, parent::getVoc() . "form", $uid)
            );
        }
    }

    // dosages: form, route and optional strength
    if (isset($x->dosages->dosage)) {
        foreach ($x->dosages->dosage as $dosage) {
            $id = parent::getRes() . md5($dosage->strength . $dosage->form . $dosage->route);
            $label = ($dosage->strength != '' ? $dosage->strength . " " : "") . $dosage->form . " form with " . $dosage->route . " route";
            parent::addRDF(
                parent::describeIndividual($id, $label, parent::getVoc() . "Dosage")
                . parent::describeClass(parent::getVoc() . "Dosage", "Dosage")
                . parent::triplify($did, parent::getVoc() . "dosage", $id)
            );
            $rid = parent::getVoc() . md5($dosage->route);
            $this->typify($id, $rid, "Route", "" . $dosage->route);
            $fid = parent::getVoc() . md5($dosage->form);
            $this->typify($id, $fid, "Form", "" . $dosage->form);
            if ($dosage->strength != '') {
                parent::addRDF(parent::triplifyString($id, parent::getVoc() . "strength", $dosage->strength));
            }
        }
    }

    // experimental and calculated properties
    $props = array("experimental-properties", "calculated-properties");
    foreach ($props as $prop) {
        if (isset($x->{$prop})) {
            foreach ($x->{$prop} as $properties) {
                foreach ($properties as $property) {
                    $type = (string) $property->kind;
                    $value = (string) $property->value;
                    $type_uri = parent::getVoc() . ucfirst(str_replace(" ", "-", $type));
                    $id = parent::getRes() . $prop . "-" . $dbid . "-" . $counter++;
                    $label = $property->kind . ": {$value}" . ($property->source == '' ? '' : " from " . $property->source);
                    parent::addRDF(
                        parent::describeIndividual($id, $label, $type_uri)
                        . parent::describeClass($type_uri, $type, parent::getVoc() . ucfirst($prop))
                        . parent::describeClass(parent::getVoc() . ucfirst($prop), str_replace("-", " ", $prop))
                        . parent::triplifyString($id, $this->getVoc() . "value", $value)
                        . parent::triplify($did, $this->getVoc() . $prop, $id)
                    );

                    // Source
                    if (isset($property->source)) {
                        foreach ($property->source as $source) {
                            $s = (string) $source;
                            if ($s == '') {
                                continue;
                            }
                            $sid = parent::getRes() . md5($s);
                            parent::addRDF(
                                parent::describeIndividual($sid, $s, parent::getVoc() . "Source")
                                . parent::describeClass(parent::getVoc() . "Source", "Source")
                                . parent::triplify($id, parent::getVoc() . "source", $sid)
                            );
                        }
                    }
                }
            }
        }
    }

    // identifiers
    // patents: number, country, approved and expires dates
    if (isset($x->patents->patent)) {
        foreach ($x->patents->patent as $patent) {
            $id = "uspto:" . $patent->number;
            parent::addRDF(
                parent::triplify($did, $this->getVoc() . "patent", $id)
                . parent::describeIndividual($id, $patent->country . " patent " . $patent->number, $this->getVoc() . "Patent")
                . parent::describeClass(parent::getVoc() . "Patent", "patent")
                . parent::triplifyString($id, $this->getVoc() . "approved", "" . $patent->approved)
                . parent::triplifyString($id, $this->getVoc() . "expires", "" . $patent->expires)
            );
            $cid = parent::getRes() . md5($patent->country);
            $this->typify($id, $cid, "Country", "" . $patent->country);
        }
    }

    // partners
    $partners = array('target', 'enzyme', 'transporter', 'carrier');
    foreach ($partners as $partner) {
        $plural = $partner . 's';
        if (isset($x->{$plural})) {
            foreach ($x->{$plural} as $list) {
                foreach ($list->{$partner} as $item) {
                    $this->parsePartnerRelation($did, $item, $partner);
                    parent::writeRDFBufferToWriteFile();
                }
            }
        }
    }

    // drug-interactions
    if (isset($x->{"drug-interactions"})) {
        foreach ($x->{"drug-interactions"} as $ddis) {
            foreach ($ddis->{"drug-interaction"} as $ddi) {
                // cast so the ordering test below is a plain string comparison
                $dbid2 = (string) $ddi->{'drugbank-id'};
                if ($dbid < $dbid2) {
                    // don't repeat: the pair is emitted from the lower id only
                    $ddi_id = parent::getRes() . $dbid . "_" . $dbid2;
                    parent::addRDF(
                        parent::triplify("drugbank:" . $dbid, parent::getVoc() . "ddi-interactor-in", "" . $ddi_id)
                        . parent::triplify("drugbank:" . $dbid2, parent::getVoc() . "ddi-interactor-in", "" . $ddi_id)
                        . parent::describeIndividual($ddi_id, "DDI between {$name} and " . $ddi->name . " - " . $ddi->description, parent::getVoc() . "Drug-Drug-Interaction")
                        . parent::describeClass(parent::getVoc() . "Drug-Drug-Interaction", "drug-drug interaction")
                    );
                }
            }
        }
    }

    // food-interactions
    $this->AddText($x, $did, "food-interactions", "food-interaction", parent::getVoc() . "food-interaction");
    // affected-organisms
    $this->AddCategory($x, $did, "affected-organisms", "affected-organism", parent::getVoc() . "affected-organism");

    // external identifiers
    if (isset($x->{"external-identifiers"})) {
        foreach ($x->{"external-identifiers"} as $objs) {
            foreach ($objs as $obj) {
                $ns = $this->NSMap($obj->resource);
                $id = $obj->identifier;
                if ($ns == "genecards") {
                    $id = str_replace(array(" "), array("_"), $id);
                }
                parent::addRDF(parent::triplify($did, parent::getVoc() . "x-{$ns}", "{$ns}:{$id}"));
                if ($ns == "pubchemcompound") {
                    parent::addRDF(parent::triplify("{$ns}:{$id}", "skos:exactMatch", "http://rdf.ncbi.nlm.nih.gov/pubchem/compound/{$id}"));
                }
            }
        }
    }

    // external links
    if (isset($x->{"external-links"})) {
        foreach ($x->{"external-links"}->{'external-link'} as $el) {
            if (strpos($el->url, 'http') !== false) {
                parent::addRDF(parent::triplify($did, "rdfs:seeAlso", "" . $el->url));
            }
        }
    }

    parent::writeRDFBufferToWriteFile();
}
/**
 * Converts disorder-gene association records from an XML file into RDF.
 *
 * The file is read one "DisorderList" chunk at a time through CXML. For each
 * Disorder, every DisorderGeneAssociation yields:
 *  - a description of the gene (label, symbol, synonyms, external references);
 *  - an association resource typed by the association type and linked to its
 *    status, the disorder and the gene.
 * The RDF buffer is flushed to the write file after each disorder.
 *
 * @param string $file value handed to the CXML constructor (presumably the
 *                     path of the gene association XML file; confirm with caller)
 */
function genes($file)
{
    $xml = new CXML($file);
    while ($xml->parse("DisorderList") == TRUE) {
        $x = $xml->GetXMLRoot();
        foreach ($x->Disorder as $d) {
            $orphanet_id = parent::getNamespace() . (string) $d->OrphaNumber;
            $disorder_name = (string) $d->Name;

            foreach ($d->DisorderGeneAssociationList->DisorderGeneAssociation as $dga) {
                // gene
                $gene = $dga->Gene;
                $gene_id = parent::getNamespace() . (string) $gene->OrphaNumber;
                $gene_label = (string) $gene->Name;
                $gene_symbol = (string) $gene->Symbol;
                parent::addRDF(
                    parent::describeIndividual($gene_id, $gene_label, parent::getVoc() . "Gene") .
                    parent::describeClass(parent::getVoc() . "Gene", "orphanet gene") .
                    parent::triplifyString($gene_id, parent::getVoc() . "symbol", $gene_symbol)
                );

                // Walk the <Synonym> children of each list container. Iterating the
                // container alone only ever exposed the first synonym.
                foreach ($gene->SynonymList as $synonym_list) {
                    foreach ($synonym_list->Synonym as $syn) {
                        parent::addRDF(
                            parent::triplifyString($gene_id, parent::getVoc() . "synonym", (string) $syn)
                        );
                    }
                }

                // Same for external references: one triple per <ExternalReference>,
                // not just the first one in the list.
                foreach ($gene->ExternalReferenceList as $reference_list) {
                    foreach ($reference_list->ExternalReference as $er) {
                        $db = (string) $er->Source;
                        $db = parent::getRegistry()->getPreferredPrefix($db);
                        $id = (string) $er->Reference;
                        $xref = "{$db}:{$id}";
                        parent::addRDF(parent::triplify($gene_id, parent::getVoc() . "x-{$db}", $xref));
                    }
                }

                // The association id is derived from the disorder number plus a hash of
                // the association's XML, so it only changes when the source record does.
                $dga_id = parent::getRes() . (string) $d->OrphaNumber . "_" . md5($dga->asXML());

                $ga = $dga->DisorderGeneAssociationType;
                $ga_id = parent::getNamespace() . (string) $ga->attributes()->id;
                $ga_label = (string) $ga->Name;

                $status = $dga->DisorderGeneAssociationStatus;
                $s_id = parent::getNamespace() . (string) $status->attributes()->id;
                $s_label = (string) $status->Name;

                parent::addRDF(
                    parent::describeIndividual($dga_id, "{$ga_label} {$gene_label} in {$disorder_name} ({$s_label})", $ga_id) .
                    parent::describeClass($ga_id, $ga_label, parent::getVoc() . "Disorder-Gene-Association") .
                    parent::triplify($dga_id, parent::getVoc() . "status", $s_id) .
                    parent::describeClass($s_id, $s_label, parent::getVoc() . "Disorder-Gene-Association-Status") .
                    parent::triplify($dga_id, parent::getVoc() . "disorder", $orphanet_id) .
                    parent::describeIndividual($orphanet_id, $disorder_name, parent::getVoc() . "Disorder") .
                    parent::triplify($dga_id, parent::getVoc() . "gene", $gene_id)
                );
            }
            parent::writeRDFBufferToWriteFile();
        }
    }
    unset($xml);
}
/**
 * Converts the tab-delimited gene info file (current read file) into RDF.
 *
 * The first line is read and discarded. Each following line is expected to
 * hold 15 tab-separated columns; lines with another column count are ignored.
 * When $this->taxids is set, only rows whose taxonomy id is a key of that
 * array are converted. Rows whose symbol is "NEWENTRY" produce no triples.
 * A column value of "-" is treated as "no value". The RDF buffer is flushed
 * to the write file after every processed line.
 */
private function geneinfo()
{
    $i = 1;
    // Skip the first line (column header).
    $this->GetReadFile()->Read(200000);
    while ($aLine = $this->GetReadFile()->Read(200000)) {
        if ($i++ % 1000 == 0) {
            parent::clear();
        }
        $splitLine = explode("\t", $aLine);
        if (count($splitLine) == 15) {
            $taxid = "taxon:" . trim($splitLine[0]);
            if (isset($this->taxids) and !isset($this->taxids[trim($splitLine[0])])) {
                continue;
            }
            $geneid = "ncbigene:" . trim($splitLine[1]);
            $symbol = addslashes(stripslashes(trim($splitLine[2])));
            $symbolid = "symbol:{$symbol}";
            $locusTag = trim($splitLine[3]);
            $symbols_arr = explode("|", $splitLine[4]);
            $dbxrefs_arr = explode("|", $splitLine[5]);
            $chromosome = trim($splitLine[6]);
            $map_location = trim($splitLine[7]);
            $description = addslashes(stripslashes(trim($splitLine[8])));
            $type_of_gene = trim($splitLine[9]);
            $symbol_authority = addslashes(stripslashes(trim($splitLine[10])));
            $symbol_auth_full_name = addslashes(stripslashes(trim($splitLine[11])));
            $nomenclature_status = addslashes(stripslashes(trim($splitLine[12])));
            $other_designations = addslashes(stripslashes(trim($splitLine[13])));
            // Keep the raw text: the "-" placeholder check must be made on the string,
            // not on the array that date_parse() returns.
            $mod_date_raw = trim($splitLine[14]);

            //check for a valid symbol
            if ($symbol != "NEWENTRY") {
                $this->AddRDF(
                    parent::describeIndividual($geneid, "{$description} ({$symbolid}, {$taxid})", $this->getVoc() . "Gene") .
                    parent::triplify($geneid, $this->getVoc() . "x-taxonomy", $taxid) .
                    parent::triplifyString($geneid, $this->getVoc() . "symbol", $symbol) .
                    parent::triplifyString($geneid, $this->getVoc() . "locus", addslashes(stripslashes($locusTag))) .
                    parent::describeClass($this->getVoc() . "Gene", "NCBI Gene gene")
                );
                if ($type_of_gene != '-') {
                    $this->AddRDF(
                        parent::triplify($geneid, "rdf:type", $this->getVoc() . ucfirst($type_of_gene) . "-Gene") .
                        parent::describeClass($this->getVoc() . ucfirst($type_of_gene) . "-Gene", ucfirst($type_of_gene) . " Gene")
                    );
                }
                //symbol synonyms
                foreach ($symbols_arr as $s) {
                    if ($s != "-") {
                        $this->AddRDF(parent::triplifyString($geneid, $this->getVoc() . "symbol-synonym", addslashes(stripslashes($s))));
                    }
                }
                //dbxrefs
                foreach ($dbxrefs_arr as $dbx) {
                    if ($dbx != "-") {
                        $this->AddRDF(parent::triplifyString($geneid, $this->getVoc() . "dbxref", $dbx));
                    }
                }
                //chromosome
                if ($chromosome != "-") {
                    $this->AddRDF(parent::triplifyString($geneid, $this->getVoc() . "chromosome", $chromosome));
                }
                //map location
                if ($map_location != "-") {
                    $this->AddRDF(parent::triplifyString($geneid, $this->getVoc() . "map-location", $map_location));
                }
                //description
                if ($description != "-") {
                    $this->AddRDF(parent::triplifyString($geneid, "dc:description", $description));
                }
                //nomenclature authority
                if ($symbol_authority != "-") {
                    $this->AddRDF(parent::triplifyString($geneid, $this->getVoc() . "nomenclature-authority", $symbol_authority));
                    if ($symbol_auth_full_name != "-") {
                        $this->AddRDF(parent::triplifyString($geneid, $this->getVoc() . "nomenclature-authority-fullname", $symbol_auth_full_name));
                    }
                }
                //nomenclature status
                if ($nomenclature_status != "-") {
                    $this->AddRDF(parent::triplifyString($geneid, $this->getVoc() . "nomenclature-status", $nomenclature_status));
                }
                //other designations
                if ($other_designations != "-") {
                    foreach (explode("|", $other_designations) as $d) {
                        $this->AddRDF(parent::triplifyString($geneid, $this->getVoc() . "other-designation", $d));
                    }
                }
                //modification date
                if ($mod_date_raw != "-" && $mod_date_raw != "") {
                    $mod_date = date_parse($mod_date_raw);
                    // date_parse() leaves a component as false when it could not be read;
                    // emit the triple only for a complete date, zero-padded as YYYY-MM-DD.
                    if ($mod_date["year"] !== false && $mod_date["month"] !== false && $mod_date["day"] !== false) {
                        $this->AddRDF(parent::triplifyString(
                            $geneid,
                            $this->getVoc() . "modification-date",
                            sprintf("%04d-%02d-%02d", $mod_date["year"], $mod_date["month"], $mod_date["day"])
                        ));
                    }
                }
            }
        }
        parent::writeRDFBufferToWriteFile();
    } // while
}
/**
 * Converts a tab-delimited GO annotation file (current read file) into RDF.
 *
 * Lines starting with "!" are skipped. Every other line must contain exactly
 * 17 tab-separated columns; otherwise an error is raised and false is
 * returned. For each row the annotated entity is described and linked to the
 * GO term, and a separate annotation resource records the term, evidence,
 * category, assigning authority, entry date and PubMed references. The RDF
 * buffer is flushed to the write file after each converted row.
 *
 * @param string $file label used as part of the generated annotation identifiers
 * @return bool|null false when a row does not have 17 columns, nothing otherwise
 */
function process($file)
{
    $z = 1;
    while ($line = parent::getReadFile()->read(100000)) {
        if ($z % 100000 == 0) {
            parent::clear();
        }
        // comment / header line
        if ($line[0] == "!") {
            continue;
        }

        $cols = explode("\t", $line);
        $ncols = count($cols);
        if ($ncols != 17) {
            trigger_error("Expected 17 columns, but found " . $ncols, E_USER_ERROR);
            return false;
        }

        // unpack the columns that are used
        $db         = $cols[0];
        $id         = $cols[1];
        $symbol     = $cols[2];
        $qualifier  = $cols[3];
        $goid       = substr($cols[4], 3);
        $refs       = $this->getDbReferences($cols[5]);
        $eco        = $this->getEvidenceCodeLabelArr($cols[6]);
        $aspect     = $this->getAspect($cols[8]);
        $label      = $cols[9];
        $synonyms   = explode("|", $cols[10]);
        $taxid      = $cols[12];
        $date       = $this->parseDate($cols[13]);
        $assignedBy = $cols[14];

        // entity id; rows whose database cannot be mapped are dumped and skipped
        $eid = $this->getdbURI($db, $id);
        if (!$eid) {
            print_r($cols);
            continue;
        }

        // the annotated entity
        $entity_rdf  = parent::describeIndividual($eid, $label, parent::getVoc() . "GO-Annotation");
        $entity_rdf .= parent::describeClass(parent::getVoc() . "GO-Annotation", "GO Annotation");
        $entity_rdf .= parent::triplifyString($eid, parent::getVoc() . "symbol", $symbol);
        parent::addRDF($entity_rdf);
        parent::addRDF(parent::triplify($eid, parent::getVoc() . "x-taxonomy", $taxid));

        foreach ($synonyms as $synonym) {
            if (empty($synonym)) {
                continue;
            }
            parent::addRDF(parent::triplifyString($eid, parent::getVoc() . "synonym", $synonym));
        }

        // relation between entity and GO term; negated when the qualifier is NOT
        $rel = $aspect;
        if ($qualifier == 'NOT') {
            if ($aspect == 'component') {
                $rel = 'not-in-component';
            } elseif ($aspect == 'function') {
                $rel = 'not-has-function';
            } elseif ($aspect == 'process') {
                $rel = 'not-in-process';
            }
        }
        $term = "go:" . $goid;
        parent::addRDF(
            parent::describeObjectProperty(parent::getVoc() . $rel, str_replace("-", " ", $rel)) .
            parent::triplify($eid, parent::getVoc() . $rel, $term)
        );

        // the annotation resource itself, numbered by the running counter
        $type = key($eco);
        $aid = parent::getRes() . $file . "_" . $z++;
        parent::addRDF(
            parent::describeObjectProperty(parent::getVoc() . "go-annotation", "GO annotation") .
            parent::triplify($eid, parent::getVoc() . "go-annotation", $aid)
        );

        $cat = parent::getRes() . md5($aspect);
        $annotation_rdf  = parent::describeIndividual($aid, "{$id}-go:{$goid} association", parent::getVoc() . "GO-Annotation");
        $annotation_rdf .= parent::triplify($aid, parent::getVoc() . "target", $eid);
        $annotation_rdf .= parent::triplify($aid, parent::getVoc() . "go-term", "go:" . $goid);
        $annotation_rdf .= parent::triplify($aid, parent::getVoc() . "evidence", "eco:" . $eco[$type][1]);
        $annotation_rdf .= parent::triplify($aid, parent::getVoc() . "go-category", $cat);
        $annotation_rdf .= parent::describeClass($cat, $aspect);
        $annotation_rdf .= parent::triplifyString($aid, parent::getVoc() . "assigned-by", $assignedBy);
        parent::addRDF($annotation_rdf);

        if ($date != '') {
            parent::addRDF(parent::triplifyString($aid, parent::getVoc() . "entry-date", $date . "T00:00:00Z", "xsd:dateTime"));
        }

        // only PubMed references are turned into article links
        foreach ($refs as $ref) {
            $parts = explode(":", $ref);
            if ($parts[0] == 'PMID') {
                parent::addRDF(parent::triplify($aid, parent::getVoc() . "article", "pubmed:" . $parts[1]));
            }
        }

        //write RDF to file
        parent::writeRDFBufferToWriteFile();
    }
}
/**
 * Converts the tab-delimited functional descriptions file (current read file)
 * into RDF.
 *
 * Lines starting with "#" and lines containing "gene_id" (the column header)
 * are skipped. Expected columns:
 *   gene_id, public_name, molecular_name, concise_description,
 *   provisional_description, detailed_description, automated_description,
 *   gene_class_description
 * Each row yields a gene description plus one literal per description column;
 * the RDF buffer is flushed to the write file after each row.
 */
function functional_descriptions()
{
    while ($line = $this->getReadFile()->read(2000000)) {
        // comments and the header row carry no data
        if ($line[0] == "#" || strstr($line, "gene_id")) {
            continue;
        }

        $cols = explode("\t", $line);
        $ncols = count($cols);
        if ($ncols != 8) {
            trigger_error("Found one row that only has " . $ncols . " columns, expecting 8", E_USER_ERROR);
            continue;
        }

        $id = parent::getNamespace() . $cols[0];

        // public name, followed by the molecular name in parentheses when present
        $label = $cols[1];
        if ($cols[2]) {
            $label .= " (" . $cols[2] . ")";
        }

        // predicate suffix => literal value, in output order
        $descriptions = array(
            "concise-description"     => $cols[3],
            "provisional-description" => $cols[4],
            "detailed-description"    => $cols[5],
            "automated-description"   => $cols[6],
            "gene-class-description"  => trim($cols[7]),
        );

        $rdf  = parent::describeIndividual($id, $label, parent::getVoc() . "Gene");
        $rdf .= parent::describeClass(parent::getVoc() . "Gene", "Wormbase Gene");
        foreach ($descriptions as $predicate => $text) {
            $rdf .= parent::triplifyString($id, parent::getVoc() . $predicate, $text);
        }
        parent::addRDF($rdf);

        parent::writeRDFBufferToWriteFile();
    }
}
/**
 * Entry point: downloads (when needed) and converts the selected MeSH files,
 * then writes the dataset description file.
 *
 * The 'files' parameter is either "all" or a comma-separated list of keys of
 * the package map; unknown keys are ignored. For each selected key the file
 * name pattern has "YEAR" replaced by the 'year' parameter, the file is
 * downloaded if it is missing locally or 'download' is "true", and the method
 * named after the key is invoked to produce "mesh_<key>.<output_format>".
 * Provenance for each source/output pair is accumulated and written to the
 * release file at the end.
 */
function Run()
{
    // work out which packages to process
    $requested = trim(parent::getParameterValue('files'));
    $packages = $this->getPackageMap();
    if ($requested == 'all') {
        $files = $packages;
    } else {
        $files = array();
        foreach (explode(",", $requested) as $name) {
            if (array_key_exists($name, $packages)) {
                $files[$name] = $packages[$name];
            }
        }
    }

    $ldir = parent::getParameterValue('indir');
    $odir = parent::getParameterValue('outdir');
    $dd = '';
    $year = parent::getParameterValue('year');

    //now iterate over the files array
    foreach ($files as $k => $fpattern) {
        $file = str_replace("YEAR", $year, $fpattern);
        $lfile = $ldir . $file;
        $rfile = parent::getParameterValue("download_url") . $file;

        // download if necessary
        if (!file_exists($lfile) || parent::getParameterValue('download') == "true") {
            echo "Downloading {$file} ... ";
            if (utils::downloadSingle($rfile, $lfile) === FALSE) {
                trigger_error("Unable to get {$file}", E_USER_ERROR);
                continue;
            }
            echo "done!" . PHP_EOL;
        }

        //set the outfile
        $ofile = "mesh_" . $k . "." . parent::getParameterValue('output_format');
        $gz = strstr(parent::getParameterValue('output_format'), "gz") !== false;

        echo "processing {$k} ...";
        parent::setReadFile($lfile, FALSE);
        parent::setWriteFile($odir . $ofile, $gz);
        // the package key doubles as the name of the method that parses it
        $this->{$k}();
        parent::writeRDFBufferToWriteFile();
        parent::getWriteFile()->close();
        echo "done!" . PHP_EOL;

        // provenance of the downloaded source file
        $source_file = new DataResource($this);
        $source_file
            ->setURI($rfile)
            ->setTitle("MeSH")
            ->setRetrievedDate(parent::getDate(filemtime($lfile)))
            ->setFormat("text/x-mesh-record")
            ->setPublisher("http://www.nlm.nih.gov")
            ->setHomepage("http://www.nlm.nih.gov/mesh/")
            ->setRights("use")
            ->setLicense("http://www.nlm.nih.gov/databases/download.html")
            ->setDataset("http://identifiers.org/mesh/");

        $prefix = parent::getPrefix();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $date = parent::getDate(filemtime($odir . $ofile));

        // provenance of the generated RDF file
        $output_file = new DataResource($this);
        $output_file
            ->setURI("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$ofile}")
            ->setTitle("Bio2RDF v{$bVersion} RDF version of {$prefix}")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/mesh/mesh.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/{$bVersion}/{$prefix}/{$prefix}.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());

        if ($gz) {
            $output_file->setFormat("application/gzip");
        }
        $is_ntriples = strstr(parent::getParameterValue('output_format'), "nt");
        $output_file->setFormat($is_ntriples ? "application/n-triples" : "application/n-quads");

        $dd .= $source_file->toRDF() . $output_file->toRDF();
    } //foreach

    // write the accumulated dataset description
    parent::setWriteFile($odir . $this->getBio2RDFReleaseFile($this->getNamespace()));
    parent::getWriteFile()->write($dd);
    parent::getWriteFile()->close();
    echo "done!" . PHP_EOL;
}
/**
 * Converts a parsed InterPro XML document into RDF.
 *
 * First records one "contains" literal per <dbinfo> release entry (and sets
 * the dataset version from the INTERPRO entry), then converts every
 * <interpro> entry: label/type, PubMed citations, abstract, examples,
 * parent/child/contains/found-in relations, secondary accessions, database
 * cross-references and taxon distribution. When the "id_list" parameter is
 * non-empty, only the comma-separated ids it names are converted.
 *
 * The RDF buffer is flushed at the START of each entry iteration, so the
 * triples of the final entry are still in the buffer when this method returns.
 *
 * @param SimpleXMLElement $xml root of the InterPro document (assumed from the
 *                              property/attributes() access pattern used here)
 */
function Parse($xml)
{
    // state the dataset info
    foreach ($xml->release->dbinfo as $o) {
        $db = $o->attributes()->dbname . " v" . $o->attributes()->version . " (" . $o->attributes()->entry_count . " entries) [" . $o->attributes()->file_date . "]";
        parent::addRDF(parent::triplifyString(parent::getDatasetURI(), parent::getVoc() . "contains", $db));
        if ((string) $o->attributes()->dbname === "INTERPRO") {
            parent::setDatasetVersion($o->attributes()->version);
        }
    }

    // get a potential id list
    $id_list = null;
    if (parent::getParameterValue("id_list") != '') {
        $id_list = explode(",", parent::getParameterValue("id_list"));
    }

    // XML container element => predicate suffix for entry-to-entry relations.
    // "child_list" is the element name in the InterPro schema; "child" is kept
    // as well so input matching the previously used name is still converted.
    $relations = array(
        'parent_list' => 'parent',
        'child_list'  => 'child',
        'child'       => 'child',
        'contains'    => 'contains',
        'found_in'    => 'found-in',
    );
    // containers whose <db_xref> children become x-<db> cross-references
    $xref_containers = array('member_list', 'external_doc_list', 'structure_db_links');

    // now iterate over the entries
    foreach ($xml->interpro as $o) {
        parent::writeRDFBufferToWriteFile();

        $interpro_id = (string) $o->attributes()->id;
        if ($id_list !== null && !in_array($interpro_id, $id_list, true)) {
            continue;
        }
        echo "Processing {$interpro_id}" . PHP_EOL;

        $name = $o->name;
        $short_name = $o->attributes()->short_name;
        $type = $o->attributes()->type;
        $s = parent::getNamespace() . $interpro_id;
        parent::addRDF(parent::describeIndividual($s, "{$name} ({$short_name}) {$type}", parent::getVoc() . $type));

        // get the pubs; the two lists are parallel search/replace arrays used to
        // turn <cite> markers in the abstract into PubMed links
        $cite_tags = array();
        $pubmed_links = array();
        if (isset($o->pub_list->publication)) {
            foreach ($o->pub_list->publication as $p) {
                $pid = (string) $p->attributes()->id;
                if (isset($p->db_xref) && $p->db_xref->attributes()->db == "PUBMED") {
                    $pmid = (string) $p->db_xref->attributes()->dbkey;
                    $cite_tags[] = '<cite idref="' . $pid . '"/>';
                    $pubmed_links[] = '<a href="http://www.ncbi.nlm.nih.gov/pubmed/' . $pmid . '">pubmed:' . $pmid . '</a>';
                    parent::addRDF(parent::triplify($s, parent::getVoc() . "x-pubmed", "pubmed:{$pmid}"));
                }
            }
        }

        $abstract = (string) $o->abstract->p->asXML();
        if (!empty($cite_tags)) {
            $abstract = str_replace($cite_tags, $pubmed_links, $abstract);
        }
        parent::addRDF(parent::triplifyString($s, "dc:description", $abstract));

        if (isset($o->example_list)) {
            foreach ($o->example_list->example as $example) {
                $db = (string) $example->db_xref->attributes()->db;
                $id = (string) $example->db_xref->attributes()->dbkey;
                parent::addRDF(parent::triplify($s, parent::getVoc() . "example-entry", "{$db}:{$id}"));
            }
        }

        // parent / child / contains / found-in
        foreach ($relations as $element => $predicate) {
            if (isset($o->{$element}->rel_ref)) {
                foreach ($o->{$element}->rel_ref as $rel) {
                    $id = (string) $rel->attributes()->ipr_ref;
                    parent::addRDF(parent::triplify($s, parent::getVoc() . $predicate, "interpro:{$id}"));
                }
            }
        }

        // secondary accessions: iterate the children of <sec_list>, and use a
        // dedicated loop variable so the entry subject $s is not overwritten
        if (isset($o->sec_list->sec_ac)) {
            foreach ($o->sec_list->sec_ac as $sec) {
                $id = (string) $sec->attributes()->acc;
                parent::addRDF(parent::triplify($s, parent::getVoc() . "secondary-accession", "interpro:{$id}"));
            }
        }

        // xrefs (the guard checks the same db_xref element that is iterated)
        foreach ($xref_containers as $element) {
            if (isset($o->{$element}->db_xref)) {
                foreach ($o->{$element}->db_xref as $dbxref) {
                    $db = (string) $dbxref->attributes()->db;
                    $id = (string) $dbxref->attributes()->dbkey;
                    parent::addRDF(parent::triplify($s, parent::getVoc() . "x-" . strtolower($db), "{$db}:{$id}"));
                }
            }
        }

        // taxon distribution
        if (isset($o->taxonomy_distribution->taxon_data)) {
            foreach ($o->taxonomy_distribution->taxon_data as $t) {
                $organism = (string) $t->attributes()->name;
                $number = (string) $t->attributes()->proteins_count;
                parent::addRDF(parent::triplifyString($s, parent::getVoc() . "taxon-distribution", "{$organism} ({$number})"));
            }
        }
    }
}
/**
 * Parses the current read file as zlib-compressed RDF/XML and maps every
 * resulting triple through TriplifyMap().
 *
 * The whole file is decompressed into memory and handed to the ARC2 RDF/XML
 * parser with "http://bio2rdf.org/bioportal#" as base. The RDF buffer is
 * flushed to the write file after each triple, and parent::clear() is called
 * once all triples have been handled.
 *
 * @param string $abbv ontology abbreviation; passed lower-cased to TriplifyMap()
 */
private function OWL2RDF($abbv)
{
    $path = parent::getReadFile()->getFilename();
    $rdfxml = file_get_contents("compress.zlib://" . $path);

    $parser = ARC2::getRDFXMLParser('file://' . $path);
    $parser->parse("http://bio2rdf.org/bioportal#", $rdfxml);

    foreach ($parser->getTriples() as $triple) {
        $this->TriplifyMap($triple, strtolower($abbv));
        parent::writeRDFBufferToWriteFile();
    }

    parent::clear();
}