/** * process a results xml file from the download directory **/ function process_file($infile) { $indir = parent::getParameterValue('indir'); $xml = new CXML($infile); $this->setCheckPoint('file'); while ($xml->Parse("clinical_study") == TRUE) { $this->setCheckPoint('record'); $this->root = $root = $xml->GetXMLRoot(); $this->nct_id = $nct_id = $this->getString("//id_info/nct_id"); $this->study_id = $study_id = parent::getNamespace() . "{$nct_id}"; ### declare $label = $this->getString("//brief_title"); if (!$label) { $label = $this->getString("//official_title"); } if (!$label) { $label = "Clinical trial #" . $nct_id; } parent::addRDF(parent::describeIndividual($study_id, $label, parent::getVoc() . "Clinical-Study") . parent::describeClass(parent::getVoc() . "Clinical-Study", "Clinical Study")); ########################################################################################## #required header ########################################################################################## parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "download-date", $this->getString('//required_header/download_date')) . parent::triplify($study_id, parent::getVoc() . "url", $this->getString('//required_header/url'))); ########################################################################################## #identifiers ########################################################################################## parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "nct-id", $this->getString('//id_info/nct_id'), "xsd:string") . parent::triplifyString($study_id, parent::getVoc() . "org-study-id", $this->getString('//id_info/org_study_id'), "xsd:string")); $sids = $root->xpath('//id_info/secondary_id'); if (isset($sids)) { foreach ($sids as $id) { parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . 
"secondary-id", (string) $id, "xsd:string")); } } $nctaliases = $root->xpath('//id_info/nct-alias'); if (isset($nctaliases)) { foreach ($nctaliases as $id) { parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "nct-alias", (string) $id, "xsd:string")); } } ########################################################################################## #titles ########################################################################################## parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "brief-title", $this->getString("//brief_title")) . parent::triplifyString($study_id, parent::getVoc() . "official-title", $this->getString("//official_title"))); ################################################################################### #brief summary ################################################################################### $brief_summary = str_replace(array("\r", "\n", "\t"), array("
", "
", "	"), $this->getString('//brief_summary/textblock')); parent::addRDF(parent::triplifyString($study_id, $this->getVoc() . "brief-summary", $brief_summary)); #################################################################################### # detailed description #################################################################################### $d = str_replace(array("\r", "\n", "\t"), array("
", "
", "	"), $this->getString('//detailed_description/textblock')); parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "detailed-description", $d)); ######################################################################################### #acronym ######################################################################################### parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "acronym", $this->getString("//acronym"))); ######################################################################################## #sponsors ######################################################################################## try { $sponsors = array("lead_sponsor", "collaborator"); foreach ($sponsors as $sponsor) { $a = @array_shift($root->xpath('//sponsors/' . $sponsor)); if ($a == null) { break; } $agency = $this->getString("//agency", $a); $agency_id = parent::getRes() . md5($agency); $agency_class = $this->getString("//agency_class", $a); $agency_class_id = parent::getRes() . md5($agency_class); parent::addRDF(parent::describeIndividual($agency_id, $agency, parent::getVoc() . "Organization") . parent::describeClass(parent::getVoc() . "Organization", "Organization") . parent::triplify($study_id, parent::getVoc() . str_replace("_", "-", $sponsor), $agency_id) . parent::describeIndividual($agency_class_id, $agency_class, parent::getVoc() . "Organization") . parent::describeClass(parent::getVoc() . "Organization", "Organization") . parent::triplify($agency_id, parent::getVoc() . "organization", $agency_class_id)); } } catch (Exception $e) { echo "There was an error in the lead sponsor element: {$e}\n"; } ################################################################################# # source ################################################################################# $source = $this->getString('//source'); if ($source) { $source_id = parent::getRes() . 
md5($source); parent::addRDF(parent::describeIndividual($source_id, $source, parent::getVoc() . "Organization") . parent::triplify($study_id, parent::getVoc() . "source", $source_id)); } ###################################################################################### # oversight ###################################################################################### try { $oversight = @array_shift($root->xpath('//oversight_info')); $oversight_id = parent::getRes() . md5($oversight->asXML()); $authority = $this->getString('//authority', $oversight); $authority_id = parent::getRes() . md5($authority); parent::addRDF(parent::describeIndividual($oversight_id, $authority, parent::getVoc() . "Organization") . parent::triplify($study_id, $this->getVoc() . "oversight", $oversight_id) . parent::triplify($study_id, $this->getVoc() . "authority", $authority_id) . parent::triplifyString($oversight_id, parent::getVoc() . "has-dmc", $this->getString('//has_dmc', $oversight))); } catch (Exception $e) { echo "There was an error in the oversight info element: {$e}\n"; } ################################################################################# # overall status ################################################################################# $overall_status = $this->getString('//overall_status'); if ($overall_status) { $status_id = parent::getRes() . md5($overall_status); parent::addRDF(parent::describeIndividual($status_id, $overall_status, parent::getVoc() . "Status") . parent::describeClass(parent::getVoc() . "Status", "Status") . parent::triplify($study_id, parent::getVoc() . "overall-status", $status_id)); } ######################################################################################### #why stopped ######################################################################################### parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . 
"why-stopped", $this->getString("//why_stopped"))); ################################################################################## # dates ################################################################################## $dates = array("start_date", "end_date", "completion_date", "primary_completion_date", "verification_date", "lastchanged_date", "firstreceived_date", "firstreceived_results_date"); foreach ($dates as $date) { $d = $this->getString('//' . $date); if ($d) { $datetime = $this->getDatetimeFromDate($d); if (isset($datetime)) { parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . str_replace("_", "-", $date), $datetime)); } else { trigger_error("unable to parse date: {$d}", E_USER_ERROR); } } } #################################################################################### # phase #################################################################################### $phase = $this->getString('//phase'); if ($phase && $phase != "N/A") { $phase_id = $this->getRes() . md5($phase); parent::addRDF(parent::describeIndividual($phase_id, $phase, parent::getVoc() . "Phase", $phase) . parent::describeClass(parent::getVoc() . "Phase", $phase) . parent::triplify($study_id, parent::getVoc() . "phase", $phase_id)); } ################################################################################### # study type #################################################################################### $study_type = $this->getString('//study_type'); if ($study_type) { $study_type_id = $this->getRes() . md5($study_type); parent::addRDF(parent::describeClass($study_type_id, $study_type, parent::getVoc() . "Study-Type") . parent::describeClass(parent::getVoc() . "Study-Type", "Study Type") . parent::triplify($study_id, parent::getVoc() . 
"study-type", $study_type_id)); } ############################################################################### # study design ############################################################################### $study_design = $this->getString('//study_design'); if ($study_design) { $study_design_id = parent::getRes() . md5($study_id . $study_design); parent::addRDF(parent::describeIndividual($study_design_id, "{$study_id} study design", parent::getVoc() . "Study-Design") . parent::describeClass(parent::getVoc() . "Study-Design", "Study Design") . parent::triplify($study_id, parent::getVoc() . "study-design", $study_design_id)); // Intervention Model: Parallel Assignment, Masking: Double-Blind, Primary Purpose: Treatment foreach (explode(", ", $study_design) as $i => $b) { $c = explode(": ", $b); if (isset($c[1])) { $sdp = $study_design_id . "-" . ($i + 1); $key = parent::getRes() . md5($c[0]); $value = parent::getRes() . md5($c[1]); parent::addRDF(parent::describeIndividual($sdp, $b, parent::getVoc() . "Study-Design-Parameter") . parent::describeClass(parent::getVoc() . "Study-Design-Parameter", "Study Design Parameter") . parent::triplify($sdp, parent::getVoc() . "key", $key) . parent::describeClass($key, $c[0]) . parent::triplify($sdp, parent::getVoc() . "value", $value) . parent::describeClass($value, $c[1]) . parent::triplify($study_design_id, parent::getVoc() . "study-design-parameter", $sdp)); } } } #################################################################################### # target duration #################################################################################### parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . 
"target-duration", $this->getString('//target_duration'))); ################################################################################ # outcomes ############################################################################### $outcomes = array("primary_outcome", "secondary_outcome", "other_outcome"); foreach ($outcomes as $outcome) { $o = $root->xpath('//' . $outcome); if ($o) { $os = $o; if (!is_array($o)) { $os = array($o); } foreach ($os as $o) { try { $po_id = parent::getRes() . md5($nct_id . $o->asXML()); $po_type = parent::getVoc() . str_replace("_", "-", $outcome); $measure = $this->getString('//measure', $o); $time_frame = $this->getString('//time_frame', $o); $safety_issue = $this->getString('//saftey_issue', $o); $description = $this->getString('//description', $o); parent::addRDF(parent::describeIndividual($po_id, $measure . " " . $time_frame, ucfirst($po_type)) . parent::describeClass(ucfirst($po_type), str_replace("_", " ", ucfirst($outcome))) . parent::triplifyString($po_id, "dc:description", $description) . parent::triplifyString($po_id, parent::getVoc() . "measure", $measure) . parent::triplifyString($po_id, parent::getVoc() . "time-frame", $time_frame) . parent::triplifyString($po_id, parent::getVoc() . "safety-issue", $safety_issue) . parent::triplify($study_id, parent::getVoc() . $po_type, $po_id)); } catch (Exception $e) { echo "There was an error parsing the primary outcome element: {$e} \n"; } } } } ############################################################################## #number of arms ############################################################################## try { parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . 
"number-of-arms", $this->getString('//number_of_arms'))); } catch (Exception $e) { echo "There was an exception parsing the number of arms element: {$e}\n"; } ############################################################################## #number of groups ############################################################################## try { parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "number-of-arms", $this->getString('//number_of_groups'))); } catch (Exception $e) { echo "There was an exception parsing the number of groups: {$e}\n"; } ############################################################################## #enrollment ############################################################################## try { $e = $root->xpath('//enrollment'); if ($e) { $type = strtolower((string) $e[0]->attributes()->type); $value = $this->getString('//enrollment'); parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . ($type ? $type . "-" : "") . "enrollment", $value)); } } catch (Exception $e) { echo "There was an exception parsing the enrollment element: {$e}\n"; } ############################################################################### #condition ############################################################################### try { $conditions = $root->xpath('//condition'); foreach ($conditions as $condition) { $mesh_label_id = parent::getRes() . md5($condition); parent::addRDF(parent::triplify($study_id, parent::getVoc() . "condition", $mesh_label_id) . parent::describeClass($mesh_label_id, $condition, parent::getVoc() . "Condition") . parent::describeClass(parent::getVoc() . 
"Condition", "Condition")); } } catch (Exception $e) { echo "There was an exception parsing condition element: {$e}\n"; } ################################################################################ # arm_group ################################################################################ try { $arm_groups = $root->xpath('//arm_group'); foreach ($arm_groups as $arm_group) { $arm_group_id = $this->getString('./arm_group_label', $arm_group); $arm_group_id = md5($arm_group_id); $arm_group_uri = parent::getRes() . $this->nct_id . "/arm-group/" . $arm_group_id; $arm_group_label = $this->nct_id . " arm group " . $arm_group_id; $arm_group_type = ucfirst(str_replace(" ", "_", $this->getString('./arm_group_type', $arm_group))); if (!$arm_group_type) { $arm_group_type = "Clinical-Arm"; } $description = $this->getString('./description', $arm_group); parent::addRDF(parent::describeIndividual($arm_group_uri, $arm_group_label, parent::getVoc() . $arm_group_type) . parent::describeClass(parent::getVoc() . $arm_group_type, ucfirst(str_replace("_", " ", $arm_group_type))) . parent::triplifyString($arm_group_uri, parent::getVoc() . "description", $description) . parent::describeIndividual($arm_group_uri, $arm_group, parent::getVoc() . "Arm-Group") . parent::describeClass(parent::getVoc() . "Arm-Group", "Arm Group") . parent::triplify($study_id, parent::getVoc() . "arm-group", $arm_group_uri)); } } catch (Exception $e) { echo "There was an exception in arm groups: {$e}\n"; } ############################################################################## #intervention ############################################################################## try { $interventions = $root->xpath('//intervention'); foreach ($interventions as $intervention) { $intervention_id = parent::getRes() . 
md5($intervention->asXML()); $intervention_name = $this->getString('./intervention_name', $intervention); $intervention_type = $this->getString('./intervention_type', $intervention); $intervention_type_uri = parent::getVoc() . ucfirst(str_replace(" ", "_", $intervention_type)); $intervention_desc = $this->getString('./description', $intervention); $intervention_on = $this->getString('./other_name', $intervention); parent::addRDF(parent::describeIndividual($intervention_id, $intervention_name, $intervention_type_uri) . parent::describeClass($intervention_type_uri, $intervention_type) . parent::triplifyString($intervention_id, parent::getVoc() . "intervention-name", $intervention_name) . parent::triplifyString($intervention_id, parent::getVoc() . "intervention-desc", $intervention_desc) . parent::triplifyString($intervention_id, parent::getVoc() . "other-name", $intervention_on) . parent::triplify($study_id, parent::getvoc() . "intervention", $intervention_id)); $agl = $intervention->xpath("./arm_group_label"); foreach ($agl as $a) { $arm_group_id = md5($a); $ag = parent::getRes() . $this->nct_id . "/arm-group/" . $arm_group_id; parent::addRDF(parent::describeIndividual($ag, $a, parent::getVoc() . "Arm-Group") . parent::describeClass(parent::getVoc() . "Arm-Group", "Arm Group") . parent::triplify($intervention_id, parent::getVoc() . "arm-group", $ag)); } } } catch (Exception $e) { echo "There was an error in interventions {$e}\n"; } ############################################################################### #eligibility ################################################################################ try { $eligibility = @array_shift($root->xpath('//eligibility')); if ($eligibility !== null) { $eligibility_label = "eligibility for " . $study_id; $eligibility_id = parent::getRes() . md5($eligibility->asXML()); parent::addRDF(parent::describeIndividual($eligibility_id, $eligibility_label, parent::getVoc() . "Eligibility") . parent::describeClass(parent::getVoc() . 
"Eligibility", "Eligibility") . parent::triplify($study_id, parent::getVoc() . "eligibility", $eligibility_id)); if ($criteria = @array_shift($eligibility->xpath('./criteria'))) { $text = @array_shift($criteria->xpath('./textblock')); parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . "text", $text)); $c = preg_split("/(Inclusion Criteria\\:|Exclusion Criteria\\:)/", $text); //inclusion if (isset($c[1])) { $d = explode(" - ", $c[1]); // the lists are separated by a hyphen foreach ($d as $inclusion) { $inc = trim($inclusion); if ($inc != '') { $inc_id = parent::getRes() . md5($inc); parent::addRDF(parent::describeIndividual($inc_id, $inc, parent::getVoc() . "Inclusion-Criteria") . parent::describeClass(parent::getVoc() . "Inclusion-Criteria", "Inclusion Criteria") . parent::triplify($eligibility_id, parent::getVoc() . "inclusion-criteria", $inc_id)); } } } //exclusion if (isset($c[2])) { $d = explode(" - ", $c[1]); foreach ($d as $exclusion) { $exc = trim($exclusion); if ($exc != '') { $exc_id = parent::getRes() . md5($exc); parent::addRDF(parent::describeIndividual($exc_id, $exc, parent::getVoc() . "Exclusion-Criteria") . parent::describeClass(parent::getVoc() . "Exclusion-Criteria", "Exclusion Criteria") . parent::triplify($eligibility_id, parent::getVoc() . "exclusion-criteria", $exc_id)); } } } } parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . "gender", $this->getString('./gender', $eligibility))); parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . "healthy-volunteers", $this->getString('./healthy_volunteers', $eligibility))); $attributes = array('minimum_age', 'maximum_age'); foreach ($attributes as $a) { $s = $this->getString('./' . $a, $eligibility); if ($s != 'N/A') { $age = trim(str_replace("Years", "", $s)); parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . 
str_replace("_", "-", $a), $age)); } } $attributes = array("study_pop" => "study-population", "sampling_method" => "sampling-method"); foreach ($attributes as $a => $r) { $e = @array_shift($eligibility->xpath('./' . $a)); if ($s = $this->getString('./' . $a, $eligibility)) { parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . $r, $this->getString('./textblock', $e))); } } } } catch (Exception $e) { echo "There was an error in eligibility: {$e}\n"; } ###################################################################################### #biospec ##################################################################################### parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "biospec-retention", $this->getString('//biospec_retention'))); try { $b = @array_shift($root->xpath('//biospec_descr')); if ($b) { parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "biospec_descr", $this->getString('./textblock', $b))); } } catch (Exception $e) { echo "There was an error in biospec_descr: {$e}\n"; } ################################################################### # contacts ################################################################### $contacts = array("overall_official", "overall_contact", "overall_contact_backup"); try { foreach ($contacts as $c) { $d = @array_shift($root->xpath('//' . $c)); if ($d) { parent::addRDF(parent::triplify($study_id, parent::getVoc() . str_replace("_", "-", $c), $this->makeContact($d))); } } } catch (Exception $e) { echo "There was an error parsing overall contact: {$e}" . "\n"; } ############################################################## # location of facility doing the testing ############################################################## try { $location = @array_shift($root->xpath('//location')); if ($location) { $location_uri = parent::getRes() . 
md5($location->asXML()); $name = $this->getString('//facility/name', $location); $address = @array_shift($location->xpath('//facility/address')); $contact = @array_shift($location->xpath('//contact')); $backups = @array_shift($location->xpath('//contact_backup')); $investigators = @array_shift($location->xpath('//investigator')); parent::addRDF(parent::describeIndividual($location_uri, $name, parent::getVoc() . "Location") . parent::describeClass(parent::getVoc() . "Location", "Location") . parent::triplifyString($location_uri, parent::getVoc() . "status", $this->getString('//status', $location)) . parent::triplify($study_id, parent::getVoc() . "location", $location_uri) . parent::triplify($location_uri, parent::getVoc() . "address", $this->makeAddress($address)) . ($contact != null ? parent::triplify($location_uri, parent::getVoc() . "contact", $this->makeContact($contact)) : "")); if ($backups) { foreach ($backups as $backup) { parent::addRDF(parent::triplify($location_uri, parent::getVoc() . "contact-backup", $this->makeContact($backup))); } } if ($investigators) { foreach ($investigators as $investigator) { parent::addRDF(parent::triplify($location_uri, parent::getVoc() . "investigator", $this->makeContact($investigator))); } } } } catch (Exception $e) { echo "There was an error parsing location: {$e}" . "\n"; } ###################################################################### #countries ###################################################################### try { $a = array("location_countries", "removed_countries"); foreach ($a as $country) { $lc = @array_shift($root->xpath('//' . $country)); if ($lc) { $label = $this->getString('//country', $lc); $cid = parent::getRes() . md5($label); parent::addRDF(parent::describeIndividual($cid, $label, parent::getVoc() . "Country") . parent::describeClass(parent::getVoc() . "Country", "Country") . parent::triplify($study_id, parent::getVoc() . 
"country", $cid)); } } } catch (Exception $e) { echo "There was an error parsing country: {$e}" . "\n"; } ###################################################################### #reference ###################################################################### try { $a = array("reference", "result_reference"); foreach ($a as $ref_type) { $references = $root->xpath('//' . $ref_type); foreach ($references as $reference) { $p = $this->getString('./PMID', $reference); if ($p) { $pmid = "pubmed:{$p}"; parent::addRDF(parent::describeIndividual($pmid, $p, parent::getVoc() . "Reference") . parent::describeClass(parent::getVoc() . "Reference", "Reference") . parent::triplifyString($pmid, parent::getVoc() . "citation", $this->getString('./citation', $reference)) . parent::triplify($study_id, parent::getVoc() . str_replace("_", "-", $ref_type), $pmid)); } } } } catch (Exception $e) { echo "There was an error parsing references element: {$e}\n"; } ####################################################################### #link ####################################################################### try { $links = $root->xpath('//link'); foreach ($links as $i => $link) { $url = $this->getString('./url', $link); $url = preg_replace("/>.*\$/", "", $url); $lid = parent::getRes() . md5($url); parent::addRDF(parent::describeIndividual($lid, $this->getString('./description', $link), parent::getVoc() . "Link") . parent::describeClass(parent::getVoc() . "Link", "Link") . parent::triplify($lid, parent::getVoc() . "url", $url) . parent::triplify($study_id, parent::getVoc() . "link", $lid)); } } catch (Exception $e) { echo "There was an error parsing link element: {$e}\n"; } ############################################################################ #responsible party ############################################################################ try { $rp = @array_shift($root->xpath('//responsible_party')); if ($rp) { $rp_id = parent::getRes() . 
md5($rp->asXML()); $label = $this->getString('./name_title', $rp); if (!$label) { $label = $this->getString('./organization', $rp); } else { $label .= ", " . $this->getString('./organization', $rp); } if (!$label) { $label = $this->getString('./party_type', $rp); } $org_id = parent::getRes() . md5($this->getString('./organization', $rp)); parent::addRDF(parent::describeIndividual($rp_id, $label, parent::getVoc() . "Responsible-Party") . parent::describeClass(parent::getVoc() . "Responsible-Party", "Responsible Party") . parent::triplify($study_id, parent::getVoc() . "responsible-party", $rp_id) . parent::triplify($rp_id, parent::getVoc() . "organization", $org_id) . parent::describeIndividual($org_id, $this->getString('./organization', $rp), parent::getVoc() . "Organization") . parent::describeClass(parent::getVoc() . "Organization", "Organization") . parent::triplifyString($rp_id, parent::getVoc() . "name-title", $this->getString('./name_title', $rp)) . parent::triplifyString($rp_id, parent::getVoc() . "party-type", $this->getString('./party_type', $rp)) . parent::triplifyString($rp_id, parent::getVoc() . "investigator-affiliation", $this->getString('./investigator_affiliation', $rp)) . parent::triplifyString($rp_id, parent::getVoc() . "investigator-full-name", $this->getString('./investigator_full_name', $rp)) . parent::triplifyString($rp_id, parent::getVoc() . "investigator-title", $this->getString('./investigator_title', $rp))); } } catch (Exception $e) { echo "There was an error parsing the responsible_party element: {$e}\n"; } ############################################################################## # keywords ############################################################################## try { $keywords = $root->xpath('//keyword'); foreach ($keywords as $keyword) { parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . 
"keyword", (string) $keyword)); } } catch (Exception $e) { echo "There was an error parsing the keywords element: {$e}"; } # mesh terms # note: mesh terms are assigned using an imperfect algorithm try { $mesh_terms = $root->xpath('//condition_browse/mesh_term'); foreach ($mesh_terms as $mesh_term) { $term = (string) $mesh_term; $mesh_id = parent::getRes() . md5($term); parent::addRDF(parent::triplify($study_id, parent::getVoc() . "condition-mesh", $mesh_id)); parent::addRDF(parent::triplifyString($mesh_id, "rdfs:label", $term)); } } catch (Exception $e) { echo "There was an error in mesh_terms: {$e}\n"; } ################################################################################ # regulated by fda? is section 801? has expanded access? ################################################################################ try { parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "is-fda-regulated", $this->getString('is_fda_regulated')) . parent::triplifyString($study_id, parent::getVoc() . "is-section-801", $this->getString('is_section_801')) . parent::triplifyString($study_id, parent::getVoc() . "has-expanded-access", $this->getString('has_expanded_access'))); } catch (Exception $e) { echo "There was an error parsing the is_fda_regulated element: {$e}\n"; } ############################################################################### # mesh terms for the intervention browse ############################################################################### try { $a = array("condition_browse", "intervention_browse"); foreach ($a as $browse_type) { $terms = $root->xpath("//{$browse_type}/mesh_term"); foreach ($terms as $term) { $term_label = (string) $term; $term_id = parent::getRes() . md5($term); parent::addRDF(parent::describeIndividual($term_id, $term_label, parent::getVoc() . "Term") . parent::describeClass(parent::getVoc() . "Term", "Term") . parent::triplify($study_id, parent::getVoc() . 
str_replace("_", "-", $browse_type), $term_id)); } } } catch (Exception $e) { echo "There was an error parsing {$browse_type}/mesh_term element: {$e}\n"; } ################################################################################ # clinical results ################################################################################ try { $cr = @array_shift($root->xpath('//clinical_results')); if ($cr) { $cr_id = parent::getRes() . md5($study_id . $cr->asXML()); parent::addRDF(parent::describeIndividual($cr_id, "clinical results for {$study_id}", parent::getVoc() . "Clinical-Result") . parent::describeClass(parent::getVoc() . "Clinical-Result", "Clinical Result") . parent::triplifyString($cr_id, parent::getVoc() . "description", $this->getString('./desc', $cr)) . parent::triplifyString($cr_id, parent::getVoc() . "restrictive-agreement", $this->getString('./restrictive_agreement', $cr)) . parent::triplifyString($cr_id, parent::getVoc() . "limitations-and-caveats", $this->getString('./limitations_and_caveats', $cr)) . parent::triplify($study_id, parent::getVoc() . "clinical-result", $cr_id)); } } catch (Exception $e) { echo "There was an error parsing clinical results: {$e}\n"; } ################################################################################ # Participant Flow ################################################################################ try { $pc = 1; $mc = 1; $wc = 1; $pf = @array_shift($root->xpath('//clinical_results/participant_flow')); if ($pf) { $pf_id = parent::getRes() . md5($pf->asXML()); parent::addRDF(parent::describeIndividual($pf_id, "participant flow for {$study_id}", parent::getVoc() . "Participant-Flow") . parent::describeClass(parent::getVoc() . "Participant-Flow", "Participant-Flow") . parent::triplify($study_id, parent::getVoc() . "participant-flow", $pf_id) . parent::triplifyString($pf_id, parent::getVoc() . "recruitment-details", $this->getString('./recruitment_details', $pf)) . 
parent::triplifyString($pf_id, parent::getVoc() . "pre-assignment-details", $this->getString('./pre_assignment_details', $pf))); $groups = @array_shift($pf->xpath('./group_list')); foreach ($groups as $group) { parent::addRDF(parent::triplify($pf_id, parent::getVoc() . "group", $this->makeGroup($group))); } //period_list $periods = @array_shift($pf->xpath('./period_list')); foreach ($periods as $period) { $period_id = parent::getRes() . $nct_id . "/period/" . $pc++; $period_title = $this->getString('./title', $period); parent::addRDF(parent::describeIndividual($period_id, $period_title . " for {$nct_id}", parent::getVoc() . "Period") . parent::describeClass(parent::getVoc() . "Period", "Period") . parent::triplify($pf_id, parent::getVoc() . "period", $period_id)); // milestones $milestones = @array_shift($period->xpath('./milestone_list')); if ($milestones) { foreach ($milestones as $milestone) { $milestone_id = parent::getRes() . $nct_id . "/milestone/" . $mc++; $label = $this->getString('./title', $milestone); parent::addRDF(parent::describeIndividual($milestone_id, $label, parent::getVoc() . "Milestone") . parent::describeClass(parent::getVoc() . "Milestone", "Milestone") . parent::triplify($period_id, parent::getVoc() . "milestone", $milestone_id)); // participants $p = 1; $ps_list = @array_shift($milestone->xpath('./participants_list')); foreach ($ps_list as $ps) { $ps_id = $milestone_id . "/p/" . $p++; $group_id = parent::getRes() . $this->nct_id . "/group/" . $ps->attributes()->group_id; $count = (string) $ps->attributes()->count; parent::addRDF(parent::describeIndividual($ps_id, "participant counts in " . $ps->attributes()->group_id . " for milestone {$mc} of {$nct_id}", parent::getVoc() . "Participant-Count") . parent::describeClass(parent::getVoc() . "Participant-Count", "Participant Count") . parent::triplify($ps_id, parent::getVoc() . "group", $group_id) . parent::triplifyString($ps_id, parent::getVoc() . "count", $count) . 
parent::triplify($milestone_id, parent::getVoc() . "participant-counts", $ps_id)); } } } // milestones $withdraws = @array_shift($period->xpath('./drop_withdraw_reason_list')); if ($withdraws) { foreach ($withdraws as $withdraw) { $wid = parent::getRes() . $this->nct_id . "/withdraw/" . $wc++; $label = $this->getString('./title', $withdraw); parent::addRDF(parent::describeIndividual($wid, $label, parent::getVoc() . "Withdraw-Reason") . parent::describeClass(parent::getVoc() . "Withdraw-Reason", "Withdraw Reason")); // participants $ps_list = @array_shift($withdraw->xpath('./participants_list')); foreach ($ps_list as $ps) { $group_id = parent::getRes() . $nct_id . "/group/" . $ps->attributes()->group_id; $count = (string) $ps->attributes()->count; parent::addRDF(parent::triplify($wid, parent::getVoc() . "group", $group_id) . parent::triplifyString($wid, parent::getVoc() . "count", $count)); } } } } } } catch (Exception $e) { echo "There was an error parsing participant flow element: {$e}\n"; } ################################################################################ # baseline ################################################################################ try { $baseline = @array_shift($root->xpath('//baseline')); if ($baseline) { $b_id = $this->nct_id . "/baseline"; $b_uri = parent::getRes() . $b_id; // group list $groups = @array_shift($baseline->xpath('./group_list')); foreach ($groups as $group) { parent::addRDF(parent::describeIndividual($b_uri, "baseline for {$nct_id}", parent::getVoc() . "Baseline") . parent::describeClass(parent::getVoc() . "Baseline", "Baseline") . parent::triplify($b_uri, parent::getVoc() . "group", $this->makeGroup($group)) . parent::triplify($study_id, parent::getVoc() . "baseline", $b_uri)); } // measure list $measures = @array_shift($baseline->xpath('./measure_list')); foreach ($measures as $measure) { parent::addRDF(parent::triplify($b_uri, parent::getVoc() . 
"measure", $this->makeMeasure($measure))); } } } catch (Exception $e) { echo "Error in parsing baseline" . PHP_EOL; } ################################################################################ # outcomes ################################################################################ try { $outcomes = @array_shift($root->xpath('//outcome_list')); if ($outcomes) { foreach ($outcomes as $i => $outcome) { $outcome_id = $this->nct_id . "/outcome/" . ($i + 1); $outcome_uri = parent::getRes() . $outcome_id; $outcome_label = $this->getString("./title", $outcome); if (!$outcome_label) { $outcome_label = "outcome for " . $this->nct_id; } parent::addRDF(parent::describeIndividual($outcome_uri, $outcome_label, parent::getVoc() . "Outcome", $this->getString("./description", $outcome)) . parent::describeClass(parent::getVoc() . "Outcome", "Outcome") . parent::triplify($study_id, parent::getVoc() . "outcome", $outcome_uri) . parent::triplifyString($outcome_uri, parent::getVoc() . "type", $this->getString("./type", $outcome)) . parent::triplifyString($outcome_uri, parent::getVoc() . "time-frame", $this->getString("./time_frame", $outcome)) . parent::triplifyString($outcome_uri, parent::getVoc() . "safety-issue", $this->getString("./safety_issue", $outcome)) . parent::triplifyString($outcome_uri, parent::getVoc() . "posting-date", $this->getString("./posting-date", $outcome)) . parent::triplifyString($outcome_uri, parent::getVoc() . "population", $this->getString("./population", $outcome))); $groups = @array_shift($outcome->xpath('./group_list')); if ($groups) { foreach ($groups as $group) { parent::addRDF(parent::triplify($outcome_uri, parent::getVoc() . "group", $this->makeGroup($group))); } } // measure list $measures = @array_shift($outcome->xpath('./measure_list')); if ($measures) { foreach ($measures as $measure) { parent::addRDF(parent::triplify($outcome_uri, parent::getVoc() . 
"measure", $this->makeMeasure($measure))); } } // analysis list $analyses = @array_shift($outcome->xpath('./analysis_list')); if ($analyses) { foreach ($analyses as $analysis) { parent::addRDF(parent::triplify($outcome_uri, parent::getVoc() . "analysis", $this->makeAnalysis($analysis))); } } } } } catch (Exception $e) { echo "Error in parsing outcomes" . PHP_EOL; } ################################################################################ # events ################################################################################ try { $c_ev = $c_c = 1; $reported_events = @array_shift($root->xpath('//reported_events')); if ($reported_events) { $rp_id = parent::getRes() . md5($reported_events->asXML()); $groups = @array_shift($reported_events->xpath('./group_list')); parent::addRDF(parent::describeIndividual($rp_id, "Reported events for {$nct_id}", parent::getVoc() . "Reported-Events") . parent::describeClass(parent::getVoc() . "Reported-Events", "Reported Events") . parent::triplify($study_id, parent::getVoc() . "reported-events", $rp_id)); foreach ($groups as $group) { parent::addRDF(parent::triplify($rp_id, parent::getVoc() . "group", $this->makeGroup($group))); } // events $event_list = array("serious_events" => "Serious Event", "other_events" => "Other Event"); foreach ($event_list as $ev => $ev_label) { $et = @array_shift($reported_events->xpath('./' . $ev)); if (!$et) { continue; } $ev_uri = parent::getVoc() . str_replace(" ", "-", $ev_label); $categories = @array_shift($et->xpath('./category_list')); foreach ($categories as $category) { $major_title = $this->getString('./title', $category); $major_title_uri = parent::getRes() . md5($major_title); $events = @array_shift($category->xpath('./event_list')); foreach ($events as $event) { $e_uri = parent::getRes() . $this->nct_id . "/{$ev}/" . $c_ev++; $subtitle = (string) $this->getString('./sub_title', $event) . " for " . $this->nct_id; $subtitle_uri = parent::getRes() . 
md5($subtitle); parent::addRDF(parent::describeIndividual($e_uri, $subtitle, $ev_uri) . parent::describeClass($ev_uri, $ev_label) . parent::triplify($e_uri, parent::getVoc() . "sub-title", $subtitle_uri) . parent::describeIndividual($subtitle_uri, $subtitle, parent::getVoc() . "Event") . parent::describeClass(parent::getVoc() . "Event", "Event") . parent::triplify($e_uri, parent::getVoc() . "major-title", $major_title_uri) . parent::describeClass($major_title_uri, $major_title) . parent::triplify($rp_id, parent::getVoc() . str_replace("_", "-", $ev), $e_uri)); $counts = $event->xpath('./counts'); foreach ($counts as $c) { $group_id = $c->attributes()->group_id; $group_uri = parent::getRes() . $nct_id . "/group/" . $group_id; $c_uri = $e_uri . "/count/" . $c_c++; parent::addRDF(parent::describeIndividual($c_uri, $subtitle . " for " . $group_id . " in " . $this->nct_id, parent::getVoc() . "Event-Count") . parent::describeClass(parent::getVoc() . "Event-Count", "Event Count") . parent::triplify($c_uri, parent::getVoc() . "group", $group_uri) . parent::triplify($e_uri, parent::getVoc() . "count", $c_uri) . parent::triplifyString($c_uri, parent::getVoc() . "default-vocabulary", $this->getString('./default_vocab', $et)) . parent::triplifyString($c_uri, parent::getVoc() . "frequency-threshold", $this->getString('./frequency_threshold', $et)) . parent::triplifyString($c_uri, parent::getVoc() . "default-assessment", $this->getString('./default_assessment', $et)) . parent::triplifyString($c_uri, parent::getVoc() . "number-events", $c->attributes()->events) . parent::triplifyString($c_uri, parent::getVoc() . "subjects-affected", $c->attributes()->subjects_affected) . parent::triplifyString($c_uri, parent::getVoc() . "subjects-at-risk", $c->attributes()->subjects_at_risk)); } } } } } } catch (Exception $e) { echo "Error in parsing reported events" . PHP_EOL; } parent::writeRDFBufferToWriteFile(); } $this->setCheckPoint('record'); $this->setCheckPoint('dataset'); }
/**
 * Convert a parsed InterPro XML document into RDF.
 *
 * First records every <dbinfo> release entry against the dataset URI (the
 * INTERPRO entry also sets the dataset version), then emits triples for each
 * <interpro> entry: label and type, PubMed citations, abstract, examples,
 * parent/child/contains/found-in relations, secondary accessions,
 * cross-references and taxon distribution.
 *
 * The RDF buffer is written out at the START of every entry iteration, so the
 * triples of the last entry are still buffered when this method returns.
 * NOTE(review): presumably the caller performs the final flush - confirm.
 *
 * @param object $xml parsed document, accessed SimpleXML-style
 *                    (->attributes(), ->asXML(), child-element properties)
 */
function Parse($xml)
{
    // state the dataset info
    foreach ($xml->release->dbinfo as $info) {
        $attr = $info->attributes();
        $db = $attr->dbname . " v" . $attr->version . " (" . $attr->entry_count . " entries) [" . $attr->file_date . "]";
        parent::addRDF(parent::triplifyString(parent::getDatasetURI(), parent::getVoc() . "contains", $db));
        if ((string) $attr->dbname === "INTERPRO") {
            parent::setDatasetVersion($attr->version);
        }
    }

    // get a potential id list (comma separated); when set, only these entries are converted
    if (parent::getParameterValue("id_list") != '') {
        $id_list = explode(",", parent::getParameterValue("id_list"));
    }

    // now iterate over the entries
    foreach ($xml->interpro as $o) {
        parent::writeRDFBufferToWriteFile();

        // cast once so the filter compares plain strings rather than XML nodes
        $interpro_id = (string) $o->attributes()->id;
        if (isset($id_list) && !in_array($interpro_id, $id_list, true)) {
            continue;
        }
        echo "Processing {$interpro_id}" . PHP_EOL;

        $name = $o->name;
        $short_name = $o->attributes()->short_name;
        $type = $o->attributes()->type;
        $s = parent::getNamespace() . $interpro_id;
        parent::addRDF(parent::describeIndividual($s, "{$name} ({$short_name}) {$type}", parent::getVoc() . $type));

        // get the pubs: collect <cite/> placeholders and their PubMed link replacements
        $pubs = array('pid' => array(), 'pmid' => array());
        foreach ($o->pub_list->publication as $p) {
            $pid = (string) $p->attributes()->id;
            if (isset($p->db_xref) && (string) $p->db_xref->attributes()->db === "PUBMED") {
                $pmid = (string) $p->db_xref->attributes()->dbkey;
                $pubs['pid'][] = '<cite idref="' . $pid . '"/>';
                $pubs['pmid'][] = '<a href="http://www.ncbi.nlm.nih.gov/pubmed/' . $pmid . '">pubmed:' . $pmid . '</a>';
                parent::addRDF(parent::triplify($s, parent::getVoc() . "x-pubmed", "pubmed:{$pmid}"));
            }
        }

        // the abstract keeps its markup; citation placeholders become PubMed links
        $abstract = (string) $o->abstract->p->asXML();
        if ($pubs['pid']) {
            $abstract = str_replace($pubs['pid'], $pubs['pmid'], $abstract);
        }
        parent::addRDF(parent::triplifyString($s, "dc:description", $abstract));

        if (isset($o->example_list)) {
            foreach ($o->example_list->example as $example) {
                $db = (string) $example->db_xref->attributes()->db;
                $id = (string) $example->db_xref->attributes()->dbkey;
                parent::addRDF(parent::triplify($s, parent::getVoc() . "example-entry", "{$db}:{$id}"));
            }
        }

        if (isset($o->parent_list->rel_ref)) {
            foreach ($o->parent_list->rel_ref as $parent) {
                $id = (string) $parent->attributes()->ipr_ref;
                parent::addRDF(parent::triplify($s, parent::getVoc() . "parent", "interpro:{$id}"));
            }
        }
        // NOTE(review): parents are read from <parent_list>; the children may live in
        // <child_list> rather than <child> - confirm against the InterPro XML schema.
        if (isset($o->child->rel_ref)) {
            foreach ($o->child->rel_ref as $child) {
                $id = (string) $child->attributes()->ipr_ref;
                parent::addRDF(parent::triplify($s, parent::getVoc() . "child", "interpro:{$id}"));
            }
        }
        if (isset($o->contains->rel_ref)) {
            foreach ($o->contains->rel_ref as $contains) {
                $id = (string) $contains->attributes()->ipr_ref;
                parent::addRDF(parent::triplify($s, parent::getVoc() . "contains", "interpro:{$id}"));
            }
        }
        if (isset($o->found_in->rel_ref)) {
            foreach ($o->found_in->rel_ref as $f) {
                $id = (string) $f->attributes()->ipr_ref;
                parent::addRDF(parent::triplify($s, parent::getVoc() . "found-in", "interpro:{$id}"));
            }
        }

        // secondary accessions: iterate the same path that is tested, and use a
        // dedicated loop variable so the entry URI in $s is not overwritten
        if (isset($o->sec_list->sec_ac)) {
            foreach ($o->sec_list->sec_ac as $sec) {
                $id = (string) $sec->attributes()->acc;
                parent::addRDF(parent::triplify($s, parent::getVoc() . "secondary-accession", "interpro:{$id}"));
            }
        }

        // xrefs: member databases, external documents and structure databases all
        // use <db_xref db="..." dbkey="..."/> children and the same x-<db> predicate
        if (isset($o->member_list->db_xref)) {
            foreach ($o->member_list->db_xref as $dbxref) {
                $db = (string) $dbxref->attributes()->db;
                $id = (string) $dbxref->attributes()->dbkey;
                parent::addRDF(parent::triplify($s, parent::getVoc() . "x-" . strtolower($db), "{$db}:{$id}"));
            }
        }
        if (isset($o->external_doc_list)) {
            foreach ($o->external_doc_list->db_xref as $dbxref) {
                $db = (string) $dbxref->attributes()->db;
                $id = (string) $dbxref->attributes()->dbkey;
                parent::addRDF(parent::triplify($s, parent::getVoc() . "x-" . strtolower($db), "{$db}:{$id}"));
            }
        }
        if (isset($o->structure_db_links->db_xref)) {
            foreach ($o->structure_db_links->db_xref as $dbxref) {
                $db = (string) $dbxref->attributes()->db;
                $id = (string) $dbxref->attributes()->dbkey;
                parent::addRDF(parent::triplify($s, parent::getVoc() . "x-" . strtolower($db), "{$db}:{$id}"));
            }
        }

        // taxon distribution, as "<organism name> (<protein count>)"
        foreach ($o->taxonomy_distribution->taxon_data as $t) {
            $organism = (string) $t->attributes()->name;
            $number = (string) $t->attributes()->proteins_count;
            parent::addRDF(parent::triplifyString($s, parent::getVoc() . "taxon-distribution", "{$organism} ({$number})"));
        }
    }
}
/**
 * Convert tab-separated gene2accession rows into RDF.
 *
 * The first line read is discarded; lines starting with "#" are skipped.
 * Every remaining line is expected to hold 16 columns. When both genomic
 * positions (columns 9 and 10) are present, a FALDO region with begin/end
 * positions on the sequence in column 7 is described. Then every column
 * other than 1, 9, 10 and 11 whose value is not "-" becomes one triple on
 * the gene: a resource when the column has a namespace, a literal otherwise.
 * Rows are filtered by $this->taxids when that property is set. The RDF
 * buffer is written after each row.
 */
private function gene2accession()
{
    // skip the first line
    $this->getReadFile()->read(200000);

    // column index => predicate suffix ('rel') and optional object namespace ('ns')
    $columns = array(
        array('rel' => "x-taxonomy", 'ns' => "taxonomy"),
        array('rel' => "ncbigene", 'ns' => "ncbigene"),
        array('rel' => "status"),
        array('rel' => "rna-nucleotide-accession.version", 'ns' => "genbank"),
        array('rel' => "rna-nucleotide-gi", 'ns' => "gi"),
        array('rel' => "protein-accession.version", 'ns' => "genbank"),
        array('rel' => "protein-gi", 'ns' => "gi"),
        array('rel' => "genomic-nucleotide-accession.version", 'ns' => "genbank"),
        array('rel' => "genomic-nucleotide-gi", 'ns' => "gi"),
        array('rel' => "genomic-start-position"),
        array('rel' => "genomic-end-position"),
        array('rel' => "orientation"),
        array('rel' => "assembly"),
        array('rel' => "mature-peptide-accession.version", 'ns' => "genbank"),
        array('rel' => "mature-peptide-gi", 'ns' => "gi"),
        array('rel' => "symbol"),
    );
    // columns the generic per-column loop never emits
    $not_emitted = array(1, 9, 10, 11);
    // orientation symbol => FALDO position type; anything else is merely stranded
    $strand_types = array(
        "+" => "faldo:ForwardStrandPosition",
        "-" => "faldo:ReverseStrandPosition",
    );

    // tab is the separator, a pound sign starts a comment
    $counter = 1;
    while ($line = $this->getReadFile()->read(200000)) {
        if ($line[0] == "#") {
            continue;
        }

        // progress report and clear() every 10000th data line; the value
        // printed is the counter after it has been advanced
        $report_now = ($counter % 10000 == 0);
        $counter++;
        if ($report_now) {
            echo $counter . PHP_EOL;
            parent::clear();
        }

        $fields = explode("\t", rtrim($line));
        $found = count($fields);
        if ($found != 16) {
            trigger_error("gene2accession: expecting 16 columns, found " . $found . " instead", E_USER_ERROR);
        }

        // optional taxon filter
        $taxid = $fields[0];
        if (isset($this->taxids) and !isset($this->taxids[$taxid])) {
            continue;
        }

        $gene = $fields[1];
        $sequence = $fields[7];
        $begin = $fields[9];
        $end = $fields[10];
        $gene_uri = parent::getNamespace() . $gene;
        // a status other than "-" switches genbank-namespaced values to refseq
        $is_refseq = ($fields[2] != '-');

        if ($begin != '-' and $end != '-') {
            $region_uri = parent::getRes() . $sequence . "/" . $begin . "-" . $end;
            $begin_uri = parent::getRes() . $sequence . "/" . $begin;
            $end_uri = parent::getRes() . $sequence . "/" . $end;

            $strand = $fields[11];
            $position_type = isset($strand_types[$strand]) ? $strand_types[$strand] : "faldo:StrandedPosition";

            $rdf = parent::describeIndividual($region_uri, "location of ncbigene:" . $gene . " on " . $sequence, "faldo:Region");
            $rdf .= parent::describeIndividual($begin_uri, "start of ncbigene:" . $gene . " on " . $sequence, "faldo:ExactPosition");
            $rdf .= parent::describeIndividual($end_uri, "stop position of ncbigene:" . $gene . " on " . $sequence, "faldo:ExactPosition");
            $rdf .= parent::triplify($gene_uri, "faldo:location", $region_uri);
            $rdf .= parent::triplify($region_uri, "faldo:begin", $begin_uri);
            $rdf .= parent::triplify($begin_uri, "rdf:type", $position_type);
            $rdf .= parent::triplifyString($begin_uri, "faldo:position", $begin, "xsd:integer");
            $rdf .= parent::triplify($begin_uri, "faldo:reference", "refseq:" . $sequence);
            $rdf .= parent::triplify($region_uri, "faldo:end", $end_uri);
            $rdf .= parent::triplify($end_uri, "rdf:type", $position_type);
            $rdf .= parent::triplifyString($end_uri, "faldo:position", $end, "xsd:integer");
            $rdf .= parent::triplify($end_uri, "faldo:reference", "refseq:" . $sequence);
            parent::addRDF($rdf);
        }

        foreach ($columns as $index => $spec) {
            if ($fields[$index] == "-" or in_array($index, $not_emitted, true)) {
                continue;
            }
            $predicate = parent::getVoc() . $spec['rel'];
            if (!isset($spec['ns'])) {
                // plain value column
                parent::addRDF(parent::triplifyString($gene_uri, $predicate, $fields[$index]));
                continue;
            }
            // identifier column
            $prefix = ($spec['ns'] == 'genbank' and $is_refseq == true) ? 'refseq' : $spec['ns'];
            parent::addRDF(parent::triplify($gene_uri, $predicate, "{$prefix}:" . $fields[$index]));
        }

        parent::writeRDFBufferToWriteFile();
    }
}
/**
 * Convert tab-separated gene interaction rows into RDF.
 *
 * Lines starting with "#" are skipped; rows that do not have exactly 11
 * columns raise a warning and are skipped. Columns used: 0 interaction id,
 * 1 interaction type, 2 additional information, 5 first gene, 8 second gene.
 *
 * Depending on the additional information, a row is described as
 *  - "No_interaction": a <type>-Non-Interaction individual plus a negative
 *    property assertion between the two genes;
 *  - "N/A" or "Genetic_interaction": a <type>-Interaction individual plus a
 *    direct gene-to-gene triple;
 *  - anything else: an individual of a subtype class named after the
 *    additional information, plus a direct gene-to-gene triple.
 *
 * The gene-to-gene predicate is derived from the interaction type on every
 * row. For an unrecognised type a warning is raised and the triples that
 * need the predicate are left out, instead of silently reusing the predicate
 * of a previous row.
 */
function gene_interactions()
{
    // interaction type => local name of the gene-to-gene predicate
    $predicate_names = array(
        "Genetic" => "genetically-interacts-with",
        "Physical" => "physically-interacts-with",
        "Predicted" => "predicted-to-interact-with",
        "Regulatory" => "regulates",
    );

    while ($l = parent::getReadFile()->Read()) {
        if ($l[0] == '#') {
            continue;
        }
        $data = explode("\t", $l);
        if (count($data) != 11) {
            trigger_error("Found " . count($data) . " columns, expecting 11");
            continue;
        }

        $interaction = $data[0];
        $interaction_type = str_replace("_", "-", $data[1]);       // URI-friendly form
        $interaction_type_label = str_replace("_", " ", $data[1]); // human-readable form
        $int_additional_info = $data[2];
        $gene1 = $data[5];
        $gene2 = $data[8];

        $interaction_id = parent::getNamespace() . $interaction;
        $gene1_uri = parent::getNamespace() . $gene1;
        $gene2_uri = parent::getNamespace() . $gene2;

        // resolve the predicate afresh for every row
        $int_pred = null;
        if (isset($predicate_names[$interaction_type])) {
            $int_pred = parent::getVoc() . $predicate_names[$interaction_type];
        } else {
            trigger_error("Unknown interaction type '" . $data[1] . "' for interaction " . $interaction . "; predicate-based triples are omitted");
        }

        // both participants are always linked to the interaction node
        $involves = parent::triplify($interaction_id, parent::getVoc() . "involves", $gene1_uri)
            . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene2_uri);
        $base_class = parent::getVoc() . $interaction_type . "-Interaction";

        if ($int_additional_info == "No_interaction") {
            $non_class = parent::getVoc() . $interaction_type . "-Non-Interaction";
            $interaction_label = "No " . strtolower($interaction_type) . " interaction between " . $gene1 . " and " . $gene2;
            parent::addRDF(
                parent::describeIndividual($interaction_id, $interaction_label, $non_class)
                . parent::describeClass($non_class, $interaction_type_label . " non-interaction")
                . $involves
            );

            if ($int_pred !== null) {
                // NOTE(review): the OWL 2 RDF mapping names this class
                // owl:NegativePropertyAssertion; the type below is kept as it
                // was to avoid changing published data - confirm which is wanted.
                $npa_id = parent::getRes() . md5($interaction_id . "negative property assertion");
                $npa_label = "Negative property assertion stating that " . $gene1 . " and " . $gene2 . " do not have a " . $interaction_type_label . " interaction";
                parent::addRDF(
                    parent::describeIndividual($npa_id, $npa_label, "owl:NegativeObjectPropertyAssertion")
                    . parent::triplify($npa_id, "owl:sourceIndividual", $gene1_uri)
                    . parent::triplify($npa_id, "owl:targetIndividual", $gene2_uri)
                    . parent::triplify($npa_id, "owl:assertionProperty", $int_pred)
                );
            }
        } elseif ($int_additional_info == "N/A" || $int_additional_info == "Genetic_interaction") {
            $interaction_label = $interaction_type . " interaction between " . $gene1 . " and " . $gene2;
            $rdf = parent::describeIndividual($interaction_id, $interaction_label, $base_class)
                . parent::describeClass($base_class, $interaction_type_label . " Interaction")
                . $involves;
            if ($int_pred !== null) {
                $rdf .= parent::triplify($gene1_uri, $int_pred, $gene2_uri);
            }
            parent::addRDF($rdf);
        } else {
            // the additional information (when present) names a subtype of the base class
            $has_info = ($int_additional_info != "");
            $interaction_label = ($has_info ? $int_additional_info . " " : "") . strtolower($interaction_type) . " interaction between " . $gene1 . " and " . $gene2;
            $type = parent::getVoc() . ($has_info ? $int_additional_info . "-" : "") . $interaction_type . "-Interaction";
            $type_label = ($has_info ? $int_additional_info . " " : "") . $interaction_type_label . " Interaction";
            $rdf = parent::describeIndividual($interaction_id, $interaction_label, $type)
                . parent::describeClass($type, $type_label, $base_class)
                // same label as the branch above (was misspelled "Interation")
                . parent::describeClass($base_class, $interaction_type_label . " Interaction")
                . $involves;
            if ($int_pred !== null) {
                $rdf .= parent::triplify($gene1_uri, $int_pred, $gene2_uri);
            }
            parent::addRDF($rdf);
        }

        parent::writeRDFBufferToWriteFile();
    }
}
/**
 * Pathway relation parser - NOT FINISHED.
 *
 * The method returns immediately; everything after the early return is a
 * draft that is kept for whoever completes it. The draft reads tab-separated
 * lines, resolves the first two columns (From / To) against $this->drugs and
 * then $this->genes, and computes an identifier for the row, but does not
 * yet emit any triple.
 */
function pathways()
{
    // needs to be finished
    return;

    while ($l = $this->getReadFile()->read(50000)) {
        $a = explode("\t", trim($l));
        // From	To	Reaction Type	Controller	Control Type	Cell Type	PubMed Id	Genes	Drugs	Diseases
        // hmg coa reductase inhibitors	Active & Inactive metabolites	Biochemical Reaction	CYP2C19,CYP2C8,CYP2C9,CYP2D6,CYP3A4,CYP3A5,UGT1A1,UGT1A3,UGT2B7	Catalysis	hepatocyte		CYP3A4,CYP3A5,UGT1A3,CYP2C19,CYP2C9,CYP2C8,CYP2D6,UGT1A1,UGT2B7	hmg coa reductase inhibitors

        // NOTE(review): a drug hit leaves the bare array key in $c1/$c2 while a
        // gene hit is prefixed with the namespace - confirm this is intended.
        $c1 = array_search($a[0], $this->drugs);
        if ($c1 === FALSE) {
            $c1 = array_search($a[0], $this->genes);
            if ($c1 === FALSE) {
                // not found: build a resource from the name itself
                // (the draft encoded the failed lookup result via the undefined url_encode())
                $c1 = parent::getRes() . urlencode($a[0]);
            } else {
                // actual id
                $c1 = parent::getNamespace() . $c1;
            }
        }

        $c2 = array_search($a[1], $this->drugs);
        if ($c2 === FALSE) {
            $c2 = array_search($a[1], $this->genes);
            if ($c2 === FALSE) {
                // not found
                $c2 = parent::getRes() . urlencode($a[1]);
            } else {
                // actual id
                $c2 = parent::getNamespace() . $c2;
            }
        }

        // row identifier derived from the raw line; not used yet
        $id = md5($l);
        $uri = parent::getRes() . $id;

        parent::writeRDFBufferToWriteFile();
    }
}
/**
 * Convert the tab-separated NDC product listing into RDF.
 *
 * The first line is treated as a header and discarded. Every other line must
 * hold 18 tab-separated columns; otherwise a warning is raised and the line
 * is skipped. Per product the method emits: the product itself (labelled with
 * column 3), its product type (column 2), product id, proprietary name, trade
 * name suffix, non-proprietary names (";" separated), dosage forms (","
 * separated), routes ("; " separated), marketing start/end dates, marketing
 * category, application number, labeller, the active ingredients with their
 * strengths and units (columns 13-15, ";" separated and matched by position)
 * and pharmacological classes ("," separated). The RDF buffer is written
 * after each product.
 *
 * @param resource $fpin open, readable handle of the product file
 */
function product($fpin)
{
    fgets($fpin); // header

    // column index => predicate for the two marketing dates
    $marketing_dates = array(8 => "start-marketing-date", 9 => "end-marketing-date");

    while ($l = fgets($fpin, 100000)) {
        $a = explode("\t", $l);
        if (count($a) != 18) {
            trigger_error("Expected 18 columns, instead found " . count($a));
            continue;
        }

        $product_id = parent::getNamespace() . $a[0];
        $product_label = $a[3];
        $product_type_label = ucfirst(strtolower($a[2]));
        // the type URI is derived from the product TYPE, not from the product name
        $product_type = parent::getVoc() . str_replace(" ", "-", $product_type_label);

        parent::addRDF(
            parent::describeIndividual($product_id, $product_label, parent::getVoc() . "Product")
            . parent::describeClass(parent::getVoc() . "Product", "NDC Product")
            . parent::triplify($product_id, parent::getVoc() . "product-type", $product_type)
            . parent::describeIndividual($product_type, $product_type_label, parent::getVoc() . "Product-Type")
            . parent::describeClass(parent::getVoc() . "Product-Type", "Product Type")
            . parent::triplifyString($product_id, parent::getVoc() . "product-id", $a[1])
            . parent::triplifyString($product_id, parent::getVoc() . "proprietary-name", $a[3])
            . parent::triplifyString($product_id, parent::getVoc() . "trade-name-suffix", $a[4])
        );

        // non-proprietary names
        if ($a[5]) {
            foreach (explode(";", $a[5]) as $c) {
                parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "non-proprietary-name", trim($c)));
            }
        }

        // dosage forms; trimmed so that "A, B" does not yield a URI starting with "-"
        if ($a[6]) {
            foreach (explode(",", $a[6]) as $c) {
                $dosageform = strtolower(trim($c));
                $dosageform_id = parent::getVoc() . str_replace(" ", "-", ucfirst($dosageform));
                parent::addRDF(
                    parent::describeIndividual($dosageform_id, $dosageform, parent::getVoc() . "Dosage-Form")
                    . parent::describeClass(parent::getVoc() . "Dosage-Form", "NDC Dosage Form")
                    . parent::triplify($product_id, parent::getVoc() . "dosage-form", $dosageform_id)
                );
            }
        }

        // routes (multi-valued)
        if ($a[7]) {
            foreach (explode("; ", $a[7]) as $c) {
                $route = strtolower(trim($c));
                $route_id = parent::getVoc() . str_replace(" ", "-", ucfirst($route));
                parent::addRDF(
                    parent::describeIndividual($route_id, $route, parent::getVoc() . "Route")
                    . parent::describeClass(parent::getVoc() . "Route", "NDC Drug Route")
                    . parent::triplify($product_id, parent::getVoc() . "route", $route_id)
                );
            }
        }

        // marketing dates: the first 4, next 2 and next 2 characters are joined
        // with dashes (presumably YYYYMMDD -> YYYY-MM-DD; verify against the data)
        foreach ($marketing_dates as $column => $relation) {
            if ($a[$column]) {
                $date = substr($a[$column], 0, 4) . "-" . substr($a[$column], 4, 2) . "-" . substr($a[$column], 6, 2);
                parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . $relation, $date));
            }
        }

        if ($a[10]) {
            parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "marketing-category", $a[10]));
        }
        if ($a[11]) {
            parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "application-number", $a[11]));
        }

        // create a labeller node
        if ($a[12]) {
            $labeller_id = parent::getRes() . md5($a[12]);
            $label = addslashes($a[12]);
            parent::addRDF(
                parent::describeIndividual($labeller_id, $label, parent::getVoc() . "Labeller")
                . parent::describeClass(parent::getVoc() . "Labeller", "NDC Labeller")
                . parent::triplify($product_id, parent::getVoc() . "labeller", $labeller_id)
            );
        }

        // the next three are together: substance names, strengths and units
        if ($a[13]) {
            $substances = explode(";", $a[13]);
            $strengths = explode(";", $a[14]);
            $units = explode(";", $a[15]);
            foreach ($substances as $i => $substance) {
                // list the active ingredient
                $ingredient_label = strtolower($substance);
                // strengths/units may list fewer entries than substances
                $strength = isset($strengths[$i]) ? $strengths[$i] : '';
                $unit = isset($units[$i]) ? $units[$i] : '';

                $ingredient_id = parent::getRes() . md5($ingredient_label);
                parent::addRDF(
                    parent::describeIndividual($ingredient_id, $ingredient_label, parent::getVoc() . "Ingredient")
                    . parent::describeClass(parent::getVoc() . "Ingredient", "NDC Ingredient")
                    . parent::triplify($product_id, parent::getVoc() . "ingredient", $ingredient_id)
                );

                // describe the substance composition
                $substance_label = "{$strength} {$unit} {$ingredient_label}";
                $substance_id = parent::getRes() . md5($substance_label);
                parent::addRDF(
                    parent::describeIndividual($substance_id, $substance_label, parent::getVoc() . "Substance")
                    . parent::triplifyString($substance_id, parent::getVoc() . "amount", $strength)
                    . parent::describeClass(parent::getVoc() . "Substance", "NDC Substance")
                );

                $unit_id = parent::getVoc() . md5($unit);
                parent::addRDF(
                    parent::describeIndividual($unit_id, $unit, parent::getVoc() . "Unit")
                    . parent::describeClass(parent::getVoc() . "Unit", "NDC Unit")
                    . parent::triplify($substance_id, parent::getVoc() . "amount_unit", $unit_id)
                    . parent::triplify($product_id, parent::getVoc() . "has-part", $substance_id)
                );
            }
        }

        // pharmacological classes; trimmed so the same class always hashes to the same node
        if ($a[16]) {
            foreach (explode(",", $a[16]) as $c) {
                $c = trim($c);
                $cat_id = parent::getVoc() . md5($c);
                parent::addRDF(
                    parent::describeIndividual($cat_id, $c, parent::getVoc() . "Pharmacological-Class")
                    . parent::describeClass(parent::getVoc() . "Pharmacological-Class", "NDC Pharmacological Class")
                    . parent::triplify($product_id, parent::getVoc() . "pharmacological-class", $cat_id)
                );
            }
        }

        parent::writeRDFBufferToWriteFile();
    }
}
function pubmed() { $citations = null; $ext = substr(strrchr($this->getReadFile()->getFileName(), '.'), 1); if ($ext = "gz") { $citations = new SimpleXMLElement("compress.zlib://" . $this->getReadFile()->getFileName(), NULL, TRUE); } elseif ($ext = "xml") { $citations = new SimpleXMLElement($this->getReadFile()->getFileName(), NULL, TRUE); } foreach ($citations->MedlineCitation as $citation) { $this->setCheckPoint('record'); $pmid = "" . $citation->PMID; if (isset($this->id_list)) { if (!isset($this->id_list[$pmid])) { continue; } else { echo "processing {$pmid}" . PHP_EOL; } } $pmid_uri = parent::getNamespace() . $citation->PMID; $article = $citation->Article; parent::addRDF(parent::describeIndividual($pmid_uri, $this->getString($article->ArticleTitle), parent::getVoc() . "PubMedRecord") . parent::describeClass(parent::getVoc() . "PubMedRecord", "PubMedRecord") . parent::triplify($pmid_uri, "rdfs:seeAlso", "http://www.ncbi.nlm.nih.gov/pubmed/{$pmid}")); // metadata about the record $owner = parent::getRes() . md5($citation['Owner']); parent::addRDF(parent::describeIndividual($owner, $citation['Owner'], "foaf:Agent") . parent::triplify($pmid_uri, parent::getVoc() . "owner", $owner)); $status = parent::getRes() . md5($citation['Status']); parent::addRDF(parent::describeIndividual($status, $citation['Status'], parent::getVoc() . "Status") . parent::describeClass(parent::getVoc() . "Status", "Status") . parent::triplify($pmid_uri, parent::getVoc() . "status", $status) . parent::triplifyString($pmid_uri, parent::getVoc() . "version", $citation['VersionID'])); $this->addDate($pmid_uri, "version-date", $citation['VersionDate']); $this->addDate($pmid_uri, "date-created", $citation->DateCreated); $this->addDate($pmid_uri, "date-revised", $citation->DateRevised); $this->addDate($pmid_uri, "date-completed", $citation->DateCompleted); if (!empty($citation->MeshHeadingList)) { $i = 0; foreach ($citation->MeshHeadingList->MeshHeading as $mh) { $id = parent::getRes() . $pmid . 
"_mh_" . ++$i; $did = parent::getRes() . md5($mh->DescriptorName); parent::addRDF(parent::describeIndividual($id, $mh->DescriptorName, parent::getVoc() . "MeshHeading") . parent::describeClass(parent::getVoc() . "MeshHeading", "MeSH Heading") . parent::triplify($pmid_uri, parent::getVoc() . "mesh-heading", $id) . parent::triplifyString($id, parent::getVoc() . "descriptor-major-topic", "" . $mh->DescriptorName['MajorTopicYN']) . parent::describeIndividual($did, "" . $mh->DescriptorName, parent::getVoc() . "Mesh-Descriptor") . parent::triplify($id, parent::getVoc() . "mesh-descriptor", $did)); if (!empty($mh->QualifierName)) { foreach ($mh->QualifierName as $qualifier_name) { $qid = parent::getRes() . md5($qualifier_name); parent::addRDF(parent::describeIndividual($qid, $qualifier_name, parent::getVoc() . "Mesh-Qualifier") . parent::triplify($id, parent::getVoc() . "mesh-qualifier", $qid)); } } } } if (!empty($citation->ChemicalList)) { $i = 0; foreach ($citation->ChemicalList->Chemical as $chemical) { $id = parent::getRes() . $pmid . "_ch_" . ++$i; parent::addRDF(parent::describeIndividual($id, $chemical->NameOfSubstance, parent::getVoc() . "Chemical") . parent::describeClass(parent::getVoc() . "Chemical", "Chemical") . parent::triplify($pmid_uri, parent::getVoc() . "chemical", $id)); if ($chemical->RegistryNumber != "0") { // check if "EC" if (substr($chemical->RegistryNumber, 0, 2) == "EC") { $ec = substr($chemical->RegistryNumber, 3); parent::addRDF(parent::triplify($id, parent::getVoc() . "x-ec", "ec:" . $ec)); } else { parent::addRDF(parent::triplify($id, parent::getVoc() . "x-cas", "cas:" . $chemical->RegistryNumber)); } } } } if (!empty($citation->GeneSymbolList)) { foreach ($citation->GeneSymbolList->GeneSymbol as $geneSymbol) { parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . 
"gene-symbol", $geneSymbol)); } } if (!empty($citation->SupplMeshList)) { foreach ($citation->SupplMeshList->SupplMeshName as $supplMeshName) { $id = parent::getRes() . md5($supplMeshName); parent::addRDF(parent::describeIndividual($id, $supplMeshName, parent::getVoc() . "MeshHeading") . parent::triplify($pmid_uri, parent::getVoc() . "supplemental-mesh-heading", $id)); } } foreach ($article->PublicationTypeList->PublicationType as $publicationType) { $id = parent::getRes() . md5($publicationType); $label = str_replace(" ", "-", $publicationType); parent::addRDF(parent::triplify($pmid_uri, parent::getVoc() . "publication-type", $id) . parent::describeClass($id, $publicationType)); } if (!empty($article->Abstract)) { $id = parent::getRes() . $pmid . "_ABSTRACT"; $label = "Abstract for PMID:{$pmid}"; $abstract = $article->Abstract; parent::addRDF(parent::describeIndividual($id, $label, parent::getVoc() . "Article-Abstract") . parent::describeClass(parent::getVoc() . "Article-Abstract", "Article Abstract") . parent::triplify($pmid_uri, "dc:abstract", $id) . parent::triplifyString($id, parent::getVoc() . "copyright", $abstract->CopyrightInformation)); $section = 0; $abstractText = ""; foreach ($abstract->AbstractText as $text) { $abstractText .= " " . $text; if (!empty($text['Label']) && $text['Label'] !== "UNLABELLED") { $section_id = parent::getRes() . $pmid . "_ABSTRACT_SECTION_" . ++$section; parent::addRDF(parent::triplify($id, parent::getVoc() . "section", $section_id) . parent::triplifyString($section_id, parent::getVoc() . "order", $section) . parent::triplifyString($section_id, parent::getVoc() . "nlm-section-type", $text['NlmCategory']) . parent::triplifyString($section_id, parent::getVoc() . "label", $text['Label']) . parent::triplifyString($section_id, parent::getVoc() . "text", $text)); } } parent::addRDF(parent::triplifyString($id, parent::getVoc() . 
"abstract-text", $abstractText)); } if (!empty($citation->OtherAbstract)) { $i = 0; foreach ($citation->OtherAbstract as $ab) { $id = parent::getRes() . $pmid . "_oa_" . ++$i; parent::addRDF(parent::describeIndividual($id, "", parent::getVoc() . "Article-Abstract") . parent::describeClass(parent::getVoc() . "Article-Abstract", "Article Abstract") . parent::triplify($pmid_uri, "dc:abstract", $id)); $abstractText = ""; foreach ($ab->AbstractText as $text) { $abstractText .= " " . $text; if (!empty($text['Label']) && $text['Label'] !== "UNLABELLED") { parent::addRDF(parent::triplifyString($id, parent::getVoc() . "abstract_" . strtolower($text['Category']), $text)); } } parent::addRDF(parent::triplifyString($id, parent::getVoc() . "abstract-text", $abstractText)); } } $author_types = array("Investigator", "Author", "PersonalNameSubject"); foreach ($author_types as $author_type) { $listname = $author_type . "List"; if (!empty($article->{$listname}->{$author_type})) { $i = 0; foreach ($article->{$listname}->{$author_type} as $author) { $id = parent::getRes() . $pmid . "_AUTHOR_" . ++$i; $author_label = $author->LastName . ($author->Initials ? ", " . $author->Initials : ""); parent::addRDF(parent::describeIndividual($id, $author_label, parent::getVoc() . $author_type) . parent::describeClass(parent::getVoc() . $author_type, $author_type) . parent::triplifyString($id, parent::getVoc() . "list-position", $i) . parent::triplify($pmid_uri, parent::getVoc() . strtolower($author_type), $id) . parent::triplifyString($id, parent::getVoc() . "last-name", $author->LastName) . parent::triplifyString($id, parent::getVoc() . "fore-name", $author->ForeName) . parent::triplifyString($id, parent::getVoc() . "initials", $author->Initials) . parent::triplifyString($id, parent::getVoc() . "collective-name", $author->CollectiveName) . parent::triplifyString($id, parent::getVoc() . "suffix", $author->Suffix)); if ($author->Affiliation) { $affilitation = parent::getRes() . 
md5($author->Affilitation); parent::addRDF(parent::describeIndividual($affilitation, $author->Affilitation, parent::getVoc() . "Organization") . parent::describeClass(parent::getVoc() . "Organization", "Organization") . parent::triplifyString($id, parent::getVoc() . "affiliation", $affilitation)); } foreach ($author->NameID as $authorNameId) { if (!empty($authorNameId)) { parent::addRDF(parent::triplifyString($id, parent::getVoc() . "name-id", $author_name_id)); } } } } } if (!empty($article->ArticleDate)) { $this->addDate($pmid_uri, "article-date", $article->ArticleDate); } foreach ($article->Language as $language) { parent::addRDF(parent::triplifyString($pmid_uri, "dc:language", $language)); } if (!empty($citation->KeywordList)) { foreach ($citation->KeywordList->Keyword as $keyword) { parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "keyword", $keyword)); } } if (!empty($citation->otherID)) { // untested foreach ($citation->OtherID as $otherID) { if (!empty($otherID)) { parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "other-id", $other_id) . parent::triplifyString($pmid_uri, parent::getVoc() . "other-id-source", $otherID['Source'])); if (strstr($other_id, "PMC")) { parent::addRDF(parent::triplify($pmid_uri, parent::getVoc() . "x-pmc", "pmc:" . $other_id)); } } } } if (!empty($article->DataBankList)) { foreach ($article->DataBankList->DataBank as $dataBank) { parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "databank", $dataBank->DataBankName)); if ($dataBank->AccessionNumberList !== NULL) { foreach ($dataBank->AccessionNumberList->AccessionNumber as $acc) { parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "x-" . strtolower($dataBank->dataBankName), $acc)); } } } } if (!empty($article->GrantList)) { $i = 0; foreach ($article->GrantList->Grant as $grant) { $id = parent::getRes() . $pmid . "_GRANT_" . ++$i; $grant_label = "Grant " . $grant->GrantID . " for " . parent::getNamespace() . 
$pmid; parent::addRDF(parent::describeIndividual($id, $grant_label, parent::getVoc() . "Grant") . parent::describeClass(parent::getVoc() . "Grant", "Grant") . parent::triplify($pmid_uri, parent::getVoc() . "grant", $id) . parent::triplifyString($id, parent::getVoc() . "grant-identifier", $grant->GrantID) . parent::triplifyString($id, parent::getVoc() . "grant-acronym", $grant->Acronym) . parent::triplifyString($id, parent::getVoc() . "grant-agency", $grant->Agency) . parent::triplifyString($id, parent::getVoc() . "grant-country", $grant->Country)); } } if (!empty($citation->NumberOfReferences)) { parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "number-of-references", $citation->NumberOfReferences)); } if (!empty($article->VernacularTitle)) { parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "vernacular-title", $article->VernacularTitle)); } foreach ($citation->CitationSubset as $citationSubset) { if (!empty($citationSubset)) { parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "citation-subset", $citationSubset)); } } if (!empty($citation->commentsCorrectionsList)) { $i = 0; foreach ($commentsCorrectionsList->CommentsCorrections as $commentCorrection) { $id = parent::getRes() . $pmid . "_COMMENT_CORRECTION_" . ++$i; $ccRefType = $commentCorrection['RefType']; $ccPmid = $commentCorrection->PMID; //optional $ccNote = $commentCorrection->Note; //optional $cc_label = "Comment or correction ." . $ccNumber . " for " . parent::getNamespace() . $pmid; parent::addRDF(parent::describeIndividual($id, $cc_label, parent::getVoc() . "CommentCorrection") . parent::describeClass(parent::getVoc() . "CommentCorrection", "CommentCorrection") . parent::triplify($pmid_uri, parent::getVoc() . "comment-correction", $id) . parent::triplify($id, "rdf:type", parent::getVoc() . $ccRefType) . parent::triplifyString($id, parent::getVoc() . "ref-source", $ref_source) . parent::triplifyString($id, parent::getVoc() . 
"note", $cc_note)); } } if (!empty($citation->generalNote)) { parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "general-note", $general_note)); } foreach ($citation->SpaceFlightMission as $spaceFlightMission) { if (!empty($spaceFlightMission)) { parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "space-flight-mission" . $space_flight_mission)); } } $journal = $article->Journal; $journalId = parent::getRes() . $pmid . "_JOURNAL"; $journal_label = "Journal for " . parent::getNamespace() . $pmid; parent::addRDF(parent::describeIndividual($journalId, $journal_label, parent::getVoc() . "Journal") . parent::describeClass(parent::getVoc() . "Journal", "Journal") . parent::triplify($pmid_uri, parent::getVoc() . "journal", $journalId) . parent::triplify($journalId, parent::getVoc() . "x-issn", "issn:" . $journal->ISSN) . parent::triplifyString($journalId, parent::getVoc() . "journal-nlm-identifier", $citation->MedLineJournalInfo->NlmUniqueID) . parent::triplifyString($journalId, parent::getVoc() . "journal-title", $journal->Title) . parent::triplifyString($journalId, parent::getVoc() . "journal-abbreviation", $journal->ISOAbbreviation) . parent::triplifyString($journalId, parent::getVoc() . "volume", $journal->JournalIssue->Volume) . parent::triplifyString($journalId, parent::getVoc() . "issue", $journal->JournalIssue->Issue) . parent::triplifyString($journalId, parent::getVoc() . "pages", "" . 
$article->Pagination->MedlinePgn)); $journalPubDate = $journal->JournalIssue->PubDate; if (!empty($journalPubDate)) { $journalYear = $journalPubDate->Year; $journalMonth = trim($journalPubDate->Month); //optional if ($journalMonth and !is_numeric($journalMonth[0])) { $mo = array("jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec"); $journalMonth = str_pad(array_search(strtolower($journalMonth), $mo) + 1, 2, "0", STR_PAD_LEFT); } $journalDay = trim($journalPubDate->Day); //optional if ($journalDay) { $journalDay = str_pad($journalDay, 2, "0", STR_PAD_LEFT); } parent::addRDF(parent::triplifyString($journalId, parent::getVoc() . "publication-year", $journalYear) . parent::triplifyString($journalId, parent::getVoc() . "publication-month", $journalMonth) . parent::triplifyString($journalId, parent::getVoc() . "publication-day", $journalDay) . parent::triplifyString($journalId, parent::getVoc() . "publication-season", $journalPubDate->Season) . parent::triplifyString($journalId, parent::getVoc() . "publication-date", $journalPubDate->MedlineDate)); if (!empty($journalYear) and !empty($journalMonth) and !empty($journalDay)) { parent::addRDF(parent::triplifyString($journalId, parent::getVoc() . "publication-date", "{$journalYear}-{$journalMonth}-{$journalDay}", "xsd:date")); } } foreach ($citation->Article->ELocation as $eLocation) { if (!empty($eLocation)) { parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "elocation", $eLocation)); } } $this->writeRDFBufferToWriteFile(); //break; } }
/**
 * Convert the model-organism gene table (CSV) of the current read file into RDF.
 *
 * The first line read is treated as the header and is only used to check the
 * column count. Every following line is parsed as one CSV row and emitted as
 * an "Aging-Related-Gene" individual; the RDF buffer is written out after
 * each row.
 *
 * Expected columns:
 *   [0] GenAge ID
 *   [1] symbol
 *   [2] name
 *   [3] organism
 *   [4] entrez gene id
 *   [5] avg lifespan change (max obsv)
 *   [6] lifespan effect
 *   [7] longevity influence
 *
 * @return false|null false (after an E_USER_WARNING) when the header does not
 *                    have the expected number of columns; nothing otherwise
 **/
function models() {
    // organism name (as spelled in the file) => NCBI taxonomy identifier
    $tax_ids = array(
        "Caenorhabditis elegans" => "6239",
        "Mus musculus" => "10090",
        "Saccharomyces cerevisiae" => "4932",
        "Drosophila melanogaster" => "7227",
        "Podospora anserina" => "5145",
        "Mesocricetus auratus" => "10036",
        "Schizosaccharomyces pombe" => "4896",
        "Danio rerio" => "7955"
    );

    $h = explode(",", parent::getReadFile()->read());
    $expected_columns = 8;
    if (($n = count($h)) != $expected_columns) {
        trigger_error("Found {$n} columns in gene file - expecting {$expected_columns}!", E_USER_WARNING);
        return false;
    }

    while ($l = parent::getReadFile()->read(200000)) {
        // blank lines carry no record
        if (trim($l) === '') {
            continue;
        }

        $data = str_getcsv($l);
        if (count($data) < $expected_columns) {
            // a truncated row would otherwise be indexed past its end and
            // produce empty identifiers
            trigger_error("Skipping malformed line in gene file: " . trim($l), E_USER_WARNING);
            continue;
        }

        $genage = str_pad($data[0], 4, "0", STR_PAD_LEFT);
        $gene_symbol = $data[1];
        $name = $data[2];
        $organism = $data[3];
        $ncbi_gene_id = $data[4];
        $max_percent_obsv_avg_lifespan_change = $data[5];
        $lifespan_effect = $data[6];
        $longevity_influence = $data[7];

        $genage_id = parent::getNamespace() . $genage;

        parent::addRDF(
            parent::describeIndividual($genage_id, $name, parent::getVoc() . "Aging-Related-Gene") .
            parent::describeClass(parent::getVoc() . "Aging-Related-Gene", "Aging Related Gene")
        );
        parent::addRDF(
            parent::triplifyString($genage_id, parent::getVoc() . "gene-symbol", parent::safeLiteral($gene_symbol))
        );

        // only link a taxon when the organism is known; an unmapped name
        // would otherwise yield the incomplete identifier "ncbitaxon:"
        if (isset($tax_ids[$organism])) {
            parent::addRDF(
                parent::triplify($genage_id, parent::getVoc() . "taxon", "ncbitaxon:" . $tax_ids[$organism])
            );
        } else {
            trigger_error("No NCBI taxonomy id known for organism '{$organism}' (record {$genage})", E_USER_WARNING);
        }

        if ($ncbi_gene_id !== "") {
            parent::addRDF(
                parent::triplify($genage_id, parent::getVoc() . "x-ncbigene", "ncbigene:" . $ncbi_gene_id)
            );
        }

        if ($max_percent_obsv_avg_lifespan_change !== "") {
            parent::addRDF(
                parent::triplifyString(
                    $genage_id,
                    parent::getVoc() . "maximum-percent-observed-average-lifespan-change",
                    parent::safeLiteral($max_percent_obsv_avg_lifespan_change)
                )
            );
        }

        // the combined value is split into two separate lifespan-effect literals
        if ($lifespan_effect == "Increase and Decrease") {
            parent::addRDF(
                parent::triplifyString($genage_id, parent::getVoc() . "lifespan-effect", "increase") .
                parent::triplifyString($genage_id, parent::getVoc() . "lifespan-effect", "decrease")
            );
        } else {
            parent::addRDF(
                parent::triplifyString($genage_id, parent::getVoc() . "lifespan-effect", strtolower($lifespan_effect))
            );
        }

        parent::addRDF(
            parent::triplifyString($genage_id, parent::getVoc() . "longevity-influence", strtolower($longevity_influence))
        );

        parent::WriteRDFBufferToWriteFile();
    }
}
/**
 * Convert the dietary-restriction gene table (CSV) of the current read file
 * into RDF.
 *
 * The first line read is treated as the header and is only used to check the
 * column count. Every following line is parsed as one CSV row and emitted as
 * a gene individual plus a gene-phenotype association that points at the
 * "Diet-Induced-Life-Span-Variant" phenotype; the RDF buffer is written out
 * after each row.
 *
 * Expected columns:
 *   [0] record identifier
 *   [1] gene symbol
 *   [2] species name
 *   [3] NCBI gene id
 *   [4] gene name
 *
 * The 5-column layout has no reference column, so no article links are
 * produced here.
 *
 * @return false|null false (after an E_USER_WARNING) when the header does not
 *                    have the expected number of columns; nothing otherwise
 **/
function gene_manipulations() {
    // species name (as spelled in the file) => NCBI taxonomy identifier
    // NOTE(review): models() emits the "ncbitaxon:" prefix while this method
    // emits "taxon:" - confirm which prefix is intended before unifying.
    $taxa = array(
        "Caenorhabditis elegans" => "6239",
        "Saccharomyces cerevisiae" => "4932",
        "Schizosaccharomyces pombe" => "4896",
        "Drosophila melanogaster" => "7227",
        "Mus musculus" => "10090"
    );

    $h = explode(",", parent::getReadFile()->read());
    $expected_columns = 5;
    if (($n = count($h)) != $expected_columns) {
        trigger_error("Found {$n} columns in gene file - expecting {$expected_columns}!", E_USER_WARNING);
        return false;
    }

    while ($l = parent::getReadFile()->read(200000)) {
        // blank lines carry no record
        if (trim($l) === '') {
            continue;
        }

        $data = str_getcsv($l);
        if (count($data) < $expected_columns) {
            // a truncated row would otherwise be indexed past its end and
            // produce empty identifiers
            trigger_error("Skipping malformed line in gene file: " . trim($l), E_USER_WARNING);
            continue;
        }

        $gendr = $data[0];
        $gene_symbol = $data[1];
        $species_name = $data[2];
        $geneid = $data[3];
        $gene_name = $data[4];

        $gendr_id = parent::getNamespace() . $gendr;
        $gendr_label = $gene_name . " (" . $gene_symbol . ")";
        $association_id = parent::getRes() . md5($gendr . $geneid . "_association");
        $association_label = "Association between " . $gene_symbol . " and variation in life span extension induced by dietary restriction";

        parent::addRDF(
            parent::describeIndividual($gendr_id, $gendr_label, parent::getVoc() . "Dietary-Restriction-Life-Extension-Related-Gene") .
            parent::describeClass(parent::getVoc() . "Dietary-Restriction-Life-Extension-Related-Gene", "Dietary Restriction Life Extension Related Gene") .
            parent::triplify($gendr_id, parent::getVoc() . "x-ncbigene", "ncbigene:" . $geneid) .
            parent::triplifyString($gendr_id, parent::getVoc() . "gene-name", $gene_name) .
            parent::triplifyString($gendr_id, parent::getVoc() . "gene-symbol", $gene_symbol) .
            parent::describeIndividual($association_id, $association_label, parent::getVoc() . "Gene-Phenotype-Association") .
            parent::describeClass(parent::getVoc() . "Gene-Phenotype-Association", "Gene Phenotype Association") .
            parent::triplify($association_id, parent::getVoc() . "gene", $gendr_id) .
            parent::triplify($association_id, parent::getVoc() . "phenotype", parent::getVoc() . "Diet-Induced-Life-Span-Variant")
        );

        // species-specific statements; rows with an unlisted species get none
        $species_rdf = '';
        if ($species_name == "Caenorhabditis elegans") {
            // worm records additionally point at a WormBase phenotype term
            $species_rdf .= parent::triplify($association_id, parent::getVoc() . "phenotype", "wormbase:WBPhenotype:0001837");
        }
        if (isset($taxa[$species_name])) {
            $species_rdf .= parent::triplify($association_id, parent::getVoc() . "taxon", "taxon:" . $taxa[$species_name]);
        }
        if ($species_rdf !== '') {
            parent::addRDF($species_rdf);
        }

        parent::writeRDFBufferToWriteFile();
    }
}
/**
 * Add an RDF representation of one descriptor record to the model.
 *
 * The record's "UI" value names the resource and its "MH" value labels it.
 * Every other field is looked up in the descriptor data element map
 * (field code => predicate local name) and emitted under that predicate,
 * together with a description of the predicate. The RDF buffer is written
 * out after each field.
 *
 * A field code that is missing from the data element map raises an
 * E_USER_ERROR.
 *
 * @param array $desc_record_arr assoc array with the contents of one
 *        descriptor record: field code => list of values
 */
private function makeDescriptorRecord($desc_record_arr) {
    // get the UI of the descriptor record
    $dr_ui = $desc_record_arr["UI"][0];
    $dr_res = $this->getNamespace() . $dr_ui;
    $dr_label = $desc_record_arr['MH'][0];
    parent::AddRDF(
        parent::describeIndividual($dr_res, $dr_label, $this->getVoc() . "Descriptor", $dr_label) .
        parent::describeClass($this->getVoc() . "Descriptor", "MeSH Descriptor")
    );

    // now get the descriptor data elements
    $qde = $this->getDescriptorDataElements();

    // fields emitted as an escaped string literal => description of the predicate
    $literal_fields = array(
        "CATSH" => "Relationship between a descriptor and its cataloging subheadings list name",
        "CX" => "Relationship between a descriptor and xrefs",
        "DC" => "Relationship between a descriptor and its descriptor class",
        "DE" => "Relationship between a descriptor record and its entry version",
        "DS" => "Relationship between a descriptor record and its sort version",
        "EC" => "Relationship between a descriptor and its entry combination",
        "PRINT ENTRY" => "Relationship between a descriptor and its print entry term",
        "ENTRY" => "Relationship between a descriptor and its entry term",
        "FX" => "Relationship between a descriptor and its forward cross reference",
        "GM" => "Relationship between a descriptor and its grateful med note",
        "HN" => "Relationship between a descriptor record and its history note",
        "MED" => "Relationship between a descriptor and its backfile postings",
        "M94" => "Relationship between a descriptor and its backfile postings",
        "M90" => "Relationship between a descriptor and its backfile postings",
        "M85" => "Relationship between a descriptor record and its backfile postings",
        "M80" => "Relationship between a descriptor record and its backfile postings",
        "M75" => "Relationship between a descriptor record and its backfile postings",
        "M66" => "Relationship between a descriptor record and its backfile postings",
        "MH_TH" => "Relationship between a descriptor record and its MeSH Heading thesaurus id",
        "MH" => "Relationship between a descriptor record and its MeSH Heading",
        "MS" => "Relationship between a descriptor record and its MeSH scope note",
        "N1" => "Relationship between a descriptor record and its CAS 1 name",
        "OL" => "Relationship between a descriptor record and its online note",
        "PA" => "Relationship between a descriptor record and its pharmacological action",
        "PI" => "Relationship between a descriptor record and its previous indexing",
        "PM" => "Relationship between a descriptor record and its public mesh note",
        "PX" => "Relationship between a descriptor record and its pre explosion",
        "RECTYPE" => "Relationship between a descriptor record and its record type",
        "RH" => "Relationship between a descriptor record and its running head, in relation to mesh tree structures",
        "RN" => "Relationship between a descriptor record and its CAS registry",
        "RR" => "Relationship between a descriptor record and its registry number"
    );

    // fields emitted as xsd:date literals => description of the predicate
    $date_fields = array(
        "DA" => "Relationship between a descriptor and its date of entry",
        "DX" => "Relationship between a descriptor and its date of major descriptor established",
        "MR" => "Relationship between a descriptor record and its major revision date"
    );

    // iterate over the properties
    foreach ($desc_record_arr as $k => $v) {
        if (array_key_exists($k, $qde)) {
            if (isset($literal_fields[$k])) {
                // NOTE(review): utf8_encode() is deprecated as of PHP 8.2; it is
                // kept here so the emitted literals stay unchanged.
                $property = $this->getVoc() . $qde[$k];
                foreach ($v as $vv) {
                    parent::AddRDF(
                        parent::triplifyString($dr_res, $property, utf8_encode(htmlspecialchars($vv))) .
                        parent::describeProperty($property, $literal_fields[$k])
                    );
                }
            } elseif (isset($date_fields[$k])) {
                $property = $this->getVoc() . $qde[$k];
                foreach ($v as $vv) {
                    parent::AddRDF(
                        parent::triplifyString($dr_res, $property, $this->formatDate($vv), "xsd:date") .
                        parent::describeProperty($property, $date_fields[$k])
                    );
                }
            } elseif ($k == "AN") {
                // annotations: one literal per semicolon-separated part
                $property = $this->getVoc() . $qde["AN"];
                foreach ($v as $vv) {
                    foreach (explode(";", $vv) as $anAn) {
                        parent::AddRDF(
                            parent::triplifyString($dr_res, $property, $anAn) .
                            parent::describeProperty($property, "Relationship between a descriptor and its annotation")
                        );
                    }
                }
            } elseif ($k == "AQ") {
                // allowable topical qualifiers: one resource per space-separated code
                foreach ($v as $vv) {
                    foreach (explode(" ", $vv) as $aq) {
                        $aq_res = $this->getRes() . $aq;
                        parent::AddRDF(
                            parent::triplify($aq_res, "rdf:type", $this->getVoc() . "allowable-topical-qualifier") .
                            parent::describeClass($this->getVoc() . "allowable-topical-qualifier", "allowable topical qualifier: " . $qde['AQ'])
                        );
                        parent::AddRDF(
                            parent::triplify($dr_res, $this->getVoc() . $qde['AQ'], $aq_res) .
                            parent::describeProperty($this->getVoc() . $qde['AQ'], "Relationship between a descriptor and its allowable topical qualifiers")
                        );
                    }
                }
            } elseif ($k == "MN") {
                // tree numbers: each becomes a Tree-Entry resource, linked to
                // the entry named by everything before its last "."
                foreach ($v as $vv) {
                    $vid = parent::getNamespace() . $vv;
                    $vlabel = utf8_encode(htmlspecialchars($vv));
                    parent::AddRDF(
                        parent::describeIndividual($vid, $dr_label, parent::getVoc() . "Tree-Entry", $vlabel) .
                        parent::triplify($dr_res, $this->getVoc() . $qde['MN'], $vid) .
                        parent::describeProperty($this->getVoc() . $qde['MN'], "Relationship between a descriptor record and its MeSH Tree Number")
                    );
                    if (FALSE !== ($pos = strrpos($vv, "."))) {
                        $pid = parent::getNamespace() . substr($vv, 0, $pos);
                        parent::addRDF(parent::triplify($vid, "rdfs:subClassOf", $pid));
                    }
                }
            } elseif ($k == "ST") {
                // semantic types: each becomes a Semantic-Type resource
                foreach ($v as $vv) {
                    $vid = parent::getNamespace() . $vv;
                    $vlabel = utf8_encode(htmlspecialchars($vv));
                    parent::AddRDF(
                        parent::describeIndividual($vid, $vlabel, parent::getVoc() . "Semantic-Type", $vlabel) .
                        parent::triplify($dr_res, $this->getVoc() . $qde['ST'], $vid) .
                        parent::describeProperty($this->getVoc() . $qde['ST'], "Relationship between a descriptor record and its semantic type")
                    );
                }
            }
            // any other mapped field (e.g. "UI") produces no statement of its own
        } else {
            trigger_error("Please add key to descriptor record map: " . $k . PHP_EOL, E_USER_ERROR);
        }
        $this->WriteRDFBufferToWriteFile();
    } //foreach

    $this->WriteRDFBufferToWriteFile();
}
function parseEntry($lfile) { $fp = fopen($lfile, "r"); while ($l = fgets($fp, 100000)) { $k_t = trim(substr($l, 0, 12)); $v = trim(substr($l, 12)); if (!$k_t and $v == '') { continue; } // set the key to the current key if not empty, else keep using what was there before if (!isset($k)) { $k = $k_t; } else { if (!empty($k_t)) { $k = $k_t; } } if ($k == "///" or $k == "ENTRY1") { break; } if ($k == "ENTRY") { $a = explode(" ", $v, 2); $e['id'] = str_replace(array("EC ", " "), "", $a[0]); if (isset($this->org)) { $e['id'] = $this->org . "_" . $e['id']; } $e['type'] = trim(str_replace(array("Complete ", "Pathway Module"), array("", "Pathway Module"), $a[1])); $e['type_label'] = str_replace(" ", "-", $e['type']); $uri = parent::getNamespace() . $e['id']; continue; } // key with value if (in_array($k, array("NAME", "DESCRIPTION", "DEFINITION", "EQUATION", "COMMENT"))) { if ($k == "NAME") { parent::addRDF(parent::describeIndividual($uri, $v, parent::getVoc() . $e['type']) . parent::describeClass(parent::getVoc() . $e['type'], $e['type_label']) . parent::triplify($uri, "rdfs:seeAlso", "http://www.kegg.jp/dbget-bin/www_bget?" . $e['id'])); if ($e['type'] == 'Genome') { $a = explode(",", $v); parent::addRDF(parent::triplify($uri, "owl:sameAs", "kegg:" . $a[0])); } } else { if ($k == "DESCRIPTION") { parent::addRDF(parent::triplifyString($uri, "dc:description", $v)); } else { if ($k == "DEFINITION" and $e['type'] == "KO") { preg_match("/\\[([^\\]]+)\\]/", $v, $m); if (isset($m[1])) { parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-ec", $m[1])); } } else { if ($k == "COMMENT") { preg_match("/ICD-O: ([^,]+),/", $v, $m); if (isset($m[1])) { parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-icdo", "icdo:" . $m[1])); continue; } } else { parent::addRDF(parent::triplifyString($uri, parent::getVoc() . strtolower($k), $v)); } } } } continue; } if ($k == "RPAIR" and $e['type'] == "Reaction") { $list = explode(" ", $v); $id = parent::getRes() . $e['id'] . "." . 
$list[2] . "." . $list[3]; $rc = ''; if (isset($list[4])) { $rc = "kegg:" . substr($list[4], 4, -1); } parent::addRDF(parent::describeIndividual($id, $e['id'] . " " . $v, parent::getVoc() . "RPair-Role") . parent::describeClass(parent::getVoc() . "RPair-Role", "RPair Role") . parent::triplify($id, parent::getVoc() . "rpair", "kegg:" . $list[0]) . parent::triplifyString($id, parent::getVoc() . "role", $list[3]) . ($rc != '' ? parent::triplify($id, parent::getVoc() . "reaction-center", $rc) : '') . parent::triplify($uri, parent::getVoc() . "rpair-role", $id)); continue; } // list of entries if (in_array($k, array("ENZYME", "RPAIR", "RELATEDPAIR")) or in_array($e['type'], array("Compound", "RClass", "RPair")) and $k == "REACTION") { $list = explode(" ", $v); foreach ($list as $id) { if (!$id) { continue; } parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:{$id}")); } continue; } // key with semi-colon separated values if (in_array($k, array("CLASS", "CATEGORY", "KEYWORDS", "CHROMOSOME", "ANNOTATION", "ACTIVITY", "TYPE"))) { $a = explode(";", $v); foreach ($a as $c) { parent::addRDF(parent::triplifyString($uri, parent::getVoc() . strtolower($k), trim($c))); } continue; } // kegg seems to make a prefix mistake with the pathway identifiers... if ($k == "PATHWAY") { $a = explode(" ", $v, 2); preg_match("/[a-z]+([0-9]{5})/", $a[0], $m); if (isset($m[1])) { parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:map" . $m[1])); } else { echo "pathway problem: " . $v . 
PHP_EOL; } continue; } // multi-line header with key-value pair if (in_array($k, array("PATHWAY_MAP", "STR_MAP", "MODULE", "DISEASE", "KO_PATHWAY", "COMPOUND"))) { // PATHWAY_MAP map00010 Glycolysis / Gluconeogenesis $a = explode(" ", $v, 2); $mid = $a[0]; if (strpos($a[0], '(') !== FALSE) { $mid = substr($a[0], 0, strpos($a[0], '(')); } if (isset($this->org) and $k == "MODULE") { $mid = substr($mid, strpos($v, "_") + 1); } parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:" . $mid)); continue; } // REACTION parsing if (preg_match("/\\[RN:([^\\]]+)]/", $v, $m) != FALSE) { $list = explode(" ", $m[1]); foreach ($list as $item) { parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:" . $item)); } continue; } if ($k == "DRUG") { preg_match("/\\[DR:([^\\]]+)]/", $v, $m); if (isset($m[1])) { $list = explode(" ", $m[1]); foreach ($list as $item) { parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:" . $item)); } continue; } } if ($k == "TAXONOMY") { parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:" . str_replace("TAX", "taxonomy", $v))); continue; } // a list of objects to parse out that are defined within square brackets if (in_array($k, array("SOURCE", "COMPONENT"))) { preg_match_all("/\\[([^\\]]+)\\]/", $v, $m); if (isset($m[1])) { foreach ($m[1] as $id) { $myid = str_replace(array("TAX", "CPD", "DR"), array("taxonomy", "kegg", "kegg"), $id); parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), $myid)); } continue; } } // multi-line header with multi-key single value pair if (in_array($k, array("ORTHOLOGY", "REACTION"))) { // K00844,K12407,K00845 hexokinase/glucokinase [EC:2.7.1.1 2.7.1.2] [RN:R01786] // R01786,R02189,R09085 C00267 -> C00668 $a = explode(" ", $v, 2); $ids = explode(",", $a[0]); if ($k == "REACTION" and $ids[0][0] != "R") { echo "unable to parse {$k}" . 
PHP_EOL; continue; } if (!isset($a[1])) { if ($e['type'] == "Reaction") { parent::addRDF(parent::triplify($uri, parent::getVoc() . "orthology", "kegg:" . trim($a[0]))); continue; } echo "parse error: " . $k . " " . $v . PHP_EOL; continue; } $str = $a[1]; foreach ($ids as $id) { $o = ''; $o['id'] = $id; $o['label'] = $str; $o['type'] = strtolower($k); parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:{$id}")); } continue; } if ($k == "DBLINKS") { // DBLINKS GO: 0006096 0006094 $a = explode(": ", $v, 2); $ns = str_replace(array("ncbi-geneid", "ncbi-gi", "rn", "pubchem", "pdb-ccd", "icd-10", "um-bbd", "iubmb enzyme nomenclature", "explorenz - the enzyme database", "expasy - enzyme nomenclature database", "umbbd (biocatalysis/biodegradation database)", "brenda, the enzyme database"), array("ncbigene", "gi", "kegg", "pubchem.compound", "ccd", "icd10", "umbbd", "ec", "ec", "ec", "ec", "ec"), strtolower($a[0])); $ids = explode(" ", $a[1]); foreach ($ids as $id) { if (!$id) { continue; } parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-{$ns}", "{$ns}:{$id}")); } continue; } if ($k == "REMARK") { preg_match("/Same as: ([A-Z0-9]+)/", $v, $m); if (isset($m[1])) { parent::addRDF(parent::triplify($uri, parent::getVoc() . "same-as", "kegg:" . $m[1])); continue; } preg_match("/ATC code: (.*)/", $v, $m); if (isset($m[1])) { $list = explode(" ", $m[1]); foreach ($list as $item) { parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-atc", "atc:" . $item)); } continue; } preg_match("/Therapeutic category: (.*)/", $v, $m); if (isset($m[1])) { $list = explode(" ", $m[1]); foreach ($list as $item) { parent::addRDF(parent::triplifyString($uri, parent::getVoc() . "therapeutic-category", $item)); } continue; } preg_match("/Drug group: (.*)/", $v, $m); if (isset($m[1])) { $list = explode(" ", $m[1]); foreach ($list as $item) { parent::addRDF(parent::triplify($uri, parent::getVoc() . "drug-group", "kegg:" . 
$item)); } continue; } } if ($k == "PRODUCT" or $k == "SUBSTRATE") { preg_match("/([a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})/", $v, $m); if (isset($m[1])) { parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-dailymed", "dailymed:" . $m[1]) . parent::triplifyString("dailymed:" . $m[1], "rdfs:label", $v)); continue; } preg_match("/\\[CPD:([^\\]]+)\\]/", $v, $m); if (isset($m[1])) { parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:" . $m[1])); continue; } } if ($k == "STATISTICS") { $a = explode(": ", $v); parent::addRDF(parent::triplifyString($uri, parent::getVoc() . str_replace(" ", "-", strtolower($a[0])), $a[1])); continue; } if ($k == "ORGANISM") { $a = explode(" ", $v); parent::addRDF(parent::triplify($uri, parent::getVoc() . "organism", "kegg:" . $a[0])); continue; } if ($k == "REFERENCE") { if (!isset($ref)) { $ref = 1; } else { if (!isset($e['reference'][$ref]['title'])) { continue; } // this is a bug where the reference declaration is split onto two lines $ref++; } if (strstr($v, "PMID")) { // PMID:11529849 (marker) preg_match("/(PMID:[0-9]+) /", $v, $m); if (isset($m[1])) { $e['reference'][$ref]['pubmed'] = $m[1]; } } continue; } if ($k == "AUTHORS") { $e['reference'][$ref]['authors'] = $v; continue; } if ($k == "TITLE") { $e['reference'][$ref]['title'] = $v; continue; } if ($k == "JOURNAL") { $e['reference'][$ref]['journal'] = $v; continue; } if ($e['type'] == "Disease" and ($k == "GENE" or $k == "MARKER")) { // BCR-ABL (translocation) [HSA:613 25] [KO:K08878 K06619] preg_match_all("/ \\[([^\\]]+)\\]/", $v, $m); if (isset($m[1])) { foreach ($m[1] as $idlist) { $a = explode(":", $idlist); $ns = $a[0]; $b = explode(" ", $a[1]); foreach ($b as $id) { if ($ns == "KO") { $rel = "ko-" . strtolower($k); $gene = $id; } else { $rel = strtolower($k); $gene = $ns . "_" . $id; } parent::addRDF(parent::triplify($uri, parent::getVoc() . 
$rel, "kegg:{$gene}")); } } } else { echo $v; } continue; } if ($k == "GENES") { // ATH: AT1G32780 AT1G64710 AT1G77120(ADH1) AT5G24760 $a = explode(": ", $v); $org = $a[0]; $b = explode(" ", $a[1]); foreach ($b as $id) { $c = explode("(", $id); $gene = parent::getNamespace() . $org . "_" . $c[0]; parent::addRDF(parent::triplify($uri, parent::getVoc() . "gene", $gene)); } //echo parent::getRDF();exit; continue; } if ($k == "DRUG_TARGET") { // Afatinib: D09724 D09733 $s = substr($v, strpos($v, ":") + 2); $list = explode(" ", $s); foreach ($list as $item) { parent::addRDF(parent::triplify($uri, parent::getVoc() . "drug-target", "kegg:{$item}")); } continue; } if ($k == "STRUCTURE") { $list = explode(" ", $v); foreach ($list as $item) { if (trim($item) == '') { continue; } parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-pdb", "pdb:{$item}")); } continue; } if ($k == "MOTIF") { $list = explode(" ", $v); foreach ($list as $item) { parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-pfam", "pfam:{$item}")); } continue; } if (in_array($k, array("INTERACTION", "METABOLISM", "TARGET"))) { // dopamine D2-receptor antagonist [HSA:1813] [KO:K04145] $id = parent::getRes() . md5($uri . $v); $type = ucfirst(strtolower($k)); if (in_array($k, array("INTERACTION", "METABOLISM"))) { $a = explode(":", $v, 2); $modifier = $a[0]; } else { $modifier = ''; $s = substr($v, 0, strpos($v, "[") + 1); // dopamine D2-receptor antagonist [ preg_match("/ ([a-z]+) \\[/", $s, $m); if (isset($m[1])) { $modifier = $m[1]; } } parent::addRDF(parent::describeIndividual($id, $v, parent::getVoc() . $type) . parent::describeClass(parent::getVoc() . $type, $type) . parent::triplifyString($id, parent::getVoc() . "modifier", $modifier) . parent::triplify($uri, parent::getVoc() . strtolower($k), $id)); preg_match_all("/ \\[([^\\]]+)\\]/", $v, $m); if (isset($m[1])) { foreach ($m[1] as $item) { if (!strstr($item, "KO")) { $item = "kegg:" . 
str_replace(":", "_", $item); } else { $item = str_replace("KO:", "kegg:", $item); } parent::addRDF(parent::triplify($id, parent::getVoc() . "link", $item)); } } continue; } // skip these if (in_array($k, array("ATOM", "BOND", "BRITE", "AASEQ", "NTSEQ", "SEQUENCE"))) { continue; } // simple strings to keep as is if (in_array($k, array("EXACT_MASS", "FORMULA", "MOL_WEIGHT", "LINEAGE", "LENGTH", "MASS", "COMPOSITION", "NODE", "EDGE", "POSITION"))) { parent::addRDF(parent::triplifyString($uri, parent::getVoc() . strtolower($k), $v)); continue; } // default catchall parent::addRDF(parent::triplifyString($uri, parent::getVoc() . strtolower($k), $v . " [script:default]")); } if (isset($e['reference'])) { foreach ($e['reference'] as $i => $r) { $ref = parent::getRes() . $e['id'] . ".ref.{$i}"; parent::addRDF(parent::describeIndividual($ref, $r['title'], parent::getVoc() . "Reference") . parent::describeClass(parent::getVoc() . "Reference", "Reference") . parent::triplifyString($ref, parent::getVoc() . "authors", $r['authors']) . parent::triplifyString($ref, parent::getVoc() . "journal", $r['journal']) . parent::triplify($uri, parent::getVoc() . "reference", $ref)); if (isset($r['pubmed'])) { parent::addRDF(parent::triplify($ref, parent::getVoc() . "x-pubmed", $r['pubmed'])); } } } fclose($fp); }
/**
 * Convert one decoded OMIM entry into RDF statements.
 *
 * The entry is read from $obj["omim"]["entryList"][0]["entry"]. The method
 * walks, in order: titles, text sections, allelic variants, the clinical
 * synopsis, the gene map, phenotype maps, literature references and external
 * links, and hands every generated statement to parent::addRDF().
 * Nothing is returned.
 *
 * @param array  $obj  decoded OMIM response (nested arrays as indexed above)
 * @param string $type entry type label; spaces and slashes are replaced by
 *                     dashes to build the class name of the entry
 */
function ParseEntry($obj, $type)
{
    $o = $obj["omim"]["entryList"][0]["entry"];
    $omim_id = $o['mimNumber'];
    $omim_uri = parent::getNamespace() . $o['mimNumber'];
    if (isset($o['version'])) {
        parent::setDatasetVersion($o['version']);
    }

    // add the links
    parent::addRDF($this->QQuadO_URL($omim_uri, "rdfs:seeAlso", "http://omim.org/entry/" . $omim_id));
    parent::addRDF($this->QQuadO_URL($omim_uri, "owl:sameAs", "http://identifiers.org/omim/" . $omim_id));

    // parse titles
    $titles = $o['titles'];
    parent::addRDF(
        parent::describeIndividual($omim_uri, $titles['preferredTitle'], parent::getVoc() . str_replace(array(" ", "/"), "-", ucfirst($type))) .
        parent::describeClass(parent::getVoc() . str_replace(array(" ", "/"), "-", ucfirst($type)), $type)
    );
    if (isset($titles['preferredTitle'])) {
        parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "preferred-title", $titles['preferredTitle']));
    }
    if (isset($titles['alternativeTitles'])) {
        // alternative titles arrive as one ";;"-separated string
        $b = explode(";;", $titles['alternativeTitles']);
        foreach ($b as $title) {
            parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "alternative-title", trim($title)));
        }
    }

    // parse text sections
    if (isset($o['textSectionList'])) {
        foreach ($o['textSectionList'] as $i => $section) {
            $text = $section['textSection'];
            if ($text['textSectionTitle'] == "Description") {
                parent::addRDF(parent::triplifyString($omim_uri, "dc:description", $text['textSectionContent']));
            } else {
                // any other section becomes a predicate named after its title
                $p = str_replace(" ", "-", strtolower($text['textSectionTitle']));
                parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "{$p}", $text['textSectionContent']));
            }

            // parse the omim references: six-digit numbers in curly braces
            preg_match_all("/\\{([0-9]{6})\\}/", $text['textSectionContent'], $m);
            if (isset($m[1][0])) {
                foreach ($m[1] as $oid) {
                    parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "refers-to", "omim:{$oid}"));
                }
            }
        }
    }

    // allelic variants
    if (isset($o['allelicVariantList'])) {
        foreach ($o['allelicVariantList'] as $i => $v) {
            $v = $v['allelicVariant'];
            $uri = parent::getRes() . "{$omim_id}" . "_allele_" . $i;
            $label = str_replace("\n", " ", $v['name']);
            parent::addRDF(
                parent::describeIndividual($uri, $label, parent::getVoc() . "Allelic-Variant") .
                parent::describeClass(parent::getVoc() . "Allelic-Variant", "Allelic Variant")
            );
            if (isset($v['alternativeNames'])) {
                $names = explode(";;", $v['alternativeNames']);
                foreach ($names as $name) {
                    $name = str_replace("\n", " ", $name);
                    parent::addRDF(parent::triplifyString($uri, parent::getVoc() . "alternative-names", $name));
                }
            }
            if (isset($v['text'])) {
                parent::addRDF(parent::triplifyString($uri, "dc:description", $v['text']));
            }
            if (isset($v['mutations'])) {
                parent::addRDF(parent::triplifyString($uri, parent::getVoc() . "mutation", $v['mutations']));
            }
            if (isset($v['dbSnps'])) {
                $snps = explode(",", $v['dbSnps']);
                foreach ($snps as $snp) {
                    parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-dbsnp", "dbsnp:" . $snp));
                }
            }
            parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "variant", $uri));
        }
    }

    // clinical synopsis
    if (isset($o['clinicalSynopsis'])) {
        $cs = $o['clinicalSynopsis'];
        $cs_uri = parent::getRes() . "" . $omim_id . "_cs";
        parent::addRDF(
            parent::describeIndividual($cs_uri, "Clinical synopsis for omim {$omim_id}", parent::getVoc() . "Clinical-Synopsis") .
            parent::describeClass(parent::getVoc() . "Clinical-Synopsis", "Clinical Synopsis") .
            parent::triplify($omim_uri, parent::getVoc() . "clinical-synopsis", $cs_uri)
        );
        foreach ($cs as $k => $v) {
            if (!strstr($k, "Exists")) { // ignore the boolean assertion.
                // @todo ignore provenance for now
                if (in_array($k, array('contributors', 'creationDate', 'editHistory', 'epochCreated', 'dateCreated', 'epochUpdated', 'dateUpdated'))) {
                    continue;
                }
                // scalar values are wrapped so both shapes share one loop
                if (!is_array($v)) {
                    $v = array($k => $v);
                }
                foreach ($v as $k1 => $v1) {
                    $phenotypes = explode(";", $v1);
                    foreach ($phenotypes as $coded_phenotype) {
                        // parse out the codes
                        $coded_phenotype = trim($coded_phenotype);
                        if (!$coded_phenotype) {
                            continue;
                        }
                        // the label is the phenotype text with its {...} code block removed
                        $phenotype = preg_replace("/\\{.*\\}/", "", $coded_phenotype);
                        $phenotype_id = parent::getRes() . "" . md5(strtolower($phenotype));
                        $entity_id = parent::getRes() . "" . $k1;
                        parent::addRDF(
                            parent::describeIndividual($phenotype_id, $phenotype, parent::getVoc() . 'Characteristic') .
                            parent::describeClass(parent::getVoc() . 'Characteristic', 'Characteristic') .
                            parent::triplify($cs_uri, parent::getVoc() . "feature", $phenotype_id) .
                            parent::describeIndividual($entity_id, $k1, parent::getVoc() . "Entity") .
                            parent::describeClass(parent::getVoc() . "Entity", "Entity") .
                            parent::triplify($phenotype_id, parent::getVoc() . "characteristic-of", $entity_id)
                        );

                        // parse out the vocab references
                        preg_match_all("/\\{([0-9A-Za-z \\:\\-\\.]+)\\}|;/", $coded_phenotype, $codes);
                        if (isset($codes[1][0])) {
                            foreach ($codes[1] as $entry) {
                                $entries = explode(" ", trim($entry));
                                foreach ($entries as $e) {
                                    // bare "HPO"/"EOM" tokens are skipped; the code follows as its own token
                                    if ($e == "HPO" || $e == "EOM") {
                                        continue;
                                    }
                                    // $ns and $id are filled in by parseQName
                                    $this->getRegistry()->parseQName($e, $ns, $id);
                                    if (!isset($ns) || $ns == '') {
                                        // no prefix: treated as an OMIM number, dropping any ".suffix"
                                        $b = explode(".", $id);
                                        $ns = "omim";
                                        $id = $b[0];
                                    } else {
                                        $ns = str_replace(array("hpo", "id", "icd10cm", "icd9cm", "snomedct"), array("hp", "eom", "icd10", "icd9", "snomed"), $ns);
                                    }
                                    parent::addRDF(parent::triplify($phenotype_id, parent::getVoc() . "x-{$ns}", "{$ns}:{$id}"));
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    // genemap
    if (isset($o['geneMap'])) {
        $map = $o['geneMap'];
        if (isset($map['chromosome'])) {
            parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "chromosome", (string) $map['chromosome']));
        }
        if (isset($map['cytoLocation'])) {
            parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "cytolocation", (string) $map['cytoLocation']));
        }
        if (isset($map['geneSymbols'])) {
            $b = preg_split("/[,;\\. ]+/", $map['geneSymbols']);
            foreach ($b as $symbol) {
                parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "gene-symbol", "symbol:" . trim($symbol)));
            }
        }
        if (isset($map['geneName'])) {
            $b = explode(",", $map['geneName']);
            foreach ($b as $name) {
                parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "gene-name", trim($name)));
            }
        }
        if (isset($map['mappingMethod'])) {
            $b = explode(",", $map['mappingMethod']);
            foreach ($b as $c) {
                $mapping_method = trim($c);
                $method_uri = $this->get_method_type($mapping_method);
                // get_method_type() signals an unknown method with false
                if ($method_uri !== false) {
                    parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "mapping-method", $method_uri));
                }
            }
        }
        if (isset($map['mouseGeneSymbol'])) {
            $b = explode(",", $map['mouseGeneSymbol']);
            foreach ($b as $c) {
                parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "mouse-gene-symbol", "symbol:" . strtoupper($c)));
            }
        }
        if (isset($map['mouseMgiID'])) {
            $b = explode(",", $map['mouseMgiID']);
            foreach ($b as $c) {
                parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-mgi", $c));
            }
        }
        if (isset($map['geneInheritance']) && $map['geneInheritance'] != '') {
            parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "gene-inheritance", $map['geneInheritance']));
        }
    }

    // phenotype maps
    if (isset($o['phenotypeMapList'])) {
        foreach ($o['phenotypeMapList'] as $i => $phenotypeMap) {
            $phenotypeMap = $phenotypeMap['phenotypeMap'];
            $pm_uri = parent::getRes() . $omim_id . "_pm_" . ($i + 1);
            parent::addRDF(
                parent::describeIndividual($pm_uri, "phenotype mapping for {$omim_id}", parent::getVoc() . "Phenotype-Map") .
                parent::describeClass(parent::getVoc() . "Phenotype-Map", "OMIM Phenotype-Map") .
                parent::triplify($omim_uri, parent::getVoc() . "phenotype-map", $pm_uri)
            );
            foreach (array_keys($phenotypeMap) as $k) {
                if (in_array($k, array("mimNumber", "phenotypeMimNumber", "phenotypicSeriesMimNumber"))) {
                    parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . $k, "omim:" . $phenotypeMap[$k]));
                } elseif ($k == "geneSymbols") {
                    $l = explode(", ", $phenotypeMap[$k]);
                    foreach ($l as $gene) {
                        parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . "gene-symbol", "hgnc.symbol:" . $gene));
                    }
                } elseif ($k == "phenotypeMappingKey") {
                    $l = $this->get_phenotype_mapping_method_type($phenotypeMap[$k]);
                    parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . "mapping-method", $l));
                } else {
                    parent::addRDF(parent::triplifyString($pm_uri, parent::getVoc() . $k, $phenotypeMap[$k]));
                }
            }
        }
    }

    // references
    if (isset($o['referenceList'])) {
        foreach ($o['referenceList'] as $i => $r) {
            $r = $r['reference'];
            if (isset($r['pubmedID'])) {
                $pubmed_uri = "pubmed:" . $r['pubmedID'];
                parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "article", $pubmed_uri));

                // Fall back to a generic label when the reference has no title.
                // The defaulted value must be the one passed on: reading
                // $r['title'] directly raises an undefined-index notice here.
                $title = 'article';
                if (isset($r['title'])) {
                    $title = $r['title'];
                }
                parent::addRDF(parent::describe($pubmed_uri, addslashes($title)));
                if (isset($r['articleUrl'])) {
                    parent::addRDF($this->QQuadO_URL($pubmed_uri, "rdfs:seeAlso", htmlentities($r['articleUrl'])));
                }
            }
        }
    }

    // external ids
    if (isset($o['externalLinks'])) {
        foreach ($o['externalLinks'] as $k => $id) {
            if ($id === false) {
                continue;
            }
            // $ns stays empty for link types that are deliberately not exported
            $ns = '';
            switch ($k) {
                case 'approvedGeneSymbols':
                    $ns = 'symbol';
                    break;
                case 'geneIDs':
                    $ns = 'ncbigene';
                    break;
                case 'ncbiReferenceSequences':
                    $ns = 'gi';
                    break;
                case 'genbankNucleotideSequences':
                    $ns = 'gi';
                    break;
                case 'proteinSequences':
                    $ns = 'gi';
                    break;
                case 'uniGenes':
                    $ns = 'unigene';
                    break;
                case 'ensemblIDs':
                    $ns = 'ensembl';
                    break;
                case 'swissProtIDs':
                    $ns = 'uniprot';
                    break;
                case 'mgiIDs':
                    $ns = 'mgi';
                    // keep the part after the first ":" of the value
                    $b = explode(":", $id);
                    $id = $b[1];
                    break;
                case 'flybaseIDs':
                    $ns = 'flybase';
                    break;
                case 'zfinIDs':
                    $ns = 'zfin';
                    break;
                case 'hprdIDs':
                    $ns = 'hprd';
                    break;
                case 'orphanetDiseases':
                    $ns = 'orphanet';
                    break;
                case 'refSeqAccessionIDs':
                    $ns = 'refseq';
                    break;
                case 'ordrDiseases':
                    $ns = 'ordr';
                    // keep only the part before the first ";;"
                    $b = explode(";;", $id);
                    $id = $b[0];
                    break;
                case 'snomedctIDs':
                    $ns = 'snomed';
                    break;
                case 'icd10cmIDs':
                    $ns = 'icd10';
                    break;
                case 'icd9cmIDs':
                    $ns = 'icd9';
                    break;
                case 'umlsIDs':
                    $ns = 'umls';
                    break;
                case 'wormbaseIDs':
                    $ns = 'wormbase';
                    break;
                case 'diseaseOntologyIDs':
                    $ns = 'do';
                    break;

                // specifically ignoring
                case 'geneTests':
                case 'cmgGene':
                case 'geneticAllianceIDs': // #
                case 'nextGxDx':
                case 'nbkIDs': // NBK1207;;Alport Syndrome and Thin Basement Membrane Nephropathy
                case 'newbornScreeningUrls':
                case 'decipherUrls':
                case 'geneReviewShortNames':
                case 'locusSpecificDBs':
                case 'geneticsHomeReferenceIDs':
                case 'omiaIDs':
                case 'coriellDiseases':
                case 'clinicalDiseaseIDs':
                case 'possumSyndromes':
                case 'keggPathways':
                case 'gtr':
                case 'gwasCatalog':
                case 'mgiHumanDisease':
                case 'wormbaseDO':
                case 'dermAtlas': // true/false
                    break;

                default:
                    echo "unhandled external link {$k} {$id}" . PHP_EOL;
            }

            $ids = explode(",", $id);
            foreach ($ids as $id) {
                if ($ns) {
                    if (strstr($id, ";;") === FALSE) {
                        parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-{$ns}", $ns . ':' . $id));
                    } else {
                        $b = explode(";;", $id); // multiple ids//names
                        foreach ($b as $c) {
                            // parts containing a lowercase letter are skipped (presumably names, not ids)
                            preg_match("/([a-z])/", $c, $m);
                            if (!isset($m[1])) {
                                parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-{$ns}", $ns . ':' . $c));
                            }
                        }
                    }
                }
            }
        }
    } //external links
}
/**
 * Read name records from the current read file and emit one name statement
 * (plus the unique-name statement) per record, flushing the RDF buffer to the
 * write file after each record.
 *
 * A record is split on "\t|\t" after trimming pipes, tabs and line endings
 * from both ends. Fields are used positionally:
 * 0 = taxon id, 1 = name, 2 = unique name, 3 = name class.
 * Records whose name class is "scientific name" additionally get dc:title
 * and rdfs:label.
 */
private function names()
{
    while ($l = $this->getReadFile()->read(200000)) {
        $a = explode("\t|\t", trim($l, "|\t\r\n"));

        // explode() never returns an empty array, so test the id field itself
        // to skip blank records.
        if (trim($a[0]) == '') {
            continue;
        }
        // Short records keep working as before (missing fields read as empty)
        // but no longer raise undefined-offset notices.
        $a = array_pad($a, 4, '');

        $taxid = parent::getNamespace() . trim($a[0]);
        $name = utf8_encode($a[1]);
        $name_class = str_replace(" ", "-", $a[3]);
        $rel = parent::getVoc() . $name_class;

        parent::addRDF(
            parent::triplifyString($taxid, $rel, $name) .
            parent::triplifyString($taxid, parent::getVoc() . "unique-name", utf8_encode($a[2]))
        );

        // Compare the bare name class: $rel carries the vocabulary prefix and
        // therefore can never equal "scientific-name".
        if ($name_class == "scientific-name") {
            parent::addRDF(
                parent::triplifyString($taxid, "dc:title", $name) .
                parent::triplifyString($taxid, "rdfs:label", $name)
            );
        }
        $this->writeRDFBufferToWriteFile();
    } //while
}
/**
 * Parse a disorder/gene association XML file and emit RDF for every
 * disorder-gene association found.
 *
 * The file is read one "DisorderList" element at a time. For each Disorder
 * and each of its DisorderGeneAssociation children the gene (label, symbol,
 * synonyms, external references) and the association itself (type, status,
 * disorder, gene) are described. The RDF buffer is written out after each
 * disorder.
 *
 * @param string $file path handed to CXML
 */
function genes($file)
{
    $xml = new CXML($file);
    while ($xml->parse("DisorderList") == TRUE) {
        $x = $xml->GetXMLRoot();
        foreach ($x->Disorder as $d) {
            $orphanet_id = parent::getNamespace() . (string) $d->OrphaNumber;
            $disorder_name = (string) $d->Name;

            foreach ($d->DisorderGeneAssociationList->DisorderGeneAssociation as $dga) {
                // gene
                $gene = $dga->Gene;
                $gene_id = parent::getNamespace() . (string) $gene->OrphaNumber;
                $gene_label = (string) $gene->Name;
                $gene_symbol = (string) $gene->Symbol;
                parent::addRDF(
                    parent::describeIndividual($gene_id, $gene_label, parent::getVoc() . "Gene") .
                    parent::describeClass(parent::getVoc() . "Gene", "orphanet gene") .
                    parent::triplifyString($gene_id, parent::getVoc() . "symbol", $gene_symbol)
                );

                // Iterate the Synonym children of each list container.
                // Looping over SynonymList alone and reading ->Synonym only
                // ever yields the first synonym (and an empty string for an
                // empty list).
                foreach ($gene->SynonymList as $synonym_list) {
                    foreach ($synonym_list->Synonym as $s) {
                        $synonym = (string) $s;
                        parent::addRDF(parent::triplifyString($gene_id, parent::getVoc() . "synonym", $synonym));
                    }
                }

                // Same pattern for external references: visit every
                // ExternalReference, not just the first one of each list.
                foreach ($gene->ExternalReferenceList as $reference_list) {
                    foreach ($reference_list->ExternalReference as $er) {
                        $db = (string) $er->Source;
                        $db = parent::getRegistry()->getPreferredPrefix($db);
                        $id = (string) $er->Reference;
                        $xref = "{$db}:{$id}";
                        parent::addRDF(parent::triplify($gene_id, parent::getVoc() . "x-{$db}", $xref));
                    }
                }

                // the association id is derived from a hash of its own XML
                $dga_id = parent::getRes() . (string) $d->OrphaNumber . "_" . md5($dga->asXML());

                $ga = $dga->DisorderGeneAssociationType;
                $ga_id = parent::getNamespace() . (string) $ga->attributes()->id;
                $ga_label = (string) $ga->Name;

                $s = $dga->DisorderGeneAssociationStatus;
                $s_id = parent::getNamespace() . (string) $s->attributes()->id;
                $s_label = (string) $s->Name;

                parent::addRDF(
                    parent::describeIndividual($dga_id, "{$ga_label} {$gene_label} in {$disorder_name} ({$s_label})", $ga_id) .
                    parent::describeClass($ga_id, $ga_label, parent::getVoc() . "Disorder-Gene-Association") .
                    parent::triplify($dga_id, parent::getVoc() . "status", $s_id) .
                    parent::describeClass($s_id, $s_label, parent::getVoc() . "Disorder-Gene-Association-Status") .
                    parent::triplify($dga_id, parent::getVoc() . "disorder", $orphanet_id) .
                    parent::describeIndividual($orphanet_id, $disorder_name, parent::getVoc() . "Disorder") .
                    parent::triplify($dga_id, parent::getVoc() . "gene", $gene_id)
                );
            }
            parent::writeRDFBufferToWriteFile();
        }
    }
    unset($xml);
}
/**
 * Parse an XML file of SNP records and emit RDF for each child element of
 * the document root.
 *
 * For every record this describes the SNP (class, molecule type, taxon),
 * its genotype flag, validation attributes and HGVS names. When the
 * Assembly element is flagged reference="true" it also emits build
 * information, the contig component and each function set found under
 * MapLoc.
 *
 * @param string $file path handed to CXML
 * @return false|null false when no XML root could be obtained; otherwise
 *                    nothing is returned
 */
function parse($file)
{
    $xml = new CXML($file);
    $xml->parse();
    $entry = $xml->getXMLRoot();
    if (!isset($entry) or !$entry) {
        return false;
    }

    foreach ($entry->children() as $o) {
        $attrs = $o->attributes();
        $rsid = "rs" . $attrs->rsId;
        $id = parent::getNamespace() . $rsid;
        $type = parent::getVoc() . ucfirst(str_replace(" ", "-", (string) $attrs->snpClass));
        $moltype = parent::getVoc() . (string) $attrs->molType;

        // attributes
        parent::addRDF(
            parent::describeIndividual($id, $rsid, $type) .
            parent::describeClass($type, ucfirst("" . $attrs->snpClass)) .
            parent::triplify($id, parent::getVoc() . "mol-type", $moltype) .
            parent::describeClass($moltype, (string) $attrs->molType, parent::getVoc() . "Moltype") .
            parent::describeClass(parent::getVoc() . "Moltype", "Moltype") .
            parent::triplify($id, parent::getVoc() . "taxid", "taxonomy:" . (string) $attrs->taxId)
        );

        $genotype = (string) $attrs->genoType;
        if ($genotype) {
            // Emit the attribute value itself with the real XSD boolean type.
            // "xsd:bool" is not an XML Schema datatype, and a literal with the
            // vocabulary prefix glued on is not a valid boolean.
            parent::addRDF(parent::triplifyString($id, parent::getVoc() . "genotype", $genotype, "xsd:boolean"));
        }

        // frequency and create/update build information are not exported

        // validation
        $a = $o->Validation;
        parent::addRDF(
            parent::triplifyString($id, parent::getVoc() . "validation-by-cluster", (string) $a->attributes()->byCluster) .
            parent::triplifyString($id, parent::getVoc() . "validation-by-frequency", (string) $a->attributes()->byFrequency) .
            parent::triplifyString($id, parent::getVoc() . "validation-by-2hit2allele", (string) $a->attributes()->by2Hit2Allele) .
            parent::triplifyString($id, parent::getVoc() . "validation-by-1000G", (string) $a->attributes()->by1000G)
        );

        // hgvs names
        foreach ($o->hgvs as $name) {
            parent::addRDF(parent::triplifyString($id, parent::getVoc() . "hgvs-name", (string) $name));
        }

        // assembly: only an assembly flagged reference="true" is exported
        $assembly = $o->Assembly;
        if (!$assembly or $assembly->attributes()->reference != "true") {
            continue;
        }
        parent::addRDF(
            parent::triplifyString($id, parent::getVoc() . "dbsnp-build", (string) $assembly->attributes()->dbSnpBuild) .
            parent::triplifyString($id, parent::getVoc() . "genome-build", (string) $assembly->attributes()->genomeBuild)
        );

        $component = $assembly->Component;
        if (!$component) {
            continue;
        }
        parent::addRDF(
            parent::triplify($id, parent::getVoc() . "contig-accession", "genbank:" . (string) $component->attributes()->accession) .
            parent::triplify($id, parent::getVoc() . "contig-gi", "gi:" . (string) $component->attributes()->gi) .
            parent::triplifyString($id, parent::getVoc() . "chromosome", (string) $component->attributes()->chromosome)
        );

        $maploc = $component->MapLoc;
        if (!$maploc) {
            continue;
        }
        foreach ($maploc->children() as $fxnset) {
            // function sets are identified by a hash of their own XML
            $fxnset_id = parent::getRes() . md5($fxnset->asXML());
            parent::addRDF(
                parent::triplify($id, parent::getVoc() . "maps-to", $fxnset_id) .
                parent::triplify($fxnset_id, "rdf:type", parent::getVoc() . "Fxnset") .
                parent::describeClass(parent::getVoc() . "Fxnset", "Fxnset")
            );

            // each optional attribute is exported only when present
            $fx = $fxnset->attributes();
            if (isset($fx->geneId)) {
                parent::addRDF(parent::triplify($fxnset_id, parent::getVoc() . "gene", "ncbigene:" . (string) $fx->geneId));
            }
            if (isset($fx->symbol)) {
                parent::addRDF(parent::triplifyString($fxnset_id, parent::getVoc() . "gene-symbol", (string) $fx->symbol));
            }
            if (isset($fx->mrnaAcc)) {
                parent::addRDF(parent::triplify($fxnset_id, parent::getVoc() . "mrna", "refseq:" . (string) $fx->mrnaAcc));
            }
            if (isset($fx->protAcc)) {
                parent::addRDF(parent::triplify($fxnset_id, parent::getVoc() . "protein", "refseq:" . (string) $fx->protAcc));
            }
            if (isset($fx->fxnClass)) {
                parent::addRDF(parent::triplifyString($fxnset_id, parent::getVoc() . "fxn-class", (string) $fx->fxnClass));
            }
            if (isset($fx->allele)) {
                parent::addRDF(parent::triplifyString($fxnset_id, parent::getVoc() . "allele", (string) $fx->allele));
            }
            if (isset($fx->residue)) {
                parent::addRDF(parent::triplifyString($fxnset_id, parent::getVoc() . "residue", (string) $fx->residue));
            }
            if (isset($fx->readingFrame)) {
                parent::addRDF(parent::triplifyString($fxnset_id, parent::getVoc() . "reading-frame", (string) $fx->readingFrame));
            }
            if (isset($fx->aaPosition)) {
                parent::addRDF(parent::triplifyString($fxnset_id, parent::getVoc() . "position", (string) $fx->aaPosition));
            }
        }
    }
    unset($xml);
}