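/**
  * Process a results XML file from the download directory.
  *
  * Reads $infile record by record (one <clinical_study> element at a time)
  * and adds RDF statements describing each study.
  **/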
 function process_file($infile)
 {
     $indir = parent::getParameterValue('indir');
     $xml = new CXML($infile);
     $this->setCheckPoint('file');
     while ($xml->Parse("clinical_study") == TRUE) {
         $this->setCheckPoint('record');
         $this->root = $root = $xml->GetXMLRoot();
         $this->nct_id = $nct_id = $this->getString("//id_info/nct_id");
         $this->study_id = $study_id = parent::getNamespace() . "{$nct_id}";
         ### declare
         $label = $this->getString("//brief_title");
         if (!$label) {
             $label = $this->getString("//official_title");
         }
         if (!$label) {
             $label = "Clinical trial #" . $nct_id;
         }
         parent::addRDF(parent::describeIndividual($study_id, $label, parent::getVoc() . "Clinical-Study") . parent::describeClass(parent::getVoc() . "Clinical-Study", "Clinical Study"));
         ##########################################################################################
         #required header
         ##########################################################################################
         parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "download-date", $this->getString('//required_header/download_date')) . parent::triplify($study_id, parent::getVoc() . "url", $this->getString('//required_header/url')));
         ##########################################################################################
         #identifiers
         ##########################################################################################
         parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "nct-id", $this->getString('//id_info/nct_id'), "xsd:string") . parent::triplifyString($study_id, parent::getVoc() . "org-study-id", $this->getString('//id_info/org_study_id'), "xsd:string"));
         $sids = $root->xpath('//id_info/secondary_id');
         if (isset($sids)) {
             foreach ($sids as $id) {
                 parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "secondary-id", (string) $id, "xsd:string"));
             }
         }
         $nctaliases = $root->xpath('//id_info/nct-alias');
         if (isset($nctaliases)) {
             foreach ($nctaliases as $id) {
                 parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "nct-alias", (string) $id, "xsd:string"));
             }
         }
         ##########################################################################################
         #titles
         ##########################################################################################
         parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "brief-title", $this->getString("//brief_title")) . parent::triplifyString($study_id, parent::getVoc() . "official-title", $this->getString("//official_title")));
         ###################################################################################
         #brief summary
         ###################################################################################
         $brief_summary = str_replace(array("\r", "\n", "\t"), array("
", "
", "	"), $this->getString('//brief_summary/textblock'));
         parent::addRDF(parent::triplifyString($study_id, $this->getVoc() . "brief-summary", $brief_summary));
         ####################################################################################
         # detailed description
         ####################################################################################
         $d = str_replace(array("\r", "\n", "\t"), array("
", "
", "	"), $this->getString('//detailed_description/textblock'));
         parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "detailed-description", $d));
         #########################################################################################
         #acronym
         #########################################################################################
         parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "acronym", $this->getString("//acronym")));
         ########################################################################################
         #sponsors
         ########################################################################################
         try {
             $sponsors = array("lead_sponsor", "collaborator");
             foreach ($sponsors as $sponsor) {
                 $a = @array_shift($root->xpath('//sponsors/' . $sponsor));
                 if ($a == null) {
                     break;
                 }
                 $agency = $this->getString("//agency", $a);
                 $agency_id = parent::getRes() . md5($agency);
                 $agency_class = $this->getString("//agency_class", $a);
                 $agency_class_id = parent::getRes() . md5($agency_class);
                 parent::addRDF(parent::describeIndividual($agency_id, $agency, parent::getVoc() . "Organization") . parent::describeClass(parent::getVoc() . "Organization", "Organization") . parent::triplify($study_id, parent::getVoc() . str_replace("_", "-", $sponsor), $agency_id) . parent::describeIndividual($agency_class_id, $agency_class, parent::getVoc() . "Organization") . parent::describeClass(parent::getVoc() . "Organization", "Organization") . parent::triplify($agency_id, parent::getVoc() . "organization", $agency_class_id));
             }
         } catch (Exception $e) {
             echo "There was an error in the lead sponsor element: {$e}\n";
         }
         #################################################################################
         # source
         #################################################################################
         $source = $this->getString('//source');
         if ($source) {
             $source_id = parent::getRes() . md5($source);
             parent::addRDF(parent::describeIndividual($source_id, $source, parent::getVoc() . "Organization") . parent::triplify($study_id, parent::getVoc() . "source", $source_id));
         }
         ######################################################################################
         # oversight
         ######################################################################################
         try {
             $oversight = @array_shift($root->xpath('//oversight_info'));
             $oversight_id = parent::getRes() . md5($oversight->asXML());
             $authority = $this->getString('//authority', $oversight);
             $authority_id = parent::getRes() . md5($authority);
             parent::addRDF(parent::describeIndividual($oversight_id, $authority, parent::getVoc() . "Organization") . parent::triplify($study_id, $this->getVoc() . "oversight", $oversight_id) . parent::triplify($study_id, $this->getVoc() . "authority", $authority_id) . parent::triplifyString($oversight_id, parent::getVoc() . "has-dmc", $this->getString('//has_dmc', $oversight)));
         } catch (Exception $e) {
             echo "There was an error in the oversight info element: {$e}\n";
         }
         #################################################################################
         # overall status
         #################################################################################
         $overall_status = $this->getString('//overall_status');
         if ($overall_status) {
             $status_id = parent::getRes() . md5($overall_status);
             parent::addRDF(parent::describeIndividual($status_id, $overall_status, parent::getVoc() . "Status") . parent::describeClass(parent::getVoc() . "Status", "Status") . parent::triplify($study_id, parent::getVoc() . "overall-status", $status_id));
         }
         #########################################################################################
         #why stopped
         #########################################################################################
         parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "why-stopped", $this->getString("//why_stopped")));
         ##################################################################################
         # dates
         ##################################################################################
         $dates = array("start_date", "end_date", "completion_date", "primary_completion_date", "verification_date", "lastchanged_date", "firstreceived_date", "firstreceived_results_date");
         foreach ($dates as $date) {
             $d = $this->getString('//' . $date);
             if ($d) {
                 $datetime = $this->getDatetimeFromDate($d);
                 if (isset($datetime)) {
                     parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . str_replace("_", "-", $date), $datetime));
                 } else {
                     trigger_error("unable to parse date: {$d}", E_USER_ERROR);
                 }
             }
         }
         ####################################################################################
         # phase
         ####################################################################################
         $phase = $this->getString('//phase');
         if ($phase && $phase != "N/A") {
             $phase_id = $this->getRes() . md5($phase);
             parent::addRDF(parent::describeIndividual($phase_id, $phase, parent::getVoc() . "Phase", $phase) . parent::describeClass(parent::getVoc() . "Phase", $phase) . parent::triplify($study_id, parent::getVoc() . "phase", $phase_id));
         }
         ###################################################################################
         # study type
         ####################################################################################
         $study_type = $this->getString('//study_type');
         if ($study_type) {
             $study_type_id = $this->getRes() . md5($study_type);
             parent::addRDF(parent::describeClass($study_type_id, $study_type, parent::getVoc() . "Study-Type") . parent::describeClass(parent::getVoc() . "Study-Type", "Study Type") . parent::triplify($study_id, parent::getVoc() . "study-type", $study_type_id));
         }
         ###############################################################################
         # study design
         ###############################################################################
         $study_design = $this->getString('//study_design');
         if ($study_design) {
             $study_design_id = parent::getRes() . md5($study_id . $study_design);
             parent::addRDF(parent::describeIndividual($study_design_id, "{$study_id} study design", parent::getVoc() . "Study-Design") . parent::describeClass(parent::getVoc() . "Study-Design", "Study Design") . parent::triplify($study_id, parent::getVoc() . "study-design", $study_design_id));
             // Intervention Model: Parallel Assignment, Masking: Double-Blind, Primary Purpose: Treatment
             foreach (explode(", ", $study_design) as $i => $b) {
                 $c = explode(":  ", $b);
                 if (isset($c[1])) {
                     $sdp = $study_design_id . "-" . ($i + 1);
                     $key = parent::getRes() . md5($c[0]);
                     $value = parent::getRes() . md5($c[1]);
                     parent::addRDF(parent::describeIndividual($sdp, $b, parent::getVoc() . "Study-Design-Parameter") . parent::describeClass(parent::getVoc() . "Study-Design-Parameter", "Study Design Parameter") . parent::triplify($sdp, parent::getVoc() . "key", $key) . parent::describeClass($key, $c[0]) . parent::triplify($sdp, parent::getVoc() . "value", $value) . parent::describeClass($value, $c[1]) . parent::triplify($study_design_id, parent::getVoc() . "study-design-parameter", $sdp));
                 }
             }
         }
         ####################################################################################
         # target duration
         ####################################################################################
         parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "target-duration", $this->getString('//target_duration')));
         ################################################################################
         # outcomes
         ###############################################################################
         $outcomes = array("primary_outcome", "secondary_outcome", "other_outcome");
         foreach ($outcomes as $outcome) {
             $o = $root->xpath('//' . $outcome);
             if ($o) {
                 $os = $o;
                 if (!is_array($o)) {
                     $os = array($o);
                 }
                 foreach ($os as $o) {
                     try {
                         $po_id = parent::getRes() . md5($nct_id . $o->asXML());
                         $po_type = parent::getVoc() . str_replace("_", "-", $outcome);
                         $measure = $this->getString('//measure', $o);
                         $time_frame = $this->getString('//time_frame', $o);
                         $safety_issue = $this->getString('//saftey_issue', $o);
                         $description = $this->getString('//description', $o);
                         parent::addRDF(parent::describeIndividual($po_id, $measure . " " . $time_frame, ucfirst($po_type)) . parent::describeClass(ucfirst($po_type), str_replace("_", " ", ucfirst($outcome))) . parent::triplifyString($po_id, "dc:description", $description) . parent::triplifyString($po_id, parent::getVoc() . "measure", $measure) . parent::triplifyString($po_id, parent::getVoc() . "time-frame", $time_frame) . parent::triplifyString($po_id, parent::getVoc() . "safety-issue", $safety_issue) . parent::triplify($study_id, parent::getVoc() . $po_type, $po_id));
                     } catch (Exception $e) {
                         echo "There was an error parsing the primary outcome element: {$e} \n";
                     }
                 }
             }
         }
         ##############################################################################
         #number of arms
         ##############################################################################
         try {
             parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "number-of-arms", $this->getString('//number_of_arms')));
         } catch (Exception $e) {
             echo "There was an exception parsing the number of arms element: {$e}\n";
         }
         ##############################################################################
         #number of groups
         ##############################################################################
         try {
             parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "number-of-arms", $this->getString('//number_of_groups')));
         } catch (Exception $e) {
             echo "There was an exception parsing the number of groups: {$e}\n";
         }
         ##############################################################################
         #enrollment
         ##############################################################################
         try {
             $e = $root->xpath('//enrollment');
             if ($e) {
                 $type = strtolower((string) $e[0]->attributes()->type);
                 $value = $this->getString('//enrollment');
                 parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . ($type ? $type . "-" : "") . "enrollment", $value));
             }
         } catch (Exception $e) {
             echo "There was an exception parsing the enrollment element: {$e}\n";
         }
         ###############################################################################
         #condition
         ###############################################################################
         try {
             $conditions = $root->xpath('//condition');
             foreach ($conditions as $condition) {
                 $mesh_label_id = parent::getRes() . md5($condition);
                 parent::addRDF(parent::triplify($study_id, parent::getVoc() . "condition", $mesh_label_id) . parent::describeClass($mesh_label_id, $condition, parent::getVoc() . "Condition") . parent::describeClass(parent::getVoc() . "Condition", "Condition"));
             }
         } catch (Exception $e) {
             echo "There was an exception parsing condition element: {$e}\n";
         }
         ################################################################################
         # arm_group
         ################################################################################
         try {
             $arm_groups = $root->xpath('//arm_group');
             foreach ($arm_groups as $arm_group) {
                 $arm_group_id = $this->getString('./arm_group_label', $arm_group);
                 $arm_group_id = md5($arm_group_id);
                 $arm_group_uri = parent::getRes() . $this->nct_id . "/arm-group/" . $arm_group_id;
                 $arm_group_label = $this->nct_id . " arm group " . $arm_group_id;
                 $arm_group_type = ucfirst(str_replace(" ", "_", $this->getString('./arm_group_type', $arm_group)));
                 if (!$arm_group_type) {
                     $arm_group_type = "Clinical-Arm";
                 }
                 $description = $this->getString('./description', $arm_group);
                 parent::addRDF(parent::describeIndividual($arm_group_uri, $arm_group_label, parent::getVoc() . $arm_group_type) . parent::describeClass(parent::getVoc() . $arm_group_type, ucfirst(str_replace("_", " ", $arm_group_type))) . parent::triplifyString($arm_group_uri, parent::getVoc() . "description", $description) . parent::describeIndividual($arm_group_uri, $arm_group, parent::getVoc() . "Arm-Group") . parent::describeClass(parent::getVoc() . "Arm-Group", "Arm Group") . parent::triplify($study_id, parent::getVoc() . "arm-group", $arm_group_uri));
             }
         } catch (Exception $e) {
             echo "There was an exception in arm groups: {$e}\n";
         }
         ##############################################################################
         #intervention
         ##############################################################################
         try {
             $interventions = $root->xpath('//intervention');
             foreach ($interventions as $intervention) {
                 $intervention_id = parent::getRes() . md5($intervention->asXML());
                 $intervention_name = $this->getString('./intervention_name', $intervention);
                 $intervention_type = $this->getString('./intervention_type', $intervention);
                 $intervention_type_uri = parent::getVoc() . ucfirst(str_replace(" ", "_", $intervention_type));
                 $intervention_desc = $this->getString('./description', $intervention);
                 $intervention_on = $this->getString('./other_name', $intervention);
                 parent::addRDF(parent::describeIndividual($intervention_id, $intervention_name, $intervention_type_uri) . parent::describeClass($intervention_type_uri, $intervention_type) . parent::triplifyString($intervention_id, parent::getVoc() . "intervention-name", $intervention_name) . parent::triplifyString($intervention_id, parent::getVoc() . "intervention-desc", $intervention_desc) . parent::triplifyString($intervention_id, parent::getVoc() . "other-name", $intervention_on) . parent::triplify($study_id, parent::getvoc() . "intervention", $intervention_id));
                 $agl = $intervention->xpath("./arm_group_label");
                 foreach ($agl as $a) {
                     $arm_group_id = md5($a);
                     $ag = parent::getRes() . $this->nct_id . "/arm-group/" . $arm_group_id;
                     parent::addRDF(parent::describeIndividual($ag, $a, parent::getVoc() . "Arm-Group") . parent::describeClass(parent::getVoc() . "Arm-Group", "Arm Group") . parent::triplify($intervention_id, parent::getVoc() . "arm-group", $ag));
                 }
             }
         } catch (Exception $e) {
             echo "There was an error in interventions {$e}\n";
         }
         ###############################################################################
         #eligibility
         ################################################################################
         try {
             $eligibility = @array_shift($root->xpath('//eligibility'));
             if ($eligibility !== null) {
                 $eligibility_label = "eligibility for " . $study_id;
                 $eligibility_id = parent::getRes() . md5($eligibility->asXML());
                 parent::addRDF(parent::describeIndividual($eligibility_id, $eligibility_label, parent::getVoc() . "Eligibility") . parent::describeClass(parent::getVoc() . "Eligibility", "Eligibility") . parent::triplify($study_id, parent::getVoc() . "eligibility", $eligibility_id));
                 if ($criteria = @array_shift($eligibility->xpath('./criteria'))) {
                     $text = @array_shift($criteria->xpath('./textblock'));
                     parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . "text", $text));
                     $c = preg_split("/(Inclusion Criteria\\:|Exclusion Criteria\\:)/", $text);
                     //inclusion
                     if (isset($c[1])) {
                         $d = explode(" - ", $c[1]);
                         // the lists are separated by a hyphen
                         foreach ($d as $inclusion) {
                             $inc = trim($inclusion);
                             if ($inc != '') {
                                 $inc_id = parent::getRes() . md5($inc);
                                 parent::addRDF(parent::describeIndividual($inc_id, $inc, parent::getVoc() . "Inclusion-Criteria") . parent::describeClass(parent::getVoc() . "Inclusion-Criteria", "Inclusion Criteria") . parent::triplify($eligibility_id, parent::getVoc() . "inclusion-criteria", $inc_id));
                             }
                         }
                     }
                     //exclusion
                     if (isset($c[2])) {
                         $d = explode(" - ", $c[1]);
                         foreach ($d as $exclusion) {
                             $exc = trim($exclusion);
                             if ($exc != '') {
                                 $exc_id = parent::getRes() . md5($exc);
                                 parent::addRDF(parent::describeIndividual($exc_id, $exc, parent::getVoc() . "Exclusion-Criteria") . parent::describeClass(parent::getVoc() . "Exclusion-Criteria", "Exclusion Criteria") . parent::triplify($eligibility_id, parent::getVoc() . "exclusion-criteria", $exc_id));
                             }
                         }
                     }
                 }
                 parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . "gender", $this->getString('./gender', $eligibility)));
                 parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . "healthy-volunteers", $this->getString('./healthy_volunteers', $eligibility)));
                 $attributes = array('minimum_age', 'maximum_age');
                 foreach ($attributes as $a) {
                     $s = $this->getString('./' . $a, $eligibility);
                     if ($s != 'N/A') {
                         $age = trim(str_replace("Years", "", $s));
                         parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . str_replace("_", "-", $a), $age));
                     }
                 }
                 $attributes = array("study_pop" => "study-population", "sampling_method" => "sampling-method");
                 foreach ($attributes as $a => $r) {
                     $e = @array_shift($eligibility->xpath('./' . $a));
                     if ($s = $this->getString('./' . $a, $eligibility)) {
                         parent::addRDF(parent::triplifyString($eligibility_id, parent::getVoc() . $r, $this->getString('./textblock', $e)));
                     }
                 }
             }
         } catch (Exception $e) {
             echo "There was an error in eligibility: {$e}\n";
         }
         ######################################################################################
         #biospec
         #####################################################################################
         parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "biospec-retention", $this->getString('//biospec_retention')));
         try {
             $b = @array_shift($root->xpath('//biospec_descr'));
             if ($b) {
                 parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "biospec_descr", $this->getString('./textblock', $b)));
             }
         } catch (Exception $e) {
             echo "There was an error in biospec_descr: {$e}\n";
         }
         ###################################################################
         # contacts
         ###################################################################
         $contacts = array("overall_official", "overall_contact", "overall_contact_backup");
         try {
             foreach ($contacts as $c) {
                 $d = @array_shift($root->xpath('//' . $c));
                 if ($d) {
                     parent::addRDF(parent::triplify($study_id, parent::getVoc() . str_replace("_", "-", $c), $this->makeContact($d)));
                 }
             }
         } catch (Exception $e) {
             echo "There was an error parsing overall contact: {$e}" . "\n";
         }
         ##############################################################
         # location of facility doing the testing
         ##############################################################
         try {
             $location = @array_shift($root->xpath('//location'));
             if ($location) {
                 $location_uri = parent::getRes() . md5($location->asXML());
                 $name = $this->getString('//facility/name', $location);
                 $address = @array_shift($location->xpath('//facility/address'));
                 $contact = @array_shift($location->xpath('//contact'));
                 $backups = @array_shift($location->xpath('//contact_backup'));
                 $investigators = @array_shift($location->xpath('//investigator'));
                 parent::addRDF(parent::describeIndividual($location_uri, $name, parent::getVoc() . "Location") . parent::describeClass(parent::getVoc() . "Location", "Location") . parent::triplifyString($location_uri, parent::getVoc() . "status", $this->getString('//status', $location)) . parent::triplify($study_id, parent::getVoc() . "location", $location_uri) . parent::triplify($location_uri, parent::getVoc() . "address", $this->makeAddress($address)) . ($contact != null ? parent::triplify($location_uri, parent::getVoc() . "contact", $this->makeContact($contact)) : ""));
                 if ($backups) {
                     foreach ($backups as $backup) {
                         parent::addRDF(parent::triplify($location_uri, parent::getVoc() . "contact-backup", $this->makeContact($backup)));
                     }
                 }
                 if ($investigators) {
                     foreach ($investigators as $investigator) {
                         parent::addRDF(parent::triplify($location_uri, parent::getVoc() . "investigator", $this->makeContact($investigator)));
                     }
                 }
             }
         } catch (Exception $e) {
             echo "There was an error parsing location: {$e}" . "\n";
         }
         ######################################################################
         #countries
         ######################################################################
         try {
             $a = array("location_countries", "removed_countries");
             foreach ($a as $country) {
                 $lc = @array_shift($root->xpath('//' . $country));
                 if ($lc) {
                     $label = $this->getString('//country', $lc);
                     $cid = parent::getRes() . md5($label);
                     parent::addRDF(parent::describeIndividual($cid, $label, parent::getVoc() . "Country") . parent::describeClass(parent::getVoc() . "Country", "Country") . parent::triplify($study_id, parent::getVoc() . "country", $cid));
                 }
             }
         } catch (Exception $e) {
             echo "There was an error parsing country: {$e}" . "\n";
         }
         ######################################################################
         #reference
         ######################################################################
         try {
             $a = array("reference", "result_reference");
             foreach ($a as $ref_type) {
                 $references = $root->xpath('//' . $ref_type);
                 foreach ($references as $reference) {
                     $p = $this->getString('./PMID', $reference);
                     if ($p) {
                         $pmid = "pubmed:{$p}";
                         parent::addRDF(parent::describeIndividual($pmid, $p, parent::getVoc() . "Reference") . parent::describeClass(parent::getVoc() . "Reference", "Reference") . parent::triplifyString($pmid, parent::getVoc() . "citation", $this->getString('./citation', $reference)) . parent::triplify($study_id, parent::getVoc() . str_replace("_", "-", $ref_type), $pmid));
                     }
                 }
             }
         } catch (Exception $e) {
             echo "There was an error parsing references element: {$e}\n";
         }
         #######################################################################
         #link
         #######################################################################
         try {
             $links = $root->xpath('//link');
             foreach ($links as $i => $link) {
                 $url = $this->getString('./url', $link);
                 $url = preg_replace("/>.*\$/", "", $url);
                 $lid = parent::getRes() . md5($url);
                 parent::addRDF(parent::describeIndividual($lid, $this->getString('./description', $link), parent::getVoc() . "Link") . parent::describeClass(parent::getVoc() . "Link", "Link") . parent::triplify($lid, parent::getVoc() . "url", $url) . parent::triplify($study_id, parent::getVoc() . "link", $lid));
             }
         } catch (Exception $e) {
             echo "There was an error parsing link element: {$e}\n";
         }
         ############################################################################
         #responsible party
         ############################################################################
         try {
             $rp = @array_shift($root->xpath('//responsible_party'));
             if ($rp) {
                 $rp_id = parent::getRes() . md5($rp->asXML());
                 $label = $this->getString('./name_title', $rp);
                 if (!$label) {
                     $label = $this->getString('./organization', $rp);
                 } else {
                     $label .= ", " . $this->getString('./organization', $rp);
                 }
                 if (!$label) {
                     $label = $this->getString('./party_type', $rp);
                 }
                 $org_id = parent::getRes() . md5($this->getString('./organization', $rp));
                 parent::addRDF(parent::describeIndividual($rp_id, $label, parent::getVoc() . "Responsible-Party") . parent::describeClass(parent::getVoc() . "Responsible-Party", "Responsible Party") . parent::triplify($study_id, parent::getVoc() . "responsible-party", $rp_id) . parent::triplify($rp_id, parent::getVoc() . "organization", $org_id) . parent::describeIndividual($org_id, $this->getString('./organization', $rp), parent::getVoc() . "Organization") . parent::describeClass(parent::getVoc() . "Organization", "Organization") . parent::triplifyString($rp_id, parent::getVoc() . "name-title", $this->getString('./name_title', $rp)) . parent::triplifyString($rp_id, parent::getVoc() . "party-type", $this->getString('./party_type', $rp)) . parent::triplifyString($rp_id, parent::getVoc() . "investigator-affiliation", $this->getString('./investigator_affiliation', $rp)) . parent::triplifyString($rp_id, parent::getVoc() . "investigator-full-name", $this->getString('./investigator_full_name', $rp)) . parent::triplifyString($rp_id, parent::getVoc() . "investigator-title", $this->getString('./investigator_title', $rp)));
             }
         } catch (Exception $e) {
             echo "There was an error parsing the responsible_party element: {$e}\n";
         }
         ##############################################################################
         # keywords
         ##############################################################################
         try {
             $keywords = $root->xpath('//keyword');
             foreach ($keywords as $keyword) {
                 parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "keyword", (string) $keyword));
             }
         } catch (Exception $e) {
             echo "There was an error parsing the keywords element: {$e}";
         }
         # mesh terms
         # note: mesh terms are assigned using an imperfect algorithm
         try {
             $mesh_terms = $root->xpath('//condition_browse/mesh_term');
             foreach ($mesh_terms as $mesh_term) {
                 $term = (string) $mesh_term;
                 $mesh_id = parent::getRes() . md5($term);
                 parent::addRDF(parent::triplify($study_id, parent::getVoc() . "condition-mesh", $mesh_id));
                 parent::addRDF(parent::triplifyString($mesh_id, "rdfs:label", $term));
             }
         } catch (Exception $e) {
             echo "There was an error in mesh_terms: {$e}\n";
         }
         ################################################################################
         # regulated by fda?  is section 801? has expanded access?
         ################################################################################
         try {
             parent::addRDF(parent::triplifyString($study_id, parent::getVoc() . "is-fda-regulated", $this->getString('is_fda_regulated')) . parent::triplifyString($study_id, parent::getVoc() . "is-section-801", $this->getString('is_section_801')) . parent::triplifyString($study_id, parent::getVoc() . "has-expanded-access", $this->getString('has_expanded_access')));
         } catch (Exception $e) {
             echo "There was an error parsing the is_fda_regulated element: {$e}\n";
         }
         ###############################################################################
         # mesh terms for the intervention browse
         ###############################################################################
         try {
             $a = array("condition_browse", "intervention_browse");
             foreach ($a as $browse_type) {
                 $terms = $root->xpath("//{$browse_type}/mesh_term");
                 foreach ($terms as $term) {
                     $term_label = (string) $term;
                     $term_id = parent::getRes() . md5($term);
                     parent::addRDF(parent::describeIndividual($term_id, $term_label, parent::getVoc() . "Term") . parent::describeClass(parent::getVoc() . "Term", "Term") . parent::triplify($study_id, parent::getVoc() . str_replace("_", "-", $browse_type), $term_id));
                 }
             }
         } catch (Exception $e) {
             echo "There was an error parsing {$browse_type}/mesh_term element: {$e}\n";
         }
         ################################################################################
         # clinical results
         ################################################################################
         try {
             $cr = @array_shift($root->xpath('//clinical_results'));
             if ($cr) {
                 $cr_id = parent::getRes() . md5($study_id . $cr->asXML());
                 parent::addRDF(parent::describeIndividual($cr_id, "clinical results for {$study_id}", parent::getVoc() . "Clinical-Result") . parent::describeClass(parent::getVoc() . "Clinical-Result", "Clinical Result") . parent::triplifyString($cr_id, parent::getVoc() . "description", $this->getString('./desc', $cr)) . parent::triplifyString($cr_id, parent::getVoc() . "restrictive-agreement", $this->getString('./restrictive_agreement', $cr)) . parent::triplifyString($cr_id, parent::getVoc() . "limitations-and-caveats", $this->getString('./limitations_and_caveats', $cr)) . parent::triplify($study_id, parent::getVoc() . "clinical-result", $cr_id));
             }
         } catch (Exception $e) {
             echo "There was an error parsing clinical results: {$e}\n";
         }
         ################################################################################
         # Participant Flow
         ################################################################################
         try {
             $pc = 1;
             $mc = 1;
             $wc = 1;
             $pf = @array_shift($root->xpath('//clinical_results/participant_flow'));
             if ($pf) {
                 $pf_id = parent::getRes() . md5($pf->asXML());
                 parent::addRDF(parent::describeIndividual($pf_id, "participant flow for {$study_id}", parent::getVoc() . "Participant-Flow") . parent::describeClass(parent::getVoc() . "Participant-Flow", "Participant-Flow") . parent::triplify($study_id, parent::getVoc() . "participant-flow", $pf_id) . parent::triplifyString($pf_id, parent::getVoc() . "recruitment-details", $this->getString('./recruitment_details', $pf)) . parent::triplifyString($pf_id, parent::getVoc() . "pre-assignment-details", $this->getString('./pre_assignment_details', $pf)));
                 $groups = @array_shift($pf->xpath('./group_list'));
                 foreach ($groups as $group) {
                     parent::addRDF(parent::triplify($pf_id, parent::getVoc() . "group", $this->makeGroup($group)));
                 }
                 //period_list
                 $periods = @array_shift($pf->xpath('./period_list'));
                 foreach ($periods as $period) {
                     $period_id = parent::getRes() . $nct_id . "/period/" . $pc++;
                     $period_title = $this->getString('./title', $period);
                     parent::addRDF(parent::describeIndividual($period_id, $period_title . " for {$nct_id}", parent::getVoc() . "Period") . parent::describeClass(parent::getVoc() . "Period", "Period") . parent::triplify($pf_id, parent::getVoc() . "period", $period_id));
                     // milestones
                     $milestones = @array_shift($period->xpath('./milestone_list'));
                     if ($milestones) {
                         foreach ($milestones as $milestone) {
                             $milestone_id = parent::getRes() . $nct_id . "/milestone/" . $mc++;
                             $label = $this->getString('./title', $milestone);
                             parent::addRDF(parent::describeIndividual($milestone_id, $label, parent::getVoc() . "Milestone") . parent::describeClass(parent::getVoc() . "Milestone", "Milestone") . parent::triplify($period_id, parent::getVoc() . "milestone", $milestone_id));
                             // participants
                             $p = 1;
                             $ps_list = @array_shift($milestone->xpath('./participants_list'));
                             foreach ($ps_list as $ps) {
                                 $ps_id = $milestone_id . "/p/" . $p++;
                                 $group_id = parent::getRes() . $this->nct_id . "/group/" . $ps->attributes()->group_id;
                                 $count = (string) $ps->attributes()->count;
                                 parent::addRDF(parent::describeIndividual($ps_id, "participant counts in " . $ps->attributes()->group_id . " for milestone {$mc} of {$nct_id}", parent::getVoc() . "Participant-Count") . parent::describeClass(parent::getVoc() . "Participant-Count", "Participant Count") . parent::triplify($ps_id, parent::getVoc() . "group", $group_id) . parent::triplifyString($ps_id, parent::getVoc() . "count", $count) . parent::triplify($milestone_id, parent::getVoc() . "participant-counts", $ps_id));
                             }
                         }
                     }
                     // milestones
                     $withdraws = @array_shift($period->xpath('./drop_withdraw_reason_list'));
                     if ($withdraws) {
                         foreach ($withdraws as $withdraw) {
                             $wid = parent::getRes() . $this->nct_id . "/withdraw/" . $wc++;
                             $label = $this->getString('./title', $withdraw);
                             parent::addRDF(parent::describeIndividual($wid, $label, parent::getVoc() . "Withdraw-Reason") . parent::describeClass(parent::getVoc() . "Withdraw-Reason", "Withdraw Reason"));
                             // participants
                             $ps_list = @array_shift($withdraw->xpath('./participants_list'));
                             foreach ($ps_list as $ps) {
                                 $group_id = parent::getRes() . $nct_id . "/group/" . $ps->attributes()->group_id;
                                 $count = (string) $ps->attributes()->count;
                                 parent::addRDF(parent::triplify($wid, parent::getVoc() . "group", $group_id) . parent::triplifyString($wid, parent::getVoc() . "count", $count));
                             }
                         }
                     }
                 }
             }
         } catch (Exception $e) {
             echo "There was an error parsing participant flow element: {$e}\n";
         }
         ################################################################################
         # baseline
         ################################################################################
         try {
             $baseline = @array_shift($root->xpath('//baseline'));
             if ($baseline) {
                 $b_id = $this->nct_id . "/baseline";
                 $b_uri = parent::getRes() . $b_id;
                 // group list
                 $groups = @array_shift($baseline->xpath('./group_list'));
                 foreach ($groups as $group) {
                     parent::addRDF(parent::describeIndividual($b_uri, "baseline for {$nct_id}", parent::getVoc() . "Baseline") . parent::describeClass(parent::getVoc() . "Baseline", "Baseline") . parent::triplify($b_uri, parent::getVoc() . "group", $this->makeGroup($group)) . parent::triplify($study_id, parent::getVoc() . "baseline", $b_uri));
                 }
                 // measure list
                 $measures = @array_shift($baseline->xpath('./measure_list'));
                 foreach ($measures as $measure) {
                     parent::addRDF(parent::triplify($b_uri, parent::getVoc() . "measure", $this->makeMeasure($measure)));
                 }
             }
         } catch (Exception $e) {
             echo "Error in parsing baseline" . PHP_EOL;
         }
         ################################################################################
         # outcomes
         ################################################################################
         try {
             $outcomes = @array_shift($root->xpath('//outcome_list'));
             if ($outcomes) {
                 foreach ($outcomes as $i => $outcome) {
                     $outcome_id = $this->nct_id . "/outcome/" . ($i + 1);
                     $outcome_uri = parent::getRes() . $outcome_id;
                     $outcome_label = $this->getString("./title", $outcome);
                     if (!$outcome_label) {
                         $outcome_label = "outcome for " . $this->nct_id;
                     }
                     parent::addRDF(parent::describeIndividual($outcome_uri, $outcome_label, parent::getVoc() . "Outcome", $this->getString("./description", $outcome)) . parent::describeClass(parent::getVoc() . "Outcome", "Outcome") . parent::triplify($study_id, parent::getVoc() . "outcome", $outcome_uri) . parent::triplifyString($outcome_uri, parent::getVoc() . "type", $this->getString("./type", $outcome)) . parent::triplifyString($outcome_uri, parent::getVoc() . "time-frame", $this->getString("./time_frame", $outcome)) . parent::triplifyString($outcome_uri, parent::getVoc() . "safety-issue", $this->getString("./safety_issue", $outcome)) . parent::triplifyString($outcome_uri, parent::getVoc() . "posting-date", $this->getString("./posting-date", $outcome)) . parent::triplifyString($outcome_uri, parent::getVoc() . "population", $this->getString("./population", $outcome)));
                     $groups = @array_shift($outcome->xpath('./group_list'));
                     if ($groups) {
                         foreach ($groups as $group) {
                             parent::addRDF(parent::triplify($outcome_uri, parent::getVoc() . "group", $this->makeGroup($group)));
                         }
                     }
                     // measure list
                     $measures = @array_shift($outcome->xpath('./measure_list'));
                     if ($measures) {
                         foreach ($measures as $measure) {
                             parent::addRDF(parent::triplify($outcome_uri, parent::getVoc() . "measure", $this->makeMeasure($measure)));
                         }
                     }
                     // analysis list
                     $analyses = @array_shift($outcome->xpath('./analysis_list'));
                     if ($analyses) {
                         foreach ($analyses as $analysis) {
                             parent::addRDF(parent::triplify($outcome_uri, parent::getVoc() . "analysis", $this->makeAnalysis($analysis)));
                         }
                     }
                 }
             }
         } catch (Exception $e) {
             echo "Error in parsing outcomes" . PHP_EOL;
         }
         ################################################################################
         # events
         ################################################################################
         try {
             $c_ev = $c_c = 1;
             $reported_events = @array_shift($root->xpath('//reported_events'));
             if ($reported_events) {
                 $rp_id = parent::getRes() . md5($reported_events->asXML());
                 $groups = @array_shift($reported_events->xpath('./group_list'));
                 parent::addRDF(parent::describeIndividual($rp_id, "Reported events for {$nct_id}", parent::getVoc() . "Reported-Events") . parent::describeClass(parent::getVoc() . "Reported-Events", "Reported Events") . parent::triplify($study_id, parent::getVoc() . "reported-events", $rp_id));
                 foreach ($groups as $group) {
                     parent::addRDF(parent::triplify($rp_id, parent::getVoc() . "group", $this->makeGroup($group)));
                 }
                 // events
                 $event_list = array("serious_events" => "Serious Event", "other_events" => "Other Event");
                 foreach ($event_list as $ev => $ev_label) {
                     $et = @array_shift($reported_events->xpath('./' . $ev));
                     if (!$et) {
                         continue;
                     }
                     $ev_uri = parent::getVoc() . str_replace(" ", "-", $ev_label);
                     $categories = @array_shift($et->xpath('./category_list'));
                     foreach ($categories as $category) {
                         $major_title = $this->getString('./title', $category);
                         $major_title_uri = parent::getRes() . md5($major_title);
                         $events = @array_shift($category->xpath('./event_list'));
                         foreach ($events as $event) {
                             $e_uri = parent::getRes() . $this->nct_id . "/{$ev}/" . $c_ev++;
                             $subtitle = (string) $this->getString('./sub_title', $event) . " for " . $this->nct_id;
                             $subtitle_uri = parent::getRes() . md5($subtitle);
                             parent::addRDF(parent::describeIndividual($e_uri, $subtitle, $ev_uri) . parent::describeClass($ev_uri, $ev_label) . parent::triplify($e_uri, parent::getVoc() . "sub-title", $subtitle_uri) . parent::describeIndividual($subtitle_uri, $subtitle, parent::getVoc() . "Event") . parent::describeClass(parent::getVoc() . "Event", "Event") . parent::triplify($e_uri, parent::getVoc() . "major-title", $major_title_uri) . parent::describeClass($major_title_uri, $major_title) . parent::triplify($rp_id, parent::getVoc() . str_replace("_", "-", $ev), $e_uri));
                             $counts = $event->xpath('./counts');
                             foreach ($counts as $c) {
                                 $group_id = $c->attributes()->group_id;
                                 $group_uri = parent::getRes() . $nct_id . "/group/" . $group_id;
                                 $c_uri = $e_uri . "/count/" . $c_c++;
                                 parent::addRDF(parent::describeIndividual($c_uri, $subtitle . " for " . $group_id . " in " . $this->nct_id, parent::getVoc() . "Event-Count") . parent::describeClass(parent::getVoc() . "Event-Count", "Event Count") . parent::triplify($c_uri, parent::getVoc() . "group", $group_uri) . parent::triplify($e_uri, parent::getVoc() . "count", $c_uri) . parent::triplifyString($c_uri, parent::getVoc() . "default-vocabulary", $this->getString('./default_vocab', $et)) . parent::triplifyString($c_uri, parent::getVoc() . "frequency-threshold", $this->getString('./frequency_threshold', $et)) . parent::triplifyString($c_uri, parent::getVoc() . "default-assessment", $this->getString('./default_assessment', $et)) . parent::triplifyString($c_uri, parent::getVoc() . "number-events", $c->attributes()->events) . parent::triplifyString($c_uri, parent::getVoc() . "subjects-affected", $c->attributes()->subjects_affected) . parent::triplifyString($c_uri, parent::getVoc() . "subjects-at-risk", $c->attributes()->subjects_at_risk));
                             }
                         }
                     }
                 }
             }
         } catch (Exception $e) {
             echo "Error in parsing reported events" . PHP_EOL;
         }
         parent::writeRDFBufferToWriteFile();
     }
     $this->setCheckPoint('record');
     $this->setCheckPoint('dataset');
 }
Beispiel #2
0
 /**
  * Convert an InterPro XML document into RDF statements.
  *
  * First records one "contains" statement per <dbinfo> element of the release
  * (and sets the dataset version from the INTERPRO one), then describes every
  * <interpro> entry: label/type, PubMed citations, abstract, example entries,
  * hierarchy links, secondary accessions, database cross-references and the
  * taxon distribution.
  *
  * If the "id_list" parameter is non-empty it is treated as a comma-separated
  * list of entry ids and every other entry is skipped.
  *
  * @param object $xml SimpleXML-style root element exposing ->release->dbinfo
  *                    and ->interpro children
  * @return void
  */
 function Parse($xml)
 {
     // state the dataset info
     foreach ($xml->release->dbinfo as $o) {
         $db = $o->attributes()->dbname . " v" . $o->attributes()->version . " (" . $o->attributes()->entry_count . " entries) [" . $o->attributes()->file_date . "]";
         parent::addRDF(parent::triplifyString(parent::getDatasetURI(), parent::getVoc() . "contains", $db));
         if ((string) $o->attributes()->dbname === "INTERPRO") {
             parent::setDatasetVersion($o->attributes()->version);
         }
     }
     // get a potential id list; null means "no filter"
     $id_list = null;
     if (parent::getParameterValue("id_list") != '') {
         $id_list = explode(",", parent::getParameterValue("id_list"));
     }
     // container element holding <rel_ref ipr_ref="..."/> children => predicate local name
     // NOTE(review): the parent container is "parent_list" but the child one is read
     // from "child"; confirm against the InterPro DTD whether it should be "child_list".
     $relations = array("parent_list" => "parent", "child" => "child", "contains" => "contains", "found_in" => "found-in");
     // container elements whose <db_xref db="..." dbkey="..."/> children become x-<db> links
     $xref_containers = array("member_list", "external_doc_list", "structure_db_links");
     // now iterate over the entries
     foreach ($xml->interpro as $o) {
         // flush whatever was buffered before this entry
         parent::writeRDFBufferToWriteFile();
         $interpro_id = (string) $o->attributes()->id;
         if ($id_list !== null && !in_array($interpro_id, $id_list, true)) {
             continue;
         }
         echo "Processing {$interpro_id}" . PHP_EOL;
         $name = $o->name;
         $short_name = $o->attributes()->short_name;
         $type = $o->attributes()->type;
         $s = parent::getNamespace() . $interpro_id;
         //echo "Adding... $s rdfs:label $name ($short_name) $type [$s]".PHP_EOL;
         parent::addRDF(parent::describeIndividual($s, "{$name} ({$short_name}) {$type}", parent::getVoc() . $type));
         // get the pubs: collect <cite> placeholders and the PubMed links that replace them
         $pubs = array('pid' => array(), 'pmid' => array());
         if (isset($o->pub_list->publication)) {
             foreach ($o->pub_list->publication as $p) {
                 $pid = (string) $p->attributes()->id;
                 if (isset($p->db_xref) && $p->db_xref->attributes()->db == "PUBMED") {
                     $pmid = (string) $p->db_xref->attributes()->dbkey;
                     $pubs['pid'][] = '<cite idref="' . $pid . '"/>';
                     $pubs['pmid'][] = '<a href="http://www.ncbi.nlm.nih.gov/pubmed/' . $pmid . '">pubmed:' . $pmid . '</a>';
                     parent::addRDF(parent::triplify($s, parent::getVoc() . "x-pubmed", "pubmed:{$pmid}"));
                 }
             }
         }
         // the abstract is kept as XML markup, with citation placeholders turned into links
         $abstract = (string) $o->abstract->p->asXML();
         if ($pubs['pid']) {
             $abstract = str_replace($pubs['pid'], $pubs['pmid'], $abstract);
         }
         parent::addRDF(parent::triplifyString($s, "dc:description", $abstract));
         if (isset($o->example_list)) {
             foreach ($o->example_list->example as $example) {
                 $db = (string) $example->db_xref->attributes()->db;
                 $id = (string) $example->db_xref->attributes()->dbkey;
                 parent::addRDF(parent::triplify($s, parent::getVoc() . "example-entry", "{$db}:{$id}"));
             }
         }
         // hierarchy / composition links to other InterPro entries
         foreach ($relations as $container => $predicate) {
             if (isset($o->{$container}->rel_ref)) {
                 foreach ($o->{$container}->rel_ref as $rel) {
                     $id = (string) $rel->attributes()->ipr_ref;
                     parent::addRDF(parent::triplify($s, parent::getVoc() . $predicate, "interpro:{$id}"));
                 }
             }
         }
         // secondary accessions live under <sec_list>; the loop variable must not
         // shadow $s, which is still the subject of every statement below
         if (isset($o->sec_list->sec_ac)) {
             foreach ($o->sec_list->sec_ac as $sec) {
                 $id = (string) $sec->attributes()->acc;
                 parent::addRDF(parent::triplify($s, parent::getVoc() . "secondary-accession", "interpro:{$id}"));
             }
         }
         // xrefs
         foreach ($xref_containers as $container) {
             if (isset($o->{$container}->db_xref)) {
                 foreach ($o->{$container}->db_xref as $dbxref) {
                     $db = (string) $dbxref->attributes()->db;
                     $id = (string) $dbxref->attributes()->dbkey;
                     parent::addRDF(parent::triplify($s, parent::getVoc() . "x-" . strtolower($db), "{$db}:{$id}"));
                 }
             }
         }
         // taxon distribution
         if (isset($o->taxonomy_distribution->taxon_data)) {
             foreach ($o->taxonomy_distribution->taxon_data as $t) {
                 $organism = (string) $t->attributes()->name;
                 $number = (string) $t->attributes()->proteins_count;
                 parent::addRDF(parent::triplifyString($s, parent::getVoc() . "taxon-distribution", "{$organism} ({$number})"));
             }
         }
     }
     // the loop only flushes at the start of each iteration, so write out the last entry here
     parent::writeRDFBufferToWriteFile();
 }
Beispiel #3
0
 /**
  * Convert the rows of the currently open gene2accession file into RDF.
  *
  * The first read is discarded before the loop starts (presumably the header
  * line). Each remaining non-comment line is split on tabs into 16 columns:
  * when both genomic positions are present a FALDO region with begin/end
  * positions is described, and every other non-"-" column becomes either a
  * resource link (when the column has a namespace) or a literal.
  *
  * Rows are skipped when $this->taxids is set and does not contain the row's
  * taxonomy id. The RDF buffer is written out after every processed row.
  *
  * @return void
  */
 private function gene2accession()
 {
     $this->getReadFile()->read(200000);
     // column index => predicate local name ('rel') and, for resource-valued columns, the target namespace ('ns')
     $header = array(0 => array('rel' => "x-taxonomy", 'ns' => "taxonomy"), 1 => array('rel' => "ncbigene", 'ns' => "ncbigene"), 2 => array('rel' => "status"), 3 => array('rel' => "rna-nucleotide-accession.version", 'ns' => "genbank"), 4 => array('rel' => "rna-nucleotide-gi", 'ns' => "gi"), 5 => array('rel' => "protein-accession.version", 'ns' => "genbank"), 6 => array('rel' => "protein-gi", 'ns' => "gi"), 7 => array('rel' => "genomic-nucleotide-accession.version", 'ns' => "genbank"), 8 => array('rel' => "genomic-nucleotide-gi", 'ns' => "gi"), 9 => array('rel' => "genomic-start-position"), 10 => array('rel' => "genomic-end-position"), 11 => array('rel' => "orientation"), 12 => array('rel' => "assembly"), 13 => array('rel' => "mature-peptide-accession.version", 'ns' => "genbank"), 14 => array('rel' => "mature-peptide-gi", 'ns' => "gi"), 15 => array('rel' => "symbol"));
     // columns already consumed elsewhere: the gene id is the subject, and the
     // positions/orientation are modelled by the FALDO region below
     $skipped_columns = array(1, 9, 10, 11);
     // tab is used as a separator, pound sign marks the start of a comment
     $z = 0;
     while ($l = $this->getReadFile()->read(200000)) {
         if ($l[0] === "#") {
             continue;
         }
         // progress report every 10000 data rows
         if (++$z % 10000 === 0) {
             echo $z . PHP_EOL;
             parent::clear();
         }
         $a = explode("\t", rtrim($l));
         if (count($a) !== 16) {
             trigger_error("gene2accession: expecting 16 columns, found " . count($a) . " instead", E_USER_ERROR);
             // only reached when an error handler resumes execution; never index a short row
             continue;
         }
         $taxid = $a[0];
         if (isset($this->taxids) && !isset($this->taxids[$taxid])) {
             continue;
         }
         $id = parent::getNamespace() . $a[1];
         // a non-"-" status column switches genbank-namespaced accessions to refseq below
         $refseq = $a[2] !== '-';
         if ($a[9] !== '-' && $a[10] !== '-') {
             $region = parent::getRes() . $a[7] . "/" . $a[9] . "-" . $a[10];
             $start_pos = parent::getRes() . $a[7] . "/" . $a[9];
             $stop_pos = parent::getRes() . $a[7] . "/" . $a[10];
             if ($a[11] === "+") {
                 $orientation = "faldo:ForwardStrandPosition";
             } elseif ($a[11] === "-") {
                 $orientation = "faldo:ReverseStrandPosition";
             } else {
                 $orientation = "faldo:StrandedPosition";
             }
             parent::addRDF(
                 parent::describeIndividual($region, "location of ncbigene:" . $a[1] . " on " . $a[7], "faldo:Region")
                 . parent::describeIndividual($start_pos, "start of ncbigene:" . $a[1] . " on " . $a[7], "faldo:ExactPosition")
                 . parent::describeIndividual($stop_pos, "stop position of ncbigene:" . $a[1] . " on " . $a[7], "faldo:ExactPosition")
                 . parent::triplify($id, "faldo:location", $region)
                 . parent::triplify($region, "faldo:begin", $start_pos)
                 . parent::triplify($start_pos, "rdf:type", $orientation)
                 . parent::triplifyString($start_pos, "faldo:position", $a[9], "xsd:integer")
                 . parent::triplify($start_pos, "faldo:reference", "refseq:" . $a[7])
                 . parent::triplify($region, "faldo:end", $stop_pos)
                 . parent::triplify($stop_pos, "rdf:type", $orientation)
                 . parent::triplifyString($stop_pos, "faldo:position", $a[10], "xsd:integer")
                 . parent::triplify($stop_pos, "faldo:reference", "refseq:" . $a[7])
             );
         }
         foreach ($header as $i => $v) {
             if ($a[$i] === "-" || in_array($i, $skipped_columns, true)) {
                 continue;
             }
             if (isset($v['ns'])) {
                 // resource-valued column
                 $ns = $v['ns'];
                 if ($ns === 'genbank' && $refseq) {
                     $ns = 'refseq';
                 }
                 parent::addRDF(parent::triplify($id, parent::getVoc() . $v['rel'], "{$ns}:" . $a[$i]));
             } else {
                 // literal-valued column
                 parent::addRDF(parent::triplifyString($id, parent::getVoc() . $v['rel'], $a[$i]));
             }
         }
         parent::writeRDFBufferToWriteFile();
     }
     //while
 }
Beispiel #4
0
 /**
  * Convert the rows of the currently open gene-interaction file into RDF.
  *
  * Each non-comment line must have 11 tab-separated columns; column 0 is the
  * interaction id, column 1 its type, column 2 additional info, and columns 5
  * and 8 the two genes. Three shapes are produced depending on column 2:
  *  - "No_interaction": a non-interaction individual plus an OWL negative
  *    object property assertion between the two genes;
  *  - "N/A" or "Genetic_interaction": a plain <type>-Interaction individual
  *    plus a direct gene-to-gene statement;
  *  - anything else: an individual typed with a more specific class that is
  *    described as a subclass-style refinement of <type>-Interaction.
  *
  * The gene-to-gene predicate is only known for the Genetic, Physical,
  * Predicted and Regulatory types. For any other type a notice is raised and
  * the statements that need the predicate are omitted, rather than reusing the
  * predicate left over from a previous row.
  *
  * @return void
  */
 function gene_interactions()
 {
     // interaction type => local name of the predicate linking the two genes
     $predicates = array(
         "Genetic" => "genetically-interacts-with",
         "Physical" => "physically-interacts-with",
         "Predicted" => "predicted-to-interact-with",
         "Regulatory" => "regulates",
     );
     while ($l = parent::getReadFile()->Read()) {
         if ($l[0] == '#') {
             continue;
         }
         $data = explode("\t", $l);
         if (count($data) != 11) {
             trigger_error("Found " . count($data) . " columns, expecting 11");
             continue;
         }
         $interaction = $data[0];
         $interaction_type = str_replace("_", "-", $data[1]);
         $interaction_type_label = str_replace("_", " ", $data[1]);
         $int_additional_info = $data[2];
         $gene1 = $data[5];
         $gene2 = $data[8];
         $interaction_id = parent::getNamespace() . $interaction;
         $gene1_uri = parent::getNamespace() . $gene1;
         $gene2_uri = parent::getNamespace() . $gene2;
         $interaction_class = parent::getVoc() . $interaction_type . "-Interaction";
         // resolve the predicate afresh for every row so a stale value can never leak through
         $int_pred = null;
         if (isset($predicates[$interaction_type])) {
             $int_pred = parent::getVoc() . $predicates[$interaction_type];
         } else {
             trigger_error("No predicate known for interaction type '" . $interaction_type . "' (" . $interaction . "); gene-to-gene statement omitted");
         }
         if ($int_additional_info == "No_interaction") {
             $interaction_label = "No " . strtolower($interaction_type) . " interaction between " . $gene1 . " and " . $gene2;
             $non_interaction_class = parent::getVoc() . $interaction_type . "-Non-Interaction";
             parent::addRDF(
                 parent::describeIndividual($interaction_id, $interaction_label, $non_interaction_class)
                 . parent::describeClass($non_interaction_class, $interaction_type_label . " non-interaction")
                 . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene1_uri)
                 . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene2_uri)
             );
             $npa_id = parent::getRes() . md5($interaction_id . "negative property assertion");
             $npa_label = "Negative property assertion stating that " . $gene1 . " and " . $gene2 . " do not have a " . $interaction_type_label . " interaction";
             parent::addRDF(
                 parent::describeIndividual($npa_id, $npa_label, "owl:NegativeObjectPropertyAssertion")
                 . parent::triplify($npa_id, "owl:sourceIndividual", $gene1_uri)
                 . parent::triplify($npa_id, "owl:targetIndividual", $gene2_uri)
                 . ($int_pred !== null ? parent::triplify($npa_id, "owl:assertionProperty", $int_pred) : '')
             );
         } elseif ($int_additional_info == "N/A" || $int_additional_info == "Genetic_interaction") {
             $interaction_label = $interaction_type . " interaction between " . $gene1 . " and " . $gene2;
             parent::addRDF(
                 parent::describeIndividual($interaction_id, $interaction_label, $interaction_class)
                 . parent::describeClass($interaction_class, $interaction_type_label . " Interaction")
                 . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene1_uri)
                 . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene2_uri)
                 . ($int_pred !== null ? parent::triplify($gene1_uri, $int_pred, $gene2_uri) : '')
             );
         } else {
             // the additional info (when present) refines both the label and the class
             $info_prefix = $int_additional_info != "" ? $int_additional_info . " " : "";
             $interaction_label = $info_prefix . strtolower($interaction_type) . " interaction between " . $gene1 . " and " . $gene2;
             $type = parent::getVoc() . ($int_additional_info != "" ? $int_additional_info . "-" : "") . $interaction_type . "-Interaction";
             $type_label = $info_prefix . $interaction_type_label . " Interaction";
             parent::addRDF(
                 parent::describeIndividual($interaction_id, $interaction_label, $type)
                 . parent::describeClass($type, $type_label, $interaction_class)
                 // same class label as the branch above, so one class URI never gets two spellings
                 . parent::describeClass($interaction_class, $interaction_type_label . " Interaction")
                 . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene1_uri)
                 . parent::triplify($interaction_id, parent::getVoc() . "involves", $gene2_uri)
                 . ($int_pred !== null ? parent::triplify($gene1_uri, $int_pred, $gene2_uri) : '')
             );
         }
         //else
         parent::WriteRDFBufferToWriteFile();
     }
     //while
 }
Beispiel #5
0
 /**
  * Convert the pathway relationship file into RDF (unfinished).
  *
  * The method returns immediately; everything after the early return is a
  * work-in-progress sketch that is never executed. The sketch resolves the
  * "From" and "To" columns of each tab-delimited line against $this->drugs
  * and $this->genes and derives a URI for the line, but does not yet emit
  * any triples.
  *
  * @return void
  */
 function pathways()
 {
     // needs to be finished
     return;
     while ($l = $this->getReadFile()->read(50000)) {
         $a = explode("\t", trim($l));
         // From	To	Reaction Type	Controller	Control Type	Cell Type	PubMed Id	Genes	Drugs	Diseases
         // hmg coa reductase inhibitors	Active & Inactive metabolites	Biochemical Reaction	CYP2C19,CYP2C8,CYP2C9,CYP2D6,CYP3A4,CYP3A5,UGT1A1,UGT1A3,UGT2B7	Catalysis	hepatocyte		CYP3A4,CYP3A5,UGT1A3,CYP2C19,CYP2C9,CYP2C8,CYP2D6,UGT1A1,UGT2B7	hmg coa reductase inhibitors
         // NOTE(review): a hit in $this->drugs is left as the bare array key, while a hit
         // in $this->genes is prefixed with the namespace - confirm whether drug hits
         // should be prefixed as well before enabling this code.
         $c1 = array_search($a[0], $this->drugs);
         if ($c1 === FALSE) {
             $c1 = array_search($a[0], $this->genes);
             if ($c1 === FALSE) {
                 // not found: mint a resource URI from the name itself
                 // (the lookup result is FALSE here and must not be encoded)
                 $c1 = parent::getRes() . urlencode($a[0]);
             } else {
                 // actual id
                 $c1 = parent::getNamespace() . $c1;
             }
         }
         $c2 = array_search($a[1], $this->drugs);
         if ($c2 === FALSE) {
             $c2 = array_search($a[1], $this->genes);
             if ($c2 === FALSE) {
                 // not found: mint a resource URI from the name itself
                 $c2 = parent::getRes() . urlencode($a[1]);
             } else {
                 // actual id
                 $c2 = parent::getNamespace() . $c2;
             }
         }
         // one resource per input line, keyed by the hash of the raw line
         $id = md5($l);
         $uri = parent::getRes() . $id;
         parent::writeRDFBufferToWriteFile();
     }
 }
Beispiel #6
0
 /**
  * Convert the tab-delimited NDC product file into RDF.
  *
  * The first line is treated as a header and discarded. Every following line must
  * contain exactly 18 tab-separated columns; any other line is reported through
  * trigger_error() and skipped. The RDF buffer is flushed to the write file after
  * each product.
  *
  * Columns used (0-based): 0 product URI suffix, 1 product id, 2 product type,
  * 3 proprietary name, 4 trade name suffix, 5 non-proprietary names (";"),
  * 6 dosage forms (","), 7 routes (";"), 8/9 start/end marketing date,
  * 10 marketing category, 11 application number, 12 labeller,
  * 13/14/15 substances, strengths and units (parallel ";" lists),
  * 16 pharmacological classes (",").
  *
  * @param resource $fpin open, readable handle on the product file
  * @return void
  */
 function product($fpin)
 {
     fgets($fpin);
     // header
     while ($l = fgets($fpin, 100000)) {
         $a = explode("\t", $l);
         if (count($a) != 18) {
             trigger_error("Expected 18 columns, instead found " . count($a));
             continue;
         }
         $product_id = parent::getNamespace() . $a[0];
         $product_label = $a[3];
         $product_type_label = ucfirst(strtolower($a[2]));
         // the type resource is derived from the product *type*, not the product name
         $product_type = parent::getVoc() . str_replace(" ", "-", $product_type_label);
         parent::addRDF(
             parent::describeIndividual($product_id, $product_label, parent::getVoc() . "Product")
             . parent::describeClass(parent::getVoc() . "Product", "NDC Product")
             . parent::triplify($product_id, parent::getVoc() . "product-type", $product_type)
             . parent::describeIndividual($product_type, $product_type_label, parent::getVoc() . "Product-Type")
             . parent::describeClass(parent::getVoc() . "Product-Type", "Product Type")
             . parent::triplifyString($product_id, parent::getVoc() . "product-id", $a[1])
             . parent::triplifyString($product_id, parent::getVoc() . "proprietary-name", $a[3])
             . parent::triplifyString($product_id, parent::getVoc() . "trade-name-suffix", $a[4])
         );
         if ($a[5]) {
             foreach (explode(";", $a[5]) as $c) {
                 parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "non-proprietary-name", trim($c)));
             }
         }
         if ($a[6]) {
             foreach (explode(",", $a[6]) as $c) {
                 // trim first so that "A, B" does not yield a "-b" identifier
                 $dosageform = strtolower(trim($c));
                 if ($dosageform === '') {
                     continue;
                 }
                 $dosageform_id = parent::getVoc() . str_replace(" ", "-", ucfirst($dosageform));
                 parent::addRDF(
                     parent::describeIndividual($dosageform_id, $dosageform, parent::getVoc() . "Dosage-Form")
                     . parent::describeClass(parent::getVoc() . "Dosage-Form", "NDC Dosage Form")
                     . parent::triplify($product_id, parent::getVoc() . "dosage-form", $dosageform_id)
                 );
             }
         }
         if ($a[7]) {
             //  MV
             foreach (explode(";", $a[7]) as $c) {
                 // label and identifier are both built from the trimmed value
                 $route = strtolower(trim($c));
                 if ($route === '') {
                     continue;
                 }
                 $route_id = parent::getVoc() . str_replace(" ", "-", ucfirst($route));
                 parent::addRDF(
                     parent::describeIndividual($route_id, $route, parent::getVoc() . "Route")
                     . parent::describeClass(parent::getVoc() . "Route", "NDC Drug Route")
                     . parent::triplify($product_id, parent::getVoc() . "route", $route_id)
                 );
             }
         }
         // dates are sliced as 4+2+2 characters - presumably YYYYMMDD; verify against the data file
         if ($a[8]) {
             $date = substr($a[8], 0, 4) . "-" . substr($a[8], 4, 2) . "-" . substr($a[8], 6, 2);
             parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "start-marketing-date", $date));
         }
         if ($a[9]) {
             $date = substr($a[9], 0, 4) . "-" . substr($a[9], 4, 2) . "-" . substr($a[9], 6, 2);
             parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "end-marketing-date", $date));
         }
         if ($a[10]) {
             parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "marketing-category", $a[10]));
         }
         if ($a[11]) {
             parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "application-number", $a[11]));
         }
         // create a labeller node
         if ($a[12]) {
             $labeller_id = parent::getRes() . md5($a[12]);
             $label = addslashes($a[12]);
             parent::addRDF(
                 parent::describeIndividual($labeller_id, $label, parent::getVoc() . "Labeller")
                 . parent::describeClass(parent::getVoc() . "Labeller", "NDC Labeller")
                 . parent::triplify($product_id, parent::getVoc() . "labeller", $labeller_id)
             );
         }
         // the next three are together
         if ($a[13]) {
             // MV
             $substances = explode(";", $a[13]);
             $strengths = explode(";", $a[14]);
             $units = explode(";", $a[15]);
             foreach ($substances as $i => $substance) {
                 // list the active ingredient; trimming keeps hashes stable regardless of list position
                 $ingredient_label = strtolower(trim($substance));
                 // strength and unit lists may be shorter than the substance list
                 $strength = isset($strengths[$i]) ? trim($strengths[$i]) : '';
                 $unit = isset($units[$i]) ? trim($units[$i]) : '';
                 $ingredient_id = parent::getRes() . md5($ingredient_label);
                 parent::addRDF(
                     parent::describeIndividual($ingredient_id, $ingredient_label, parent::getVoc() . "Ingredient")
                     . parent::describeClass(parent::getVoc() . "Ingredient", "NDC Ingredient")
                     . parent::triplify($product_id, parent::getVoc() . "ingredient", $ingredient_id)
                 );
                 // describe the substance composition
                 $substance_label = "{$strength} {$unit} {$ingredient_label}";
                 $substance_id = parent::getRes() . md5($substance_label);
                 parent::addRDF(
                     parent::describeIndividual($substance_id, $substance_label, parent::getVoc() . "Substance")
                     . parent::triplifyString($substance_id, parent::getVoc() . "amount", $strength)
                     . parent::describeClass(parent::getVoc() . "Substance", "NDC Substance")
                     . parent::triplify($product_id, parent::getVoc() . "has-part", $substance_id)
                 );
                 // only describe a unit when the file actually provides one
                 if ($unit !== '') {
                     $unit_id = parent::getVoc() . md5($unit);
                     parent::addRDF(
                         parent::describeIndividual($unit_id, $unit, parent::getVoc() . "Unit")
                         . parent::describeClass(parent::getVoc() . "Unit", "NDC Unit")
                         . parent::triplify($substance_id, parent::getVoc() . "amount_unit", $unit_id)
                     );
                 }
             }
         }
         if ($a[16]) {
             // MV
             foreach (explode(",", $a[16]) as $c) {
                 $c = trim($c);
                 if ($c === '') {
                     continue;
                 }
                 $cat_id = parent::getVoc() . md5($c);
                 parent::addRDF(
                     parent::describeIndividual($cat_id, $c, parent::getVoc() . "Pharmacological-Class")
                     . parent::describeClass(parent::getVoc() . "Pharmacological-Class", "NDC Pharmacological Class")
                     . parent::triplify($product_id, parent::getVoc() . "pharmacological-class", $cat_id)
                 );
             }
         }
         parent::WriteRDFBufferToWriteFile();
     }
 }
Beispiel #7
0
 /**
  * Convert the MEDLINE/PubMed XML file currently open for reading into RDF.
  *
  * The file is loaded with SimpleXML (through the compress.zlib:// wrapper when its
  * extension is "gz") and every MedlineCitation child of the root element is turned
  * into triples: record metadata, MeSH headings, chemicals, gene symbols, abstracts,
  * authors, identifiers, databanks, grants, comments/corrections and journal details.
  * The RDF buffer is flushed to the write file after each citation.
  *
  * When $this->id_list is set, only citations whose PMID is a key of that list are
  * processed. Files whose extension is neither "gz" nor "xml" are reported with a
  * warning and skipped.
  *
  * @return void
  */
 function pubmed()
 {
     $citations = null;
     $filename = $this->getReadFile()->getFileName();
     $ext = strtolower(substr(strrchr($filename, '.'), 1));
     // compare the extension (a single "=" would assign and always take the first branch)
     if ($ext == "gz") {
         $citations = new SimpleXMLElement("compress.zlib://" . $filename, 0, TRUE);
     } elseif ($ext == "xml") {
         $citations = new SimpleXMLElement($filename, 0, TRUE);
     }
     if ($citations === null) {
         trigger_error("Unsupported file extension '{$ext}' for {$filename} - expecting gz or xml", E_USER_WARNING);
         return;
     }
     foreach ($citations->MedlineCitation as $citation) {
         $this->setCheckPoint('record');
         $pmid = "" . $citation->PMID;
         if (isset($this->id_list)) {
             if (!isset($this->id_list[$pmid])) {
                 continue;
             } else {
                 echo "processing {$pmid}" . PHP_EOL;
             }
         }
         $pmid_uri = parent::getNamespace() . $pmid;
         $article = $citation->Article;
         parent::addRDF(
             parent::describeIndividual($pmid_uri, $this->getString($article->ArticleTitle), parent::getVoc() . "PubMedRecord")
             . parent::describeClass(parent::getVoc() . "PubMedRecord", "PubMedRecord")
             . parent::triplify($pmid_uri, "rdfs:seeAlso", "http://www.ncbi.nlm.nih.gov/pubmed/{$pmid}")
         );
         // metadata about the record
         $owner = parent::getRes() . md5($citation['Owner']);
         parent::addRDF(
             parent::describeIndividual($owner, $citation['Owner'], "foaf:Agent")
             . parent::triplify($pmid_uri, parent::getVoc() . "owner", $owner)
         );
         $status = parent::getRes() . md5($citation['Status']);
         parent::addRDF(
             parent::describeIndividual($status, $citation['Status'], parent::getVoc() . "Status")
             . parent::describeClass(parent::getVoc() . "Status", "Status")
             . parent::triplify($pmid_uri, parent::getVoc() . "status", $status)
             . parent::triplifyString($pmid_uri, parent::getVoc() . "version", $citation['VersionID'])
         );
         $this->addDate($pmid_uri, "version-date", $citation['VersionDate']);
         $this->addDate($pmid_uri, "date-created", $citation->DateCreated);
         $this->addDate($pmid_uri, "date-revised", $citation->DateRevised);
         $this->addDate($pmid_uri, "date-completed", $citation->DateCompleted);
         if (!empty($citation->MeshHeadingList)) {
             $i = 0;
             foreach ($citation->MeshHeadingList->MeshHeading as $mh) {
                 // one heading node per citation position, linked to a shared descriptor node
                 $id = parent::getRes() . $pmid . "_mh_" . ++$i;
                 $did = parent::getRes() . md5($mh->DescriptorName);
                 parent::addRDF(
                     parent::describeIndividual($id, $mh->DescriptorName, parent::getVoc() . "MeshHeading")
                     . parent::describeClass(parent::getVoc() . "MeshHeading", "MeSH Heading")
                     . parent::triplify($pmid_uri, parent::getVoc() . "mesh-heading", $id)
                     . parent::triplifyString($id, parent::getVoc() . "descriptor-major-topic", "" . $mh->DescriptorName['MajorTopicYN'])
                     . parent::describeIndividual($did, "" . $mh->DescriptorName, parent::getVoc() . "Mesh-Descriptor")
                     . parent::triplify($id, parent::getVoc() . "mesh-descriptor", $did)
                 );
                 if (!empty($mh->QualifierName)) {
                     foreach ($mh->QualifierName as $qualifier_name) {
                         $qid = parent::getRes() . md5($qualifier_name);
                         parent::addRDF(
                             parent::describeIndividual($qid, $qualifier_name, parent::getVoc() . "Mesh-Qualifier")
                             . parent::triplify($id, parent::getVoc() . "mesh-qualifier", $qid)
                         );
                     }
                 }
             }
         }
         if (!empty($citation->ChemicalList)) {
             $i = 0;
             foreach ($citation->ChemicalList->Chemical as $chemical) {
                 $id = parent::getRes() . $pmid . "_ch_" . ++$i;
                 parent::addRDF(
                     parent::describeIndividual($id, $chemical->NameOfSubstance, parent::getVoc() . "Chemical")
                     . parent::describeClass(parent::getVoc() . "Chemical", "Chemical")
                     . parent::triplify($pmid_uri, parent::getVoc() . "chemical", $id)
                 );
                 // "0" marks "no registry number"; an absent element must not yield a bare "cas:" either
                 $registry_number = trim((string) $chemical->RegistryNumber);
                 if ($registry_number !== "" && $registry_number !== "0") {
                     // check if "EC"
                     if (substr($registry_number, 0, 2) == "EC") {
                         $ec = substr($registry_number, 3);
                         parent::addRDF(parent::triplify($id, parent::getVoc() . "x-ec", "ec:" . $ec));
                     } else {
                         parent::addRDF(parent::triplify($id, parent::getVoc() . "x-cas", "cas:" . $registry_number));
                     }
                 }
             }
         }
         if (!empty($citation->GeneSymbolList)) {
             foreach ($citation->GeneSymbolList->GeneSymbol as $geneSymbol) {
                 parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "gene-symbol", $geneSymbol));
             }
         }
         if (!empty($citation->SupplMeshList)) {
             foreach ($citation->SupplMeshList->SupplMeshName as $supplMeshName) {
                 $id = parent::getRes() . md5($supplMeshName);
                 parent::addRDF(
                     parent::describeIndividual($id, $supplMeshName, parent::getVoc() . "MeshHeading")
                     . parent::triplify($pmid_uri, parent::getVoc() . "supplemental-mesh-heading", $id)
                 );
             }
         }
         if (!empty($article->PublicationTypeList)) {
             foreach ($article->PublicationTypeList->PublicationType as $publicationType) {
                 $id = parent::getRes() . md5($publicationType);
                 parent::addRDF(
                     parent::triplify($pmid_uri, parent::getVoc() . "publication-type", $id)
                     . parent::describeClass($id, $publicationType)
                 );
             }
         }
         if (!empty($article->Abstract)) {
             $id = parent::getRes() . $pmid . "_ABSTRACT";
             $label = "Abstract for PMID:{$pmid}";
             $abstract = $article->Abstract;
             parent::addRDF(
                 parent::describeIndividual($id, $label, parent::getVoc() . "Article-Abstract")
                 . parent::describeClass(parent::getVoc() . "Article-Abstract", "Article Abstract")
                 . parent::triplify($pmid_uri, "dc:abstract", $id)
                 . parent::triplifyString($id, parent::getVoc() . "copyright", $abstract->CopyrightInformation)
             );
             $section = 0;
             $abstractText = "";
             foreach ($abstract->AbstractText as $text) {
                 $abstractText .= " " . $text;
                 // attributes are SimpleXMLElement objects: cast before a strict string comparison
                 if (!empty($text['Label']) && (string) $text['Label'] !== "UNLABELLED") {
                     $section_id = parent::getRes() . $pmid . "_ABSTRACT_SECTION_" . ++$section;
                     parent::addRDF(
                         parent::triplify($id, parent::getVoc() . "section", $section_id)
                         . parent::triplifyString($section_id, parent::getVoc() . "order", $section)
                         . parent::triplifyString($section_id, parent::getVoc() . "nlm-section-type", $text['NlmCategory'])
                         . parent::triplifyString($section_id, parent::getVoc() . "label", $text['Label'])
                         . parent::triplifyString($section_id, parent::getVoc() . "text", $text)
                     );
                 }
             }
             parent::addRDF(parent::triplifyString($id, parent::getVoc() . "abstract-text", $abstractText));
         }
         if (!empty($citation->OtherAbstract)) {
             $i = 0;
             foreach ($citation->OtherAbstract as $ab) {
                 $id = parent::getRes() . $pmid . "_oa_" . ++$i;
                 parent::addRDF(
                     parent::describeIndividual($id, "", parent::getVoc() . "Article-Abstract")
                     . parent::describeClass(parent::getVoc() . "Article-Abstract", "Article Abstract")
                     . parent::triplify($pmid_uri, "dc:abstract", $id)
                 );
                 $abstractText = "";
                 foreach ($ab->AbstractText as $text) {
                     $abstractText .= " " . $text;
                     if (!empty($text['Label']) && (string) $text['Label'] !== "UNLABELLED") {
                         // NOTE(review): the main abstract reads the "NlmCategory" attribute; "Category"
                         // may be a mistake here - confirm against the DTD before changing the predicate.
                         parent::addRDF(parent::triplifyString($id, parent::getVoc() . "abstract_" . strtolower((string) $text['Category']), $text));
                     }
                 }
                 parent::addRDF(parent::triplifyString($id, parent::getVoc() . "abstract-text", $abstractText));
             }
         }
         // NOTE(review): InvestigatorList and PersonalNameSubjectList appear to be children of
         // MedlineCitation rather than Article in the NLM DTD - confirm; if so, only "Author"
         // ever matches below.
         $author_types = array("Investigator", "Author", "PersonalNameSubject");
         foreach ($author_types as $author_type) {
             $listname = $author_type . "List";
             if (!empty($article->{$listname}->{$author_type})) {
                 $i = 0;
                 foreach ($article->{$listname}->{$author_type} as $author) {
                     $id = parent::getRes() . $pmid . "_AUTHOR_" . ++$i;
                     $author_label = $author->LastName . ($author->Initials ? ", " . $author->Initials : "");
                     parent::addRDF(
                         parent::describeIndividual($id, $author_label, parent::getVoc() . $author_type)
                         . parent::describeClass(parent::getVoc() . $author_type, $author_type)
                         . parent::triplifyString($id, parent::getVoc() . "list-position", $i)
                         . parent::triplify($pmid_uri, parent::getVoc() . strtolower($author_type), $id)
                         . parent::triplifyString($id, parent::getVoc() . "last-name", $author->LastName)
                         . parent::triplifyString($id, parent::getVoc() . "fore-name", $author->ForeName)
                         . parent::triplifyString($id, parent::getVoc() . "initials", $author->Initials)
                         . parent::triplifyString($id, parent::getVoc() . "collective-name", $author->CollectiveName)
                         . parent::triplifyString($id, parent::getVoc() . "suffix", $author->Suffix)
                     );
                     if ($author->Affiliation) {
                         // hash and label the Affiliation element that was just tested, and link the
                         // author to the organization resource rather than to a string literal
                         $affiliation = parent::getRes() . md5($author->Affiliation);
                         parent::addRDF(
                             parent::describeIndividual($affiliation, $author->Affiliation, parent::getVoc() . "Organization")
                             . parent::describeClass(parent::getVoc() . "Organization", "Organization")
                             . parent::triplify($id, parent::getVoc() . "affiliation", $affiliation)
                         );
                     }
                     foreach ($author->NameID as $authorNameId) {
                         if (!empty($authorNameId)) {
                             parent::addRDF(parent::triplifyString($id, parent::getVoc() . "name-id", $authorNameId));
                         }
                     }
                 }
             }
         }
         if (!empty($article->ArticleDate)) {
             $this->addDate($pmid_uri, "article-date", $article->ArticleDate);
         }
         foreach ($article->Language as $language) {
             parent::addRDF(parent::triplifyString($pmid_uri, "dc:language", $language));
         }
         if (!empty($citation->KeywordList)) {
             foreach ($citation->KeywordList->Keyword as $keyword) {
                 parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "keyword", $keyword));
             }
         }
         // SimpleXML element names are case-sensitive: the element is "OtherID"
         if (!empty($citation->OtherID)) {
             // untested
             foreach ($citation->OtherID as $otherID) {
                 $other_id = trim((string) $otherID);
                 if ($other_id !== "") {
                     parent::addRDF(
                         parent::triplifyString($pmid_uri, parent::getVoc() . "other-id", $other_id)
                         . parent::triplifyString($pmid_uri, parent::getVoc() . "other-id-source", $otherID['Source'])
                     );
                     if (strstr($other_id, "PMC")) {
                         parent::addRDF(parent::triplify($pmid_uri, parent::getVoc() . "x-pmc", "pmc:" . $other_id));
                     }
                 }
             }
         }
         if (!empty($article->DataBankList)) {
             foreach ($article->DataBankList->DataBank as $dataBank) {
                 parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "databank", $dataBank->DataBankName));
                 if (!empty($dataBank->AccessionNumberList)) {
                     foreach ($dataBank->AccessionNumberList->AccessionNumber as $acc) {
                         parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "x-" . strtolower($dataBank->DataBankName), $acc));
                     }
                 }
             }
         }
         if (!empty($article->GrantList)) {
             $i = 0;
             foreach ($article->GrantList->Grant as $grant) {
                 $id = parent::getRes() . $pmid . "_GRANT_" . ++$i;
                 $grant_label = "Grant " . $grant->GrantID . " for " . parent::getNamespace() . $pmid;
                 parent::addRDF(
                     parent::describeIndividual($id, $grant_label, parent::getVoc() . "Grant")
                     . parent::describeClass(parent::getVoc() . "Grant", "Grant")
                     . parent::triplify($pmid_uri, parent::getVoc() . "grant", $id)
                     . parent::triplifyString($id, parent::getVoc() . "grant-identifier", $grant->GrantID)
                     . parent::triplifyString($id, parent::getVoc() . "grant-acronym", $grant->Acronym)
                     . parent::triplifyString($id, parent::getVoc() . "grant-agency", $grant->Agency)
                     . parent::triplifyString($id, parent::getVoc() . "grant-country", $grant->Country)
                 );
             }
         }
         if (!empty($citation->NumberOfReferences)) {
             parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "number-of-references", $citation->NumberOfReferences));
         }
         if (!empty($article->VernacularTitle)) {
             parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "vernacular-title", $article->VernacularTitle));
         }
         foreach ($citation->CitationSubset as $citationSubset) {
             if (!empty($citationSubset)) {
                 parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "citation-subset", $citationSubset));
             }
         }
         if (!empty($citation->CommentsCorrectionsList)) {
             $i = 0;
             foreach ($citation->CommentsCorrectionsList->CommentsCorrections as $commentCorrection) {
                 $id = parent::getRes() . $pmid . "_COMMENT_CORRECTION_" . ++$i;
                 $ccRefType = $commentCorrection['RefType'];
                 $ccRefSource = $commentCorrection->RefSource;
                 //optional
                 $ccNote = $commentCorrection->Note;
                 $cc_label = "Comment or correction " . $i . " for " . parent::getNamespace() . $pmid;
                 parent::addRDF(
                     parent::describeIndividual($id, $cc_label, parent::getVoc() . "CommentCorrection")
                     . parent::describeClass(parent::getVoc() . "CommentCorrection", "CommentCorrection")
                     . parent::triplify($pmid_uri, parent::getVoc() . "comment-correction", $id)
                     . parent::triplify($id, "rdf:type", parent::getVoc() . $ccRefType)
                     . parent::triplifyString($id, parent::getVoc() . "ref-source", $ccRefSource)
                     . parent::triplifyString($id, parent::getVoc() . "note", $ccNote)
                 );
             }
         }
         foreach ($citation->GeneralNote as $generalNote) {
             if (trim((string) $generalNote) !== "") {
                 parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "general-note", $generalNote));
             }
         }
         foreach ($citation->SpaceFlightMission as $spaceFlightMission) {
             if (!empty($spaceFlightMission)) {
                 // the mission is the literal value, not part of the predicate name
                 parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "space-flight-mission", $spaceFlightMission));
             }
         }
         $journal = $article->Journal;
         $journalId = parent::getRes() . $pmid . "_JOURNAL";
         $journal_label = "Journal for " . parent::getNamespace() . $pmid;
         parent::addRDF(
             parent::describeIndividual($journalId, $journal_label, parent::getVoc() . "Journal")
             . parent::describeClass(parent::getVoc() . "Journal", "Journal")
             . parent::triplify($pmid_uri, parent::getVoc() . "journal", $journalId)
             . parent::triplifyString($journalId, parent::getVoc() . "journal-nlm-identifier", $citation->MedlineJournalInfo->NlmUniqueID)
             . parent::triplifyString($journalId, parent::getVoc() . "journal-title", $journal->Title)
             . parent::triplifyString($journalId, parent::getVoc() . "journal-abbreviation", $journal->ISOAbbreviation)
             . parent::triplifyString($journalId, parent::getVoc() . "volume", $journal->JournalIssue->Volume)
             . parent::triplifyString($journalId, parent::getVoc() . "issue", $journal->JournalIssue->Issue)
             . parent::triplifyString($journalId, parent::getVoc() . "pages", "" . $article->Pagination->MedlinePgn)
         );
         // journals without an ISSN must not produce a dangling "issn:" reference
         $issn = trim((string) $journal->ISSN);
         if ($issn !== "") {
             parent::addRDF(parent::triplify($journalId, parent::getVoc() . "x-issn", "issn:" . $issn));
         }
         $journalPubDate = $journal->JournalIssue->PubDate;
         if (!empty($journalPubDate)) {
             $journalYear = trim((string) $journalPubDate->Year);
             $journalMonth = trim((string) $journalPubDate->Month);
             //optional
             if ($journalMonth !== "" and !is_numeric($journalMonth[0])) {
                 $mo = array("jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec");
                 $monthIndex = array_search(strtolower($journalMonth), $mo);
                 // an unrecognised month name is kept verbatim instead of silently becoming "01"
                 if ($monthIndex !== FALSE) {
                     $journalMonth = str_pad((string) ($monthIndex + 1), 2, "0", STR_PAD_LEFT);
                 }
             }
             $journalDay = trim((string) $journalPubDate->Day);
             //optional
             if ($journalDay) {
                 $journalDay = str_pad($journalDay, 2, "0", STR_PAD_LEFT);
             }
             parent::addRDF(
                 parent::triplifyString($journalId, parent::getVoc() . "publication-year", $journalYear)
                 . parent::triplifyString($journalId, parent::getVoc() . "publication-month", $journalMonth)
                 . parent::triplifyString($journalId, parent::getVoc() . "publication-day", $journalDay)
                 . parent::triplifyString($journalId, parent::getVoc() . "publication-season", $journalPubDate->Season)
                 . parent::triplifyString($journalId, parent::getVoc() . "publication-date", $journalPubDate->MedlineDate)
             );
             // only emit a typed date when all three parts are present and the month is numeric
             if (!empty($journalYear) and !empty($journalDay) and is_numeric($journalMonth)) {
                 $journalMonth = str_pad($journalMonth, 2, "0", STR_PAD_LEFT);
                 parent::addRDF(parent::triplifyString($journalId, parent::getVoc() . "publication-date", "{$journalYear}-{$journalMonth}-{$journalDay}", "xsd:date"));
             }
         }
         // the NLM DTD names this element "ELocationID" - TODO confirm against the DTD version in use
         foreach ($article->ELocationID as $eLocation) {
             if (!empty($eLocation)) {
                 parent::addRDF(parent::triplifyString($pmid_uri, parent::getVoc() . "elocation", $eLocation));
             }
         }
         $this->writeRDFBufferToWriteFile();
         //break;
     }
 }
Beispiel #8
0
 /**
  * Convert the comma-separated model-organism gene file into RDF.
  *
  * The first line read is treated as the header and must contain exactly eight
  * columns, otherwise a warning is raised and false is returned. Data rows with
  * fewer columns are reported and skipped. The RDF buffer is flushed to the write
  * file after each row.
  *
  * Columns:
  *   [0] GenAge ID
  *   [1] symbol
  *   [2] name
  *   [3] organism
  *   [4] entrez gene id
  *   [5] avg lifespan change (max obsv)
  *   [6] lifespan effect
  *   [7] longevity influence
  *
  * @return false|null false when the header does not have the expected column count
  */
 function models()
 {
     // organism name (as written in the file) => NCBI taxonomy identifier
     $tax_ids = array("Caenorhabditis elegans" => "6239", "Mus musculus" => "10090", "Saccharomyces cerevisiae" => "4932", "Drosophila melanogaster" => "7227", "Podospora anserina" => "5145", "Mesocricetus auratus" => "10036", "Schizosaccharomyces pombe" => "4896", "Danio rerio" => "7955");
     $h = explode(",", parent::getReadFile()->read());
     $expected_columns = 8;
     if (($n = count($h)) != $expected_columns) {
         trigger_error("Found {$n} columns in gene file - expecting {$expected_columns}!", E_USER_WARNING);
         return false;
     }
     while ($l = parent::getReadFile()->read(200000)) {
         $data = str_getcsv($l);
         // blank or truncated rows would otherwise produce undefined-offset notices and bogus records
         if (($n = count($data)) < $expected_columns) {
             trigger_error("Skipping row with {$n} columns in gene file - expecting {$expected_columns}!", E_USER_WARNING);
             continue;
         }
         $genage = str_pad($data[0], 4, "0", STR_PAD_LEFT);
         $gene_symbol = $data[1];
         $name = $data[2];
         $organism = $data[3];
         $ncbi_gene_id = $data[4];
         $max_percent_obsv_avg_lifespan_change = $data[5];
         $lifespan_effect = $data[6];
         $longevity_influence = $data[7];
         $genage_id = parent::getNamespace() . $genage;
         parent::addRDF(
             parent::describeIndividual($genage_id, $name, parent::getVoc() . "Aging-Related-Gene")
             . parent::describeClass(parent::getVoc() . "Aging-Related-Gene", "Aging Related Gene")
         );
         parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "gene-symbol", parent::safeLiteral($gene_symbol)));
         // an organism missing from the table must not yield a dangling "ncbitaxon:" reference
         if (isset($tax_ids[$organism])) {
             parent::addRDF(parent::triplify($genage_id, parent::getVoc() . "taxon", "ncbitaxon:" . $tax_ids[$organism]));
         } else {
             trigger_error("No NCBI taxonomy id known for organism '{$organism}' - taxon omitted for {$genage_id}", E_USER_WARNING);
         }
         if ($ncbi_gene_id !== "") {
             parent::addRDF(parent::triplify($genage_id, parent::getVoc() . "x-ncbigene", "ncbigene:" . $ncbi_gene_id));
         }
         if ($max_percent_obsv_avg_lifespan_change !== "") {
             parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "maximum-percent-observed-average-lifespan-change", parent::safeLiteral($max_percent_obsv_avg_lifespan_change)));
         }
         // the combined value is split into two separate lifespan-effect statements
         if ($lifespan_effect == "Increase and Decrease") {
             parent::addRDF(
                 parent::triplifyString($genage_id, parent::getVoc() . "lifespan-effect", "increase")
                 . parent::triplifyString($genage_id, parent::getVoc() . "lifespan-effect", "decrease")
             );
         } else {
             parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "lifespan-effect", strtolower($lifespan_effect)));
         }
         parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "longevity-influence", strtolower($longevity_influence)));
         parent::WriteRDFBufferToWriteFile();
     }
 }
Beispiel #9
0
 /**
  * Convert the comma-separated dietary-restriction gene file into RDF.
  *
  * The first line read is treated as the header and must contain exactly five
  * columns, otherwise a warning is raised and false is returned. Data rows with
  * fewer columns are reported and skipped. Each row yields a gene resource and a
  * gene-phenotype association resource; the association is annotated with a taxon
  * when the species is one of the five listed below. The RDF buffer is flushed to
  * the write file after each row.
  *
  * Columns: [0] record id, [1] gene symbol, [2] species name,
  *          [3] NCBI gene id, [4] gene name.
  *
  * @return false|null false when the header does not have the expected column count
  */
 function gene_manipulations()
 {
     // species name (as written in the file) => taxon reference for the association
     $taxa = array(
         "Caenorhabditis elegans" => "taxon:6239",
         "Saccharomyces cerevisiae" => "taxon:4932",
         // 4896 is the NCBI taxonomy id of S. pombe (the previous 9896 was a typo)
         "Schizosaccharomyces pombe" => "taxon:4896",
         "Drosophila melanogaster" => "taxon:7227",
         "Mus musculus" => "taxon:10090",
     );
     $h = explode(",", parent::getReadFile()->read());
     $expected_columns = 5;
     if (($n = count($h)) != $expected_columns) {
         trigger_error("Found {$n} columns in gene file - expecting {$expected_columns}!", E_USER_WARNING);
         return false;
     }
     while ($l = parent::getReadFile()->read(200000)) {
         $data = str_getcsv($l);
         // blank or truncated rows would otherwise produce undefined-offset notices and bogus records
         if (($n = count($data)) < $expected_columns) {
             trigger_error("Skipping row with {$n} columns in gene file - expecting {$expected_columns}!", E_USER_WARNING);
             continue;
         }
         $gendr = $data[0];
         $gene_symbol = $data[1];
         $species_name = $data[2];
         $geneid = $data[3];
         $gene_name = $data[4];
         // the file has no references column (the header is required to have five
         // columns), so no article links are emitted
         $gendr_id = parent::getNamespace() . $gendr;
         $gendr_label = $gene_name . " (" . $gene_symbol . ")";
         $association_id = parent::getRes() . md5($gendr . $geneid . "_association");
         $association_label = "Association between " . $gene_symbol . " and variation in life span extension induced by dietary restriction";
         parent::addRDF(
             parent::describeIndividual($gendr_id, $gendr_label, parent::getVoc() . "Dietary-Restriction-Life-Extension-Related-Gene")
             . parent::describeClass(parent::getVoc() . "Dietary-Restriction-Life-Extension-Related-Gene", "Dietery Restriction Life Extension Related Gene")
             . parent::triplify($gendr_id, parent::getVoc() . "x-ncbigene", "ncbigene:" . $geneid)
             . parent::triplifyString($gendr_id, parent::getVoc() . "gene-name", $gene_name)
             . parent::triplifyString($gendr_id, parent::getVoc() . "gene-symbol", $gene_symbol)
             . parent::describeIndividual($association_id, $association_label, parent::getVoc() . "Gene-Phenotype-Association")
             . parent::describeClass(parent::getVoc() . "Gene-Phenotype-Association", "Gene Phenotype Association")
             . parent::triplify($association_id, parent::getVoc() . "gene", $gendr_id)
             . parent::triplify($association_id, parent::getVoc() . "phenotype", parent::getVoc() . "Diet-Induced-Life-Span-Variant")
         );
         // C. elegans additionally gets a WormBase phenotype reference
         if ($species_name == "Caenorhabditis elegans") {
             parent::addRDF(parent::triplify($association_id, parent::getVoc() . "phenotype", "wormbase:WBPhenotype:0001837"));
         }
         // species outside the table get no taxon statement
         if (isset($taxa[$species_name])) {
             parent::addRDF(parent::triplify($association_id, parent::getVoc() . "taxon", $taxa[$species_name]));
         }
         parent::writeRDFBufferToWriteFile();
     }
     //while
 }
Beispiel #10
0
 /**
  * Add an RDF representation of one descriptor record to the model.
  *
  * The record is first declared as an individual of the "Descriptor" class,
  * then every field of the record is converted to triples:
  *  - AN: each value is split on ";" and every part is added as a literal
  *  - AQ: each value is split on " " and every part becomes a qualifier resource
  *  - MN: each value becomes a "Tree-Entry" individual that is linked to its
  *        parent tree number (the part before the last ".") when there is one
  *  - ST: each value becomes a "Semantic-Type" individual
  *  - DA, DX, MR: each value is added as an xsd:date literal via formatDate()
  *  - the remaining fields listed in $literal_fields are added as escaped
  *    string literals
  * Keys that are present in the descriptor data element map but are not listed
  * above produce no triples. Keys missing from the map raise an E_USER_ERROR.
  * The RDF buffer is written out after every field and once more at the end.
  *
  * @param array $desc_record_arr assoc array with the contents of one
  *                               descriptor record; each key maps to a list of values
  */
 private function makeDescriptorRecord($desc_record_arr)
 {
     // fields whose values are formatted with formatDate() and typed as xsd:date
     // (field code => description of the generated property)
     $date_fields = array(
         "DA" => "Relationship between a descriptor and its date of entry",
         "DX" => "Relationship between a descriptor and its date of major descriptor established",
         "MR" => "Relationship between a descriptor record and its major revision date",
     );
     // fields whose values are added as escaped string literals
     // (field code => description of the generated property)
     $literal_fields = array(
         "CATSH" => "Relationship between a descriptor and its cataloging subheadings list name",
         "CX" => "Relationship between a descriptor and xrefs",
         "DC" => "Relationship between a descriptor and its descriptor class",
         "DE" => "Relationship between a descriptor record and its entry version",
         "DS" => "Relationship between a descriptor record and its sort version",
         "EC" => "Relationship between a descriptor and its entry combination",
         "PRINT ENTRY" => "Relationship between a descriptor and its print entry term",
         "ENTRY" => "Relationship between a descriptor and its entry term",
         "FX" => "Relationship between a descriptor and its forward cross reference",
         "GM" => "Relationship between a descriptor and its grateful med note",
         "HN" => "Relationship between a descriptor record and its history note",
         "MED" => "Relationship between a descriptor and its backfile postings",
         "M94" => "Relationship between a descriptor and its backfile postings",
         "M90" => "Relationship between a descriptor and its backfile postings",
         "M85" => "Relationship between a descriptor record and its backfile postings",
         "M80" => "Relationship between a descriptor record and its backfile postings",
         "M75" => "Relationship between a descriptor record and its backfile postings",
         "M66" => "Relationship between a descriptor record and its backfile postings",
         "MH_TH" => "Relationship between a descriptor record and its MeSH Heading thesaurus id",
         "MH" => "Relationship between a descriptor record and its MeSH Heading",
         "MS" => "Relationship between a descriptor record and its MeSH scope note",
         "N1" => "Relationship between a descriptor record and its CAS 1 name",
         "OL" => "Relationship between a descriptor record and its online note",
         "PA" => "Relationship between a descriptor record and its pharmacological action",
         "PI" => "Relationship between a descriptor record and its previous indexing",
         "PM" => "Relationship between a descriptor record and its public mesh note",
         "PX" => "Relationship between a descriptor record and its pre explosion",
         "RECTYPE" => "Relationship between a descriptor record and its record type",
         "RH" => "Relationship between a descriptor record and its running head, in relation to mesh tree structures",
         "RN" => "Relationship between a descriptor record and its CAS registry",
         "RR" => "Relationship between a descriptor record and its registry number",
     );
     // the unique identifier (UI) names the resource, the heading (MH) labels it
     $dr_ui = $desc_record_arr["UI"][0];
     $dr_res = $this->getNamespace() . $dr_ui;
     $dr_label = $desc_record_arr['MH'][0];
     parent::AddRDF(parent::describeIndividual($dr_res, $dr_label, $this->getVoc() . "Descriptor", $dr_label) . parent::describeClass($this->getVoc() . "Descriptor", "MeSH Descriptor"));
     // map of field code => property name used to build the predicates
     $qde = $this->getDescriptorDataElements();
     foreach ($desc_record_arr as $k => $v) {
         if (array_key_exists($k, $qde)) {
             if ($k === "AN") {
                 $property = $this->getVoc() . $qde["AN"];
                 foreach ($v as $vv) {
                     // split each value on ";" and add every part as its own literal
                     foreach (explode(";", $vv) as $anAn) {
                         parent::AddRDF(parent::triplifyString($dr_res, $property, $anAn) . parent::describeProperty($property, "Relationship between a descriptor and its annotation"));
                     }
                 }
             } elseif ($k === "AQ") {
                 // allowable topical qualifiers: space separated codes, one resource per code
                 $property = $this->getVoc() . $qde['AQ'];
                 $aq_class = $this->getVoc() . "allowable-topical-qualifier";
                 foreach ($v as $vv) {
                     foreach (explode(" ", $vv) as $aq) {
                         $aq_res = $this->getRes() . $aq;
                         parent::AddRDF(parent::triplify($aq_res, "rdf:type", $aq_class) . parent::describeClass($aq_class, "allowable topical qualifier: " . $qde['AQ']));
                         parent::AddRDF(parent::triplify($dr_res, $property, $aq_res) . parent::describeProperty($property, "Relationship between a descriptor and its allowable topical qualifiers"));
                     }
                 }
             } elseif ($k === "MN") {
                 $property = $this->getVoc() . $qde['MN'];
                 foreach ($v as $vv) {
                     $vid = parent::getNamespace() . $vv;
                     $vlabel = utf8_encode(htmlspecialchars($vv));
                     parent::AddRDF(parent::describeIndividual($vid, $dr_label, parent::getVoc() . "Tree-Entry", $vlabel) . parent::triplify($dr_res, $property, $vid) . parent::describeProperty($property, "Relationship between a descriptor record and its MeSH Tree Number"));
                     // link the tree number to its parent: everything before the last "."
                     if (FALSE !== ($pos = strrpos($vv, "."))) {
                         $pid = parent::getNamespace() . substr($vv, 0, $pos);
                         parent::addRDF(parent::triplify($vid, "rdfs:subClassOf", $pid));
                     }
                 }
             } elseif ($k === "ST") {
                 $property = $this->getVoc() . $qde['ST'];
                 foreach ($v as $vv) {
                     $vid = parent::getNamespace() . $vv;
                     $vlabel = utf8_encode(htmlspecialchars($vv));
                     parent::AddRDF(parent::describeIndividual($vid, $vlabel, parent::getVoc() . "Semantic-Type", $vlabel) . parent::triplify($dr_res, $property, $vid) . parent::describeProperty($property, "Relationship between a descriptor record and its semantic type"));
                 }
             } elseif (isset($date_fields[$k])) {
                 $property = $this->getVoc() . $qde[$k];
                 foreach ($v as $vv) {
                     parent::AddRDF(parent::triplifyString($dr_res, $property, $this->formatDate($vv), "xsd:date") . parent::describeProperty($property, $date_fields[$k]));
                 }
             } elseif (isset($literal_fields[$k])) {
                 // the property that is described is always the one used in the triple
                 $property = $this->getVoc() . $qde[$k];
                 foreach ($v as $vv) {
                     // NOTE(review): utf8_encode() is deprecated as of PHP 8.2; kept so the output stays unchanged
                     parent::AddRDF(parent::triplifyString($dr_res, $property, utf8_encode(htmlspecialchars($vv))) . parent::describeProperty($property, $literal_fields[$k]));
                 }
             }
             // any other key known to the map intentionally produces no triples
         } else {
             trigger_error("Please add key to descriptor record map: " . $k . PHP_EOL, E_USER_ERROR);
         }
         // flush after every field
         $this->WriteRDFBufferToWriteFile();
     }
     $this->WriteRDFBufferToWriteFile();
 }
Beispiel #11
0
 function parseEntry($lfile)
 {
     $fp = fopen($lfile, "r");
     while ($l = fgets($fp, 100000)) {
         $k_t = trim(substr($l, 0, 12));
         $v = trim(substr($l, 12));
         if (!$k_t and $v == '') {
             continue;
         }
         // set the key to the current key if not empty, else keep using what was there before
         if (!isset($k)) {
             $k = $k_t;
         } else {
             if (!empty($k_t)) {
                 $k = $k_t;
             }
         }
         if ($k == "///" or $k == "ENTRY1") {
             break;
         }
         if ($k == "ENTRY") {
             $a = explode("  ", $v, 2);
             $e['id'] = str_replace(array("EC ", " "), "", $a[0]);
             if (isset($this->org)) {
                 $e['id'] = $this->org . "_" . $e['id'];
             }
             $e['type'] = trim(str_replace(array("Complete ", "Pathway   Module"), array("", "Pathway Module"), $a[1]));
             $e['type_label'] = str_replace(" ", "-", $e['type']);
             $uri = parent::getNamespace() . $e['id'];
             continue;
         }
         // key with value
         if (in_array($k, array("NAME", "DESCRIPTION", "DEFINITION", "EQUATION", "COMMENT"))) {
             if ($k == "NAME") {
                 parent::addRDF(parent::describeIndividual($uri, $v, parent::getVoc() . $e['type']) . parent::describeClass(parent::getVoc() . $e['type'], $e['type_label']) . parent::triplify($uri, "rdfs:seeAlso", "http://www.kegg.jp/dbget-bin/www_bget?" . $e['id']));
                 if ($e['type'] == 'Genome') {
                     $a = explode(",", $v);
                     parent::addRDF(parent::triplify($uri, "owl:sameAs", "kegg:" . $a[0]));
                 }
             } else {
                 if ($k == "DESCRIPTION") {
                     parent::addRDF(parent::triplifyString($uri, "dc:description", $v));
                 } else {
                     if ($k == "DEFINITION" and $e['type'] == "KO") {
                         preg_match("/\\[([^\\]]+)\\]/", $v, $m);
                         if (isset($m[1])) {
                             parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-ec", $m[1]));
                         }
                     } else {
                         if ($k == "COMMENT") {
                             preg_match("/ICD-O: ([^,]+),/", $v, $m);
                             if (isset($m[1])) {
                                 parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-icdo", "icdo:" . $m[1]));
                                 continue;
                             }
                         } else {
                             parent::addRDF(parent::triplifyString($uri, parent::getVoc() . strtolower($k), $v));
                         }
                     }
                 }
             }
             continue;
         }
         if ($k == "RPAIR" and $e['type'] == "Reaction") {
             $list = explode(" ", $v);
             $id = parent::getRes() . $e['id'] . "." . $list[2] . "." . $list[3];
             $rc = '';
             if (isset($list[4])) {
                 $rc = "kegg:" . substr($list[4], 4, -1);
             }
             parent::addRDF(parent::describeIndividual($id, $e['id'] . " " . $v, parent::getVoc() . "RPair-Role") . parent::describeClass(parent::getVoc() . "RPair-Role", "RPair Role") . parent::triplify($id, parent::getVoc() . "rpair", "kegg:" . $list[0]) . parent::triplifyString($id, parent::getVoc() . "role", $list[3]) . ($rc != '' ? parent::triplify($id, parent::getVoc() . "reaction-center", $rc) : '') . parent::triplify($uri, parent::getVoc() . "rpair-role", $id));
             continue;
         }
         // list of entries
         if (in_array($k, array("ENZYME", "RPAIR", "RELATEDPAIR")) or in_array($e['type'], array("Compound", "RClass", "RPair")) and $k == "REACTION") {
             $list = explode(" ", $v);
             foreach ($list as $id) {
                 if (!$id) {
                     continue;
                 }
                 parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:{$id}"));
             }
             continue;
         }
         // key with semi-colon separated values
         if (in_array($k, array("CLASS", "CATEGORY", "KEYWORDS", "CHROMOSOME", "ANNOTATION", "ACTIVITY", "TYPE"))) {
             $a = explode(";", $v);
             foreach ($a as $c) {
                 parent::addRDF(parent::triplifyString($uri, parent::getVoc() . strtolower($k), trim($c)));
             }
             continue;
         }
         // kegg seems to make a prefix mistake with the pathway identifiers...
         if ($k == "PATHWAY") {
             $a = explode("  ", $v, 2);
             preg_match("/[a-z]+([0-9]{5})/", $a[0], $m);
             if (isset($m[1])) {
                 parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:map" . $m[1]));
             } else {
                 echo "pathway problem: " . $v . PHP_EOL;
             }
             continue;
         }
         // multi-line header with key-value pair
         if (in_array($k, array("PATHWAY_MAP", "STR_MAP", "MODULE", "DISEASE", "KO_PATHWAY", "COMPOUND"))) {
             // PATHWAY_MAP map00010  Glycolysis / Gluconeogenesis
             $a = explode("  ", $v, 2);
             $mid = $a[0];
             if (strpos($a[0], '(') !== FALSE) {
                 $mid = substr($a[0], 0, strpos($a[0], '('));
             }
             if (isset($this->org) and $k == "MODULE") {
                 $mid = substr($mid, strpos($v, "_") + 1);
             }
             parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:" . $mid));
             continue;
         }
         // REACTION parsing
         if (preg_match("/\\[RN:([^\\]]+)]/", $v, $m) != FALSE) {
             $list = explode(" ", $m[1]);
             foreach ($list as $item) {
                 parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:" . $item));
             }
             continue;
         }
         if ($k == "DRUG") {
             preg_match("/\\[DR:([^\\]]+)]/", $v, $m);
             if (isset($m[1])) {
                 $list = explode(" ", $m[1]);
                 foreach ($list as $item) {
                     parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:" . $item));
                 }
                 continue;
             }
         }
         if ($k == "TAXONOMY") {
             parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:" . str_replace("TAX", "taxonomy", $v)));
             continue;
         }
         // a list of objects to parse out that are defined within square brackets
         if (in_array($k, array("SOURCE", "COMPONENT"))) {
             preg_match_all("/\\[([^\\]]+)\\]/", $v, $m);
             if (isset($m[1])) {
                 foreach ($m[1] as $id) {
                     $myid = str_replace(array("TAX", "CPD", "DR"), array("taxonomy", "kegg", "kegg"), $id);
                     parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), $myid));
                 }
                 continue;
             }
         }
         // multi-line header with multi-key single value pair
         if (in_array($k, array("ORTHOLOGY", "REACTION"))) {
             // K00844,K12407,K00845  hexokinase/glucokinase [EC:2.7.1.1 2.7.1.2] [RN:R01786]
             // R01786,R02189,R09085  C00267 -> C00668
             $a = explode(" ", $v, 2);
             $ids = explode(",", $a[0]);
             if ($k == "REACTION" and $ids[0][0] != "R") {
                 echo "unable to parse {$k}" . PHP_EOL;
                 continue;
             }
             if (!isset($a[1])) {
                 if ($e['type'] == "Reaction") {
                     parent::addRDF(parent::triplify($uri, parent::getVoc() . "orthology", "kegg:" . trim($a[0])));
                     continue;
                 }
                 echo "parse error: " . $k . " " . $v . PHP_EOL;
                 continue;
             }
             $str = $a[1];
             foreach ($ids as $id) {
                 $o = '';
                 $o['id'] = $id;
                 $o['label'] = $str;
                 $o['type'] = strtolower($k);
                 parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:{$id}"));
             }
             continue;
         }
         if ($k == "DBLINKS") {
             // DBLINKS     GO: 0006096 0006094
             $a = explode(": ", $v, 2);
             $ns = str_replace(array("ncbi-geneid", "ncbi-gi", "rn", "pubchem", "pdb-ccd", "icd-10", "um-bbd", "iubmb enzyme nomenclature", "explorenz - the enzyme database", "expasy - enzyme nomenclature database", "umbbd (biocatalysis/biodegradation database)", "brenda, the enzyme database"), array("ncbigene", "gi", "kegg", "pubchem.compound", "ccd", "icd10", "umbbd", "ec", "ec", "ec", "ec", "ec"), strtolower($a[0]));
             $ids = explode(" ", $a[1]);
             foreach ($ids as $id) {
                 if (!$id) {
                     continue;
                 }
                 parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-{$ns}", "{$ns}:{$id}"));
             }
             continue;
         }
         if ($k == "REMARK") {
             preg_match("/Same as: ([A-Z0-9]+)/", $v, $m);
             if (isset($m[1])) {
                 parent::addRDF(parent::triplify($uri, parent::getVoc() . "same-as", "kegg:" . $m[1]));
                 continue;
             }
             preg_match("/ATC code: (.*)/", $v, $m);
             if (isset($m[1])) {
                 $list = explode(" ", $m[1]);
                 foreach ($list as $item) {
                     parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-atc", "atc:" . $item));
                 }
                 continue;
             }
             preg_match("/Therapeutic category: (.*)/", $v, $m);
             if (isset($m[1])) {
                 $list = explode(" ", $m[1]);
                 foreach ($list as $item) {
                     parent::addRDF(parent::triplifyString($uri, parent::getVoc() . "therapeutic-category", $item));
                 }
                 continue;
             }
             preg_match("/Drug group: (.*)/", $v, $m);
             if (isset($m[1])) {
                 $list = explode(" ", $m[1]);
                 foreach ($list as $item) {
                     parent::addRDF(parent::triplify($uri, parent::getVoc() . "drug-group", "kegg:" . $item));
                 }
                 continue;
             }
         }
         if ($k == "PRODUCT" or $k == "SUBSTRATE") {
             preg_match("/([a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})/", $v, $m);
             if (isset($m[1])) {
                 parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-dailymed", "dailymed:" . $m[1]) . parent::triplifyString("dailymed:" . $m[1], "rdfs:label", $v));
                 continue;
             }
             preg_match("/\\[CPD:([^\\]]+)\\]/", $v, $m);
             if (isset($m[1])) {
                 parent::addRDF(parent::triplify($uri, parent::getVoc() . strtolower($k), "kegg:" . $m[1]));
                 continue;
             }
         }
         if ($k == "STATISTICS") {
             $a = explode(": ", $v);
             parent::addRDF(parent::triplifyString($uri, parent::getVoc() . str_replace(" ", "-", strtolower($a[0])), $a[1]));
             continue;
         }
         if ($k == "ORGANISM") {
             $a = explode(" ", $v);
             parent::addRDF(parent::triplify($uri, parent::getVoc() . "organism", "kegg:" . $a[0]));
             continue;
         }
         if ($k == "REFERENCE") {
             if (!isset($ref)) {
                 $ref = 1;
             } else {
                 if (!isset($e['reference'][$ref]['title'])) {
                     continue;
                 }
                 // this is a bug where the reference declaration is split onto two lines
                 $ref++;
             }
             if (strstr($v, "PMID")) {
                 // PMID:11529849 (marker)
                 preg_match("/(PMID:[0-9]+) /", $v, $m);
                 if (isset($m[1])) {
                     $e['reference'][$ref]['pubmed'] = $m[1];
                 }
             }
             continue;
         }
         if ($k == "AUTHORS") {
             $e['reference'][$ref]['authors'] = $v;
             continue;
         }
         if ($k == "TITLE") {
             $e['reference'][$ref]['title'] = $v;
             continue;
         }
         if ($k == "JOURNAL") {
             $e['reference'][$ref]['journal'] = $v;
             continue;
         }
         if ($e['type'] == "Disease" and ($k == "GENE" or $k == "MARKER")) {
             // BCR-ABL (translocation) [HSA:613 25] [KO:K08878 K06619]
             preg_match_all("/ \\[([^\\]]+)\\]/", $v, $m);
             if (isset($m[1])) {
                 foreach ($m[1] as $idlist) {
                     $a = explode(":", $idlist);
                     $ns = $a[0];
                     $b = explode(" ", $a[1]);
                     foreach ($b as $id) {
                         if ($ns == "KO") {
                             $rel = "ko-" . strtolower($k);
                             $gene = $id;
                         } else {
                             $rel = strtolower($k);
                             $gene = $ns . "_" . $id;
                         }
                         parent::addRDF(parent::triplify($uri, parent::getVoc() . $rel, "kegg:{$gene}"));
                     }
                 }
             } else {
                 echo $v;
             }
             continue;
         }
         if ($k == "GENES") {
             // ATH: AT1G32780 AT1G64710 AT1G77120(ADH1) AT5G24760
             $a = explode(": ", $v);
             $org = $a[0];
             $b = explode(" ", $a[1]);
             foreach ($b as $id) {
                 $c = explode("(", $id);
                 $gene = parent::getNamespace() . $org . "_" . $c[0];
                 parent::addRDF(parent::triplify($uri, parent::getVoc() . "gene", $gene));
             }
             //echo parent::getRDF();exit;
             continue;
         }
         if ($k == "DRUG_TARGET") {
             // Afatinib: D09724 D09733
             $s = substr($v, strpos($v, ":") + 2);
             $list = explode(" ", $s);
             foreach ($list as $item) {
                 parent::addRDF(parent::triplify($uri, parent::getVoc() . "drug-target", "kegg:{$item}"));
             }
             continue;
         }
         if ($k == "STRUCTURE") {
             $list = explode(" ", $v);
             foreach ($list as $item) {
                 if (trim($item) == '') {
                     continue;
                 }
                 parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-pdb", "pdb:{$item}"));
             }
             continue;
         }
         if ($k == "MOTIF") {
             $list = explode(" ", $v);
             foreach ($list as $item) {
                 parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-pfam", "pfam:{$item}"));
             }
             continue;
         }
         if (in_array($k, array("INTERACTION", "METABOLISM", "TARGET"))) {
             // dopamine D2-receptor antagonist [HSA:1813] [KO:K04145]
             $id = parent::getRes() . md5($uri . $v);
             $type = ucfirst(strtolower($k));
             if (in_array($k, array("INTERACTION", "METABOLISM"))) {
                 $a = explode(":", $v, 2);
                 $modifier = $a[0];
             } else {
                 $modifier = '';
                 $s = substr($v, 0, strpos($v, "[") + 1);
                 // dopamine D2-receptor antagonist [
                 preg_match("/ ([a-z]+) \\[/", $s, $m);
                 if (isset($m[1])) {
                     $modifier = $m[1];
                 }
             }
             parent::addRDF(parent::describeIndividual($id, $v, parent::getVoc() . $type) . parent::describeClass(parent::getVoc() . $type, $type) . parent::triplifyString($id, parent::getVoc() . "modifier", $modifier) . parent::triplify($uri, parent::getVoc() . strtolower($k), $id));
             preg_match_all("/ \\[([^\\]]+)\\]/", $v, $m);
             if (isset($m[1])) {
                 foreach ($m[1] as $item) {
                     if (!strstr($item, "KO")) {
                         $item = "kegg:" . str_replace(":", "_", $item);
                     } else {
                         $item = str_replace("KO:", "kegg:", $item);
                     }
                     parent::addRDF(parent::triplify($id, parent::getVoc() . "link", $item));
                 }
             }
             continue;
         }
         // skip these
         if (in_array($k, array("ATOM", "BOND", "BRITE", "AASEQ", "NTSEQ", "SEQUENCE"))) {
             continue;
         }
         // simple strings to keep as is
         if (in_array($k, array("EXACT_MASS", "FORMULA", "MOL_WEIGHT", "LINEAGE", "LENGTH", "MASS", "COMPOSITION", "NODE", "EDGE", "POSITION"))) {
             parent::addRDF(parent::triplifyString($uri, parent::getVoc() . strtolower($k), $v));
             continue;
         }
         // default catchall
         parent::addRDF(parent::triplifyString($uri, parent::getVoc() . strtolower($k), $v . " [script:default]"));
     }
     if (isset($e['reference'])) {
         foreach ($e['reference'] as $i => $r) {
             $ref = parent::getRes() . $e['id'] . ".ref.{$i}";
             parent::addRDF(parent::describeIndividual($ref, $r['title'], parent::getVoc() . "Reference") . parent::describeClass(parent::getVoc() . "Reference", "Reference") . parent::triplifyString($ref, parent::getVoc() . "authors", $r['authors']) . parent::triplifyString($ref, parent::getVoc() . "journal", $r['journal']) . parent::triplify($uri, parent::getVoc() . "reference", $ref));
             if (isset($r['pubmed'])) {
                 parent::addRDF(parent::triplify($ref, parent::getVoc() . "x-pubmed", $r['pubmed']));
             }
         }
     }
     fclose($fp);
 }
Beispiel #12
0
 /**
  * Convert a single decoded OMIM entry into RDF and add it to the RDF buffer.
  *
  * The entry is read from $obj["omim"]["entryList"][0]["entry"]. In order, the
  * method emits: the entry itself (links, titles), its text sections (plus a
  * "refers-to" link for every {NNNNNN} six-digit number found in the text),
  * allelic variants, the clinical synopsis, the gene map, phenotype maps,
  * PubMed references and external identifiers.
  *
  * @param array  $obj  decoded OMIM response (presumably json_decode output — verify against caller)
  * @param string $type entry type label; used for both the class name and the class label
  */
 function ParseEntry($obj, $type)
 {
     $o = $obj["omim"]["entryList"][0]["entry"];
     $omim_id = $o['mimNumber'];
     $omim_uri = parent::getNamespace() . $o['mimNumber'];
     if (isset($o['version'])) {
         parent::setDatasetVersion($o['version']);
     }
     // add the links
     parent::addRDF($this->QQuadO_URL($omim_uri, "rdfs:seeAlso", "http://omim.org/entry/" . $omim_id));
     parent::addRDF($this->QQuadO_URL($omim_uri, "owl:sameAs", "http://identifiers.org/omim/" . $omim_id));
     // parse titles; the class name is the type with spaces and slashes turned into dashes
     $titles = $o['titles'];
     parent::addRDF(parent::describeIndividual($omim_uri, $titles['preferredTitle'], parent::getVoc() . str_replace(array(" ", "/"), "-", ucfirst($type))) . parent::describeClass(parent::getVoc() . str_replace(array(" ", "/"), "-", ucfirst($type)), $type));
     if (isset($titles['preferredTitle'])) {
         parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "preferred-title", $titles['preferredTitle']));
     }
     if (isset($titles['alternativeTitles'])) {
         // alternative titles are separated by ";;"
         $b = explode(";;", $titles['alternativeTitles']);
         foreach ($b as $title) {
             parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "alternative-title", trim($title)));
         }
     }
     // parse text sections
     if (isset($o['textSectionList'])) {
         foreach ($o['textSectionList'] as $section) {
             if ($section['textSection']['textSectionTitle'] == "Description") {
                 parent::addRDF(parent::triplifyString($omim_uri, "dc:description", $section['textSection']['textSectionContent']));
             } else {
                 // any other section becomes a predicate named after its lower-cased, dashed title
                 $p = str_replace(" ", "-", strtolower($section['textSection']['textSectionTitle']));
                 parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "{$p}", $section['textSection']['textSectionContent']));
             }
             // parse the omim references: six-digit numbers wrapped in braces
             preg_match_all("/\\{([0-9]{6})\\}/", $section['textSection']['textSectionContent'], $m);
             if (isset($m[1][0])) {
                 foreach ($m[1] as $oid) {
                     parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "refers-to", "omim:{$oid}"));
                 }
             }
         }
     }
     // allelic variants
     if (isset($o['allelicVariantList'])) {
         foreach ($o['allelicVariantList'] as $i => $v) {
             $v = $v['allelicVariant'];
             $uri = parent::getRes() . "{$omim_id}" . "_allele_" . $i;
             $label = str_replace("\n", " ", $v['name']);
             parent::addRDF(parent::describeIndividual($uri, $label, parent::getVoc() . "Allelic-Variant") . parent::describeClass(parent::getVoc() . "Allelic-Variant", "Allelic Variant"));
             if (isset($v['alternativeNames'])) {
                 $names = explode(";;", $v['alternativeNames']);
                 foreach ($names as $name) {
                     $name = str_replace("\n", " ", $name);
                     parent::addRDF(parent::triplifyString($uri, parent::getVoc() . "alternative-names", $name));
                 }
             }
             if (isset($v['text'])) {
                 parent::addRDF(parent::triplifyString($uri, "dc:description", $v['text']));
             }
             if (isset($v['mutations'])) {
                 parent::addRDF(parent::triplifyString($uri, parent::getVoc() . "mutation", $v['mutations']));
             }
             if (isset($v['dbSnps'])) {
                 $snps = explode(",", $v['dbSnps']);
                 foreach ($snps as $snp) {
                     parent::addRDF(parent::triplify($uri, parent::getVoc() . "x-dbsnp", "dbsnp:" . $snp));
                 }
             }
             parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "variant", $uri));
         }
     }
     // clinical synopsis
     if (isset($o['clinicalSynopsis'])) {
         $cs = $o['clinicalSynopsis'];
         $cs_uri = parent::getRes() . "" . $omim_id . "_cs";
         parent::addRDF(parent::describeIndividual($cs_uri, "Clinical synopsis for omim {$omim_id}", parent::getVoc() . "Clinical-Synopsis") . parent::describeClass(parent::getVoc() . "Clinical-Synopsis", "Clinical Synopsis") . parent::triplify($omim_uri, parent::getVoc() . "clinical-synopsis", $cs_uri));
         foreach ($cs as $k => $v) {
             // keys containing "Exists" are boolean assertions and are ignored
             if (!strstr($k, "Exists")) {
                 // @todo ignore provenance for now
                 if (in_array($k, array('contributors', 'creationDate', 'editHistory', 'epochCreated', 'dateCreated', 'epochUpdated', 'dateUpdated'))) {
                     continue;
                 }
                 // normalise scalar values to a one-element map so both shapes share the loop below
                 if (!is_array($v)) {
                     $v = array($k => $v);
                 }
                 foreach ($v as $k1 => $v1) {
                     $phenotypes = explode(";", $v1);
                     foreach ($phenotypes as $coded_phenotype) {
                         // parse out the codes
                         $coded_phenotype = trim($coded_phenotype);
                         if (!$coded_phenotype) {
                             continue;
                         }
                         // the phenotype text is the coded phenotype with its {...} annotations removed
                         $phenotype = preg_replace("/\\{.*\\}/", "", $coded_phenotype);
                         $phenotype_id = parent::getRes() . "" . md5(strtolower($phenotype));
                         $entity_id = parent::getRes() . "" . $k1;
                         parent::addRDF(parent::describeIndividual($phenotype_id, $phenotype, parent::getVoc() . 'Characteristic') . parent::describeClass(parent::getVoc() . 'Characteristic', 'Characteristic') . parent::triplify($cs_uri, parent::getVoc() . "feature", $phenotype_id) . parent::describeIndividual($entity_id, $k1, parent::getVoc() . "Entity") . parent::describeClass(parent::getVoc() . "Entity", "Entity") . parent::triplify($phenotype_id, parent::getVoc() . "characteristic-of", $entity_id));
                         // parse out the vocab references
                         preg_match_all("/\\{([0-9A-Za-z \\:\\-\\.]+)\\}|;/", $coded_phenotype, $codes);
                         //preg_match_all("/((UMLS|HPO HP|SNOMEDCT|ICD10CM|ICD9CM|EOM ID)\:[A-Z0-9]+)/",$coded_phenotype,$m);
                         if (isset($codes[1][0])) {
                             foreach ($codes[1] as $entry) {
                                 $entries = explode(" ", trim($entry));
                                 foreach ($entries as $e) {
                                     // "HPO" and "EOM" tokens are skipped; the token that follows carries the id
                                     if ($e == "HPO" || $e == "EOM") {
                                         continue;
                                     }
                                     // NOTE(review): parseQName presumably fills $ns and $id by reference — confirm
                                     $this->getRegistry()->parseQName($e, $ns, $id);
                                     if (!isset($ns) || $ns == '') {
                                         // no namespace: treat as an omim id and keep the part before the first "."
                                         $b = explode(".", $id);
                                         $ns = "omim";
                                         $id = $b[0];
                                     } else {
                                         $ns = str_replace(array("hpo", "id", "icd10cm", "icd9cm", "snomedct"), array("hp", "eom", "icd10", "icd9", "snomed"), $ns);
                                     }
                                     parent::addRDF(parent::triplify($phenotype_id, parent::getVoc() . "x-{$ns}", "{$ns}:{$id}"));
                                 }
                             }
                         }
                     }
                 }
             }
         }
     }
     // genemap
     if (isset($o['geneMap'])) {
         $map = $o['geneMap'];
         if (isset($map['chromosome'])) {
             parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "chromosome", (string) $map['chromosome']));
         }
         if (isset($map['cytoLocation'])) {
             parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "cytolocation", (string) $map['cytoLocation']));
         }
         if (isset($map['geneSymbols'])) {
             $b = preg_split("/[,;\\. ]+/", $map['geneSymbols']);
             foreach ($b as $symbol) {
                 parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "gene-symbol", "symbol:" . trim($symbol)));
             }
         }
         if (isset($map['geneName'])) {
             $b = explode(",", $map['geneName']);
             foreach ($b as $name) {
                 parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "gene-name", trim($name)));
             }
         }
         if (isset($map['mappingMethod'])) {
             $b = explode(",", $map['mappingMethod']);
             foreach ($b as $c) {
                 $mapping_method = trim($c);
                 $method_uri = $this->get_method_type($mapping_method);
                 // get_method_type() signals an unknown method with false
                 if ($method_uri !== false) {
                     parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "mapping-method", $method_uri));
                 }
             }
         }
         if (isset($map['mouseGeneSymbol'])) {
             $b = explode(",", $map['mouseGeneSymbol']);
             foreach ($b as $c) {
                 parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "mouse-gene-symbol", "symbol:" . strtoupper($c)));
             }
         }
         if (isset($map['mouseMgiID'])) {
             $b = explode(",", $map['mouseMgiID']);
             foreach ($b as $c) {
                 parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-mgi", $c));
             }
         }
         if (isset($map['geneInheritance']) && $map['geneInheritance'] != '') {
             parent::addRDF(parent::triplifyString($omim_uri, parent::getVoc() . "gene-inheritance", $map['geneInheritance']));
         }
     }
     // phenotype maps
     if (isset($o['phenotypeMapList'])) {
         foreach ($o['phenotypeMapList'] as $i => $phenotypeMap) {
             $phenotypeMap = $phenotypeMap['phenotypeMap'];
             $pm_uri = parent::getRes() . $omim_id . "_pm_" . ($i + 1);
             parent::addRDF(parent::describeIndividual($pm_uri, "phenotype mapping for {$omim_id}", parent::getVoc() . "Phenotype-Map") . parent::describeClass(parent::getVoc() . "Phenotype-Map", "OMIM Phenotype-Map") . parent::triplify($omim_uri, parent::getVoc() . "phenotype-map", $pm_uri));
             foreach (array_keys($phenotypeMap) as $k) {
                 if (in_array($k, array("mimNumber", "phenotypeMimNumber", "phenotypicSeriesMimNumber"))) {
                     // MIM numbers become links to other omim resources
                     parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . $k, "omim:" . $phenotypeMap[$k]));
                 } else {
                     if ($k == "geneSymbols") {
                         $l = explode(", ", $phenotypeMap[$k]);
                         foreach ($l as $gene) {
                             parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . "gene-symbol", "hgnc.symbol:" . $gene));
                         }
                     } else {
                         if ($k == "phenotypeMappingKey") {
                             $l = $this->get_phenotype_mapping_method_type($phenotypeMap[$k]);
                             parent::addRDF(parent::triplify($pm_uri, parent::getVoc() . "mapping-method", $l));
                         } else {
                             // everything else is kept as a plain literal under its own key
                             parent::addRDF(parent::triplifyString($pm_uri, parent::getVoc() . $k, $phenotypeMap[$k]));
                         }
                     }
                 }
             }
         }
     }
     // references: only those carrying a PubMed id are emitted
     if (isset($o['referenceList'])) {
         foreach ($o['referenceList'] as $r) {
             $r = $r['reference'];
             if (isset($r['pubmedID'])) {
                 $pubmed_uri = "pubmed:" . $r['pubmedID'];
                 parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "article", $pubmed_uri));
                 // fall back to a generic label when the reference carries no title
                 $title = 'article';
                 if (isset($r['title'])) {
                     $title = $r['title'];
                 }
                 parent::addRDF(parent::describe($pubmed_uri, addslashes($title)));
                 if (isset($r['articleUrl'])) {
                     parent::addRDF($this->QQuadO_URL($pubmed_uri, "rdfs:seeAlso", htmlentities($r['articleUrl'])));
                 }
             }
         }
     }
     // external ids
     if (isset($o['externalLinks'])) {
         foreach ($o['externalLinks'] as $k => $id) {
             if ($id === false) {
                 continue;
             }
             // $ns stays empty for link types that are deliberately ignored (or unknown),
             // which suppresses triple generation further down
             $ns = '';
             switch ($k) {
                 case 'approvedGeneSymbols':
                     $ns = 'symbol';
                     break;
                 case 'geneIDs':
                     $ns = 'ncbigene';
                     break;
                 case 'ncbiReferenceSequences':
                     $ns = 'gi';
                     break;
                 case 'genbankNucleotideSequences':
                     $ns = 'gi';
                     break;
                 case 'proteinSequences':
                     $ns = 'gi';
                     break;
                 case 'uniGenes':
                     $ns = 'unigene';
                     break;
                 case 'ensemblIDs':
                     $ns = 'ensembl';
                     break;
                 case 'swissProtIDs':
                     $ns = 'uniprot';
                     break;
                 case 'mgiIDs':
                     $ns = 'mgi';
                     // keep what follows the first ":" of the value
                     $b = explode(":", $id);
                     $id = $b[1];
                     break;
                 case 'flybaseIDs':
                     $ns = 'flybase';
                     break;
                 case 'zfinIDs':
                     $ns = 'zfin';
                     break;
                 case 'hprdIDs':
                     $ns = 'hprd';
                     break;
                 case 'orphanetDiseases':
                     $ns = 'orphanet';
                     break;
                 case 'refSeqAccessionIDs':
                     $ns = 'refseq';
                     break;
                 case 'ordrDiseases':
                     $ns = 'ordr';
                     // keep only the part before the first ";;"
                     $b = explode(";;", $id);
                     $id = $b[0];
                     break;
                 case 'snomedctIDs':
                     $ns = 'snomed';
                     break;
                 case 'icd10cmIDs':
                     $ns = 'icd10';
                     break;
                 case 'icd9cmIDs':
                     $ns = 'icd9';
                     break;
                 case 'umlsIDs':
                     $ns = 'umls';
                     break;
                 case 'wormbaseIDs':
                     $ns = 'wormbase';
                     break;
                 case 'diseaseOntologyIDs':
                     $ns = 'do';
                     break;
                 // specifically ignoring
                 case 'geneTests':
                 case 'cmgGene':
                 case 'geneticAllianceIDs':
                 // #
                 case 'nextGxDx':
                 case 'nbkIDs':
                 // NBK1207;;Alport Syndrome and Thin Basement Membrane Nephropathy
                 case 'newbornScreeningUrls':
                 case 'decipherUrls':
                 case 'geneReviewShortNames':
                 case 'locusSpecificDBs':
                 case 'geneticsHomeReferenceIDs':
                 case 'omiaIDs':
                 case 'coriellDiseases':
                 case 'clinicalDiseaseIDs':
                 case 'possumSyndromes':
                 case 'keggPathways':
                 case 'gtr':
                 case 'gwasCatalog':
                 case 'mgiHumanDisease':
                 case 'wormbaseDO':
                 case 'dermAtlas':
                     // true/false
                     break;
                 default:
                     echo "unhandled external link {$k} {$id}" . PHP_EOL;
             }
             $ids = explode(",", $id);
             foreach ($ids as $id) {
                 if ($ns) {
                     if (strstr($id, ";;") === FALSE) {
                         parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-{$ns}", $ns . ':' . $id));
                     } else {
                         // multiple ids//names: parts containing a lower-case letter are skipped
                         $b = explode(";;", $id);
                         foreach ($b as $c) {
                             preg_match("/([a-z])/", $c, $m);
                             if (!isset($m[1])) {
                                 parent::addRDF(parent::triplify($omim_uri, parent::getVoc() . "x-{$ns}", $ns . ':' . $c));
                             }
                         }
                     }
                 }
             }
         }
     }
 }
Beispiel #13
0
 /**
  * Convert name records from the current read file into RDF.
  *
  * Each record is split on the tab-pipe-tab delimiter into four fields:
  * tax id, name, unique name and name class. For every record a triple named
  * after the name class (spaces replaced by dashes) and a "unique-name" triple
  * are emitted; records whose name class is "scientific name" additionally get
  * dc:title and rdfs:label. The RDF buffer is flushed after every record.
  *
  * NOTE(review): read(200000) presumably returns one line of at most 200000
  * bytes — confirm against the read-file implementation.
  */
 private function names()
 {
     while ($l = $this->getReadFile()->read(200000)) {
         $a = explode("\t|\t", trim($l, "|\t\r\n"));
         // explode() never returns an empty array, so test for the four fields
         // that are actually dereferenced below
         if (count($a) < 4) {
             continue;
         }
         $taxid = parent::getNamespace() . trim($a[0]);
         $name = utf8_encode($a[1]);
         $name_class = str_replace(" ", "-", $a[3]);
         $rel = parent::getVoc() . $name_class;
         parent::addRDF(parent::triplifyString($taxid, $rel, $name) . parent::triplifyString($taxid, parent::getVoc() . "unique-name", utf8_encode($a[2])));
         // compare the bare name class: $rel carries the vocabulary prefix and
         // would never equal "scientific-name"
         if ($name_class == "scientific-name") {
             parent::addRDF(parent::triplifyString($taxid, "dc:title", $name) . parent::triplifyString($taxid, "rdfs:label", $name));
         }
         $this->writeRDFBufferToWriteFile();
     }
 }
Beispiel #14
0
 /**
  * Convert disorder-gene associations from an XML file into RDF.
  *
  * For every Disorder in each parsed DisorderList, and for every
  * DisorderGeneAssociation under it, this emits the gene (label, symbol, all
  * synonyms and all external references) and an association individual that
  * links the disorder, the gene, the association type and the association
  * status. The RDF buffer is written out after each disorder.
  *
  * @param string $file path handed to CXML
  */
 function genes($file)
 {
     $xml = new CXML($file);
     while ($xml->parse("DisorderList") == TRUE) {
         $x = $xml->GetXMLRoot();
         foreach ($x->Disorder as $d) {
             $orphanet_id = parent::getNamespace() . (string) $d->OrphaNumber;
             $disorder_name = (string) $d->Name;
             foreach ($d->DisorderGeneAssociationList->DisorderGeneAssociation as $dga) {
                 // gene
                 $gene = $dga->Gene;
                 $gene_id = parent::getNamespace() . (string) $gene->OrphaNumber;
                 $gene_label = (string) $gene->Name;
                 $gene_symbol = (string) $gene->Symbol;
                 parent::addRDF(parent::describeIndividual($gene_id, $gene_label, parent::getVoc() . "Gene") . parent::describeClass(parent::getVoc() . "Gene", "orphanet gene") . parent::triplifyString($gene_id, parent::getVoc() . "symbol", $gene_symbol));
                 // Iterating the wrapper element alone exposes only its first child,
                 // so walk the <Synonym> children of each <SynonymList> explicitly.
                 foreach ($gene->SynonymList as $synonym_list) {
                     foreach ($synonym_list->Synonym as $syn) {
                         parent::addRDF(parent::triplifyString($gene_id, parent::getVoc() . "synonym", (string) $syn));
                     }
                 }
                 // Same for external references: one xref per <ExternalReference> child.
                 foreach ($gene->ExternalReferenceList as $reference_list) {
                     foreach ($reference_list->ExternalReference as $er) {
                         $db = parent::getRegistry()->getPreferredPrefix((string) $er->Source);
                         $id = (string) $er->Reference;
                         $xref = "{$db}:{$id}";
                         parent::addRDF(parent::triplify($gene_id, parent::getVoc() . "x-{$db}", $xref));
                     }
                 }
                 // the association id is derived from the disorder number and a hash of the association XML
                 $dga_id = parent::getRes() . (string) $d->OrphaNumber . "_" . md5($dga->asXML());
                 $ga = $dga->DisorderGeneAssociationType;
                 $ga_id = parent::getNamespace() . (string) $ga->attributes()->id;
                 $ga_label = (string) $ga->Name;
                 $s = $dga->DisorderGeneAssociationStatus;
                 $s_id = parent::getNamespace() . (string) $s->attributes()->id;
                 $s_label = (string) $s->Name;
                 parent::addRDF(parent::describeIndividual($dga_id, "{$ga_label} {$gene_label} in {$disorder_name} ({$s_label})", $ga_id) . parent::describeClass($ga_id, $ga_label, parent::getVoc() . "Disorder-Gene-Association") . parent::triplify($dga_id, parent::getVoc() . "status", $s_id) . parent::describeClass($s_id, $s_label, parent::getVoc() . "Disorder-Gene-Association-Status") . parent::triplify($dga_id, parent::getVoc() . "disorder", $orphanet_id) . parent::describeIndividual($orphanet_id, $disorder_name, parent::getVoc() . "Disorder") . parent::triplify($dga_id, parent::getVoc() . "gene", $gene_id));
             }
             parent::writeRDFBufferToWriteFile();
         }
     }
     unset($xml);
 }
Beispiel #15
0
 /**
  * Convert the SNP records of an XML file into RDF.
  *
  * Every child of the XML root is treated as one SNP record. For each record
  * this emits its class and molecule type, taxon, genotype flag, validation
  * flags and hgvs names. When the record's Assembly is marked
  * reference="true" it also emits build information, the contig/chromosome of
  * the Component, and one "Fxnset" resource per child of MapLoc.
  *
  * @param string $file path handed to CXML
  * @return false|null false when no XML root is available; otherwise nothing is returned
  */
 function parse($file)
 {
     $xml = new CXML($file);
     $xml->parse();
     $entry = $xml->getXMLRoot();
     if (!isset($entry) or !$entry) {
         return false;
     }
     // fxnset attributes stored as plain literals: XML attribute name => predicate name
     $fxnset_literals = array(
         "fxnClass" => "fxn-class",
         "allele" => "allele",
         "residue" => "residue",
         "readingFrame" => "reading-frame",
         "aaPosition" => "position",
     );
     // fxnset attributes stored as refseq links: XML attribute name => predicate name
     $fxnset_refseq = array(
         "mrnaAcc" => "mrna",
         "protAcc" => "protein",
     );
     foreach ($entry->children() as $o) {
         $attrs = $o->attributes();
         $rsid = "rs" . (string) $attrs->rsId;
         $id = parent::getNamespace() . $rsid;
         $snp_class = (string) $attrs->snpClass;
         $mol_type = (string) $attrs->molType;
         $type = parent::getVoc() . ucfirst(str_replace(" ", "-", $snp_class));
         $moltype = parent::getVoc() . $mol_type;
         // attributes
         parent::addRDF(parent::describeIndividual($id, $rsid, $type) . parent::describeClass($type, ucfirst($snp_class)) . parent::triplify($id, parent::getVoc() . "mol-type", $moltype) . parent::describeClass($moltype, $mol_type, parent::getVoc() . "Moltype") . parent::describeClass(parent::getVoc() . "Moltype", "Moltype") . parent::triplify($id, parent::getVoc() . "taxid", "taxonomy:" . (string) $attrs->taxId));
         // The genotype flag is stored as the raw attribute value typed as
         // xsd:boolean ("xsd:bool" is not an XML Schema datatype, and the
         // literal must not carry the vocabulary prefix).
         $genotype = (string) $attrs->genoType;
         if ($genotype !== '') {
             parent::addRDF(parent::triplifyString($id, parent::getVoc() . "genotype", $genotype, "xsd:boolean"));
         }
         // frequency
         // create/update
         /*			if(!isset($o->Update)) $a = $o->Create;
         			else $a = $o->Update;
         			parent::addRDF(parent::triplifyString($id,parent::getVoc()."build",(string) $a->attributes()->build));
         */
         //validation
         $a = $o->Validation;
         parent::addRDF(parent::triplifyString($id, parent::getVoc() . "validation-by-cluster", (string) $a->attributes()->byCluster) . parent::triplifyString($id, parent::getVoc() . "validation-by-frequency", (string) $a->attributes()->byFrequency) . parent::triplifyString($id, parent::getVoc() . "validation-by-2hit2allele", (string) $a->attributes()->by2Hit2Allele) . parent::triplifyString($id, parent::getVoc() . "validation-by-1000G", (string) $a->attributes()->by1000G));
         //hgvs names
         foreach ($o->hgvs as $name) {
             parent::addRDF(parent::triplifyString($id, parent::getVoc() . "hgvs-name", (string) $name));
         }
         // assembly: only the one flagged as the reference assembly is described
         $assembly = $o->Assembly;
         if ($assembly and $assembly->attributes()->reference == "true") {
             parent::addRDF(parent::triplifyString($id, parent::getVoc() . "dbsnp-build", (string) $assembly->attributes()->dbSnpBuild) . parent::triplifyString($id, parent::getVoc() . "genome-build", (string) $assembly->attributes()->genomeBuild));
             $component = $assembly->Component;
             if ($component) {
                 parent::addRDF(parent::triplify($id, parent::getVoc() . "contig-accession", "genbank:" . (string) $component->attributes()->accession) . parent::triplify($id, parent::getVoc() . "contig-gi", "gi:" . (string) $component->attributes()->gi) . parent::triplifyString($id, parent::getVoc() . "chromosome", (string) $component->attributes()->chromosome));
                 $maploc = $component->MapLoc;
                 if ($maploc) {
                     foreach ($maploc->children() as $fxnset) {
                         // each fxnset gets an id derived from a hash of its own XML
                         $fxnset_id = parent::getRes() . md5($fxnset->asXML());
                         parent::addRDF(parent::triplify($id, parent::getVoc() . "maps-to", $fxnset_id) . parent::triplify($fxnset_id, "rdf:type", parent::getVoc() . "Fxnset") . parent::describeClass(parent::getVoc() . "Fxnset", "Fxnset"));
                         $fa = $fxnset->attributes();
                         if (isset($fa->geneId)) {
                             parent::addRDF(parent::triplify($fxnset_id, parent::getVoc() . "gene", "ncbigene:" . (string) $fa->geneId));
                         }
                         if (isset($fa->symbol)) {
                             parent::addRDF(parent::triplifyString($fxnset_id, parent::getVoc() . "gene-symbol", (string) $fa->symbol));
                         }
                         foreach ($fxnset_refseq as $attr => $rel) {
                             if (isset($fa->{$attr})) {
                                 parent::addRDF(parent::triplify($fxnset_id, parent::getVoc() . $rel, "refseq:" . (string) $fa->{$attr}));
                             }
                         }
                         foreach ($fxnset_literals as $attr => $rel) {
                             if (isset($fa->{$attr})) {
                                 parent::addRDF(parent::triplifyString($fxnset_id, parent::getVoc() . $rel, (string) $fa->{$attr}));
                             }
                         }
                     }
                 }
             }
         }
     }
     unset($xml);
 }