Ejemplo n.º 1
0
 /**
  * Entry point: obtain miriam.xml, bind the read and write files, parse,
  * then flush the RDF buffer and close the output file.
  *
  * The XML file is downloaded when it does not yet exist under the "indir"
  * directory, or when the "download" parameter equals "true".
  *
  * @return bool always true
  */
 function Run()
 {
     echo "processing miriam database";

     $inputDir = $this->getParameterValue('indir');
     // Looked up here as before; the value itself is not used further down.
     $outputDir = $this->getParameterValue('outdir');

     // Remote source and local destination of the registry dump.
     $remoteUrl = $this->getParameterValue("download_url");
     $localPath = $inputDir . 'miriam.xml';

     // Only consult the "download" parameter when the file is already present.
     $needsDownload = !file_exists($localPath);
     if (!$needsDownload) {
         $needsDownload = $this->getParameterValue("download") == "true";
     }
     if ($needsDownload) {
         utils::downloadSingle($remoteUrl, $localPath);
     }
     parent::setReadFile($localPath);

     // Output file name follows the requested format; a ".gz" in the format
     // string switches the second setWriteFile() argument on.
     $format = parent::getParameterValue('output_format');
     $compress = strstr($format, ".gz") !== false;
     parent::setWriteFile(parent::getParameterValue("outdir") . "miriam." . $format, $compress);

     $this->parse();
     parent::WriteRDFBufferToWriteFile();
     $this->getWriteFile()->Close();

     return true;
 }
Ejemplo n.º 2
0
 /**
  * Read the interaction-type table from the current read file and emit one
  * class description per row.
  *
  * Lines starting with '#' are skipped. Rows are tab-separated; the first
  * data row must have exactly 4 fields, otherwise a warning is raised and
  * the method stops. Field [1] is appended to the vocabulary prefix to form
  * the class identifier, field [3] (trimmed) is used as the parent class
  * when it is non-empty, and fields [0] and [2] are handed to
  * describeClass() as its second and fifth arguments.
  *
  * @return bool FALSE when the first data row has the wrong field count,
  *              TRUE once the file has been consumed
  */
 function CTD_chem_gene_ixn_types()
 {
     $columnsVerified = false;
     while ($line = $this->GetReadFile()->Read()) {
         if ($line[0] == '#') {
             continue;
         }
         $fields = explode("\t", $line);

         // The column count is validated once, on the first non-comment row.
         if (!$columnsVerified) {
             $c = count($fields);
             if ($c != 4) {
                 trigger_error("CTD_chem_gene_ixn_types function expects 4 fields, found {$c}!" . PHP_EOL, E_USER_WARNING);
                 return FALSE;
             }
             $columnsVerified = true;
         }

         $id = $this->getVoc() . $fields[1];

         // An empty parent code means the class is described without a superclass.
         $parentCode = trim($fields[3]);
         $superclass = empty($parentCode) ? null : $this->getVoc() . $parentCode;

         $this->AddRDF(parent::describeClass($id, $fields[0], $superclass, null, $fields[2]));
         parent::WriteRDFBufferToWriteFile();
     }
     return TRUE;
 }
Ejemplo n.º 3
0
 /**
  * Convert the interaction table in the current read file to RDF.
  *
  * Lines starting with '#' are skipped. Rows are tab-separated and must have
  * 11 columns; the columns read here are [0] interaction id, [1] interaction
  * type, [2] additional info, [5] first gene and [8] second gene.
  *
  * Depending on the additional-info column a row becomes
  *  - "No_interaction": a non-interaction individual plus an
  *    owl:NegativeObjectPropertyAssertion between the two genes;
  *  - "N/A" or "Genetic_interaction": a plain interaction individual plus a
  *    direct gene-to-gene triple;
  *  - anything else: an interaction individual typed with a subclass named
  *    after the additional info, plus the direct gene-to-gene triple.
  *
  * Rows with the wrong column count or an unrecognised interaction type are
  * reported through trigger_error() and skipped.
  */
 function gene_interactions()
 {
     // Interaction type => local name of the predicate linking the two genes.
     $predicates = array(
         "Genetic" => "genetically-interacts-with",
         "Physical" => "physically-interacts-with",
         "Predicted" => "predicted-to-interact-with",
         "Regulatory" => "regulates",
     );

     while ($l = parent::getReadFile()->Read()) {
         if ($l[0] == '#') {
             continue;
         }
         $data = explode("\t", $l);
         if (count($data) != 11) {
             trigger_error("Found " . count($data) . " columns, expecting 11");
             continue;
         }
         $interaction = $data[0];
         $interaction_type = str_replace("_", "-", $data[1]);
         $interaction_type_label = str_replace("_", " ", $data[1]);
         $int_additional_info = $data[2];
         $gene1 = $data[5];
         $gene2 = $data[8];

         // Resolve the predicate per row. Previously an unknown type left the
         // predicate undefined, or silently reused the one from the previous
         // row, which produced wrong triples.
         if (!isset($predicates[$interaction_type])) {
             trigger_error("Unknown interaction type '" . $data[1] . "' for interaction " . $interaction . ", skipping row", E_USER_WARNING);
             continue;
         }

         $voc = parent::getVoc();
         $ns = parent::getNamespace();
         $int_pred = $voc . $predicates[$interaction_type];
         $interaction_id = $ns . $interaction;
         $gene1_id = $ns . $gene1;
         $gene2_id = $ns . $gene2;
         $base_type = $voc . $interaction_type . "-Interaction";

         if ($int_additional_info == "No_interaction") {
             $interaction_label = "No " . strtolower($interaction_type) . " interaction between " . $gene1 . " and " . $gene2;
             $non_type = $voc . $interaction_type . "-Non-Interaction";
             parent::addRDF(
                 parent::describeIndividual($interaction_id, $interaction_label, $non_type) .
                 parent::describeClass($non_type, $interaction_type_label . " non-interaction") .
                 parent::triplify($interaction_id, $voc . "involves", $gene1_id) .
                 parent::triplify($interaction_id, $voc . "involves", $gene2_id)
             );

             // State explicitly that the predicate does NOT hold between the genes.
             $npa_id = parent::getRes() . md5($interaction_id . "negative property assertion");
             $npa_label = "Negative property assertion stating that " . $gene1 . " and " . $gene2 . " do not have a " . $interaction_type_label . " interaction";
             parent::addRDF(
                 parent::describeIndividual($npa_id, $npa_label, "owl:NegativeObjectPropertyAssertion") .
                 parent::triplify($npa_id, "owl:sourceIndividual", $gene1_id) .
                 parent::triplify($npa_id, "owl:targetIndividual", $gene2_id) .
                 parent::triplify($npa_id, "owl:assertionProperty", $int_pred)
             );
         } elseif ($int_additional_info == "N/A" || $int_additional_info == "Genetic_interaction") {
             $interaction_label = $interaction_type . " interaction between " . $gene1 . " and " . $gene2;
             parent::addRDF(
                 parent::describeIndividual($interaction_id, $interaction_label, $base_type) .
                 parent::describeClass($base_type, $interaction_type_label . " Interaction") .
                 parent::triplify($interaction_id, $voc . "involves", $gene1_id) .
                 parent::triplify($interaction_id, $voc . "involves", $gene2_id) .
                 parent::triplify($gene1_id, $int_pred, $gene2_id)
             );
         } else {
             // Any other additional info becomes a subclass of the base interaction class.
             $prefix_label = ($int_additional_info != "" ? $int_additional_info . " " : "");
             $interaction_label = $prefix_label . strtolower($interaction_type) . " interaction between " . $gene1 . " and " . $gene2;
             $type = $voc . ($int_additional_info != "" ? $int_additional_info . "-" : "") . $interaction_type . "-Interaction";
             $type_label = $prefix_label . $interaction_type_label . " Interaction";
             parent::addRDF(
                 parent::describeIndividual($interaction_id, $interaction_label, $type) .
                 parent::describeClass($type, $type_label, $base_type) .
                 // Label corrected from " Interation" and aligned with the branch above.
                 parent::describeClass($base_type, $interaction_type_label . " Interaction") .
                 parent::triplify($interaction_id, $voc . "involves", $gene1_id) .
                 parent::triplify($interaction_id, $voc . "involves", $gene2_id) .
                 parent::triplify($gene1_id, $int_pred, $gene2_id)
             );
         }
         parent::WriteRDFBufferToWriteFile();
     }
 }
Ejemplo n.º 4
0
 /**
  * Convert the tab-separated product table read from $fpin to RDF.
  *
  * The first line is treated as a header and discarded. Every following row
  * must have 18 columns; other rows are reported via trigger_error() and
  * skipped. Columns used: [0] identifier, [1] product id, [2] product type,
  * [3] proprietary name, [4] trade-name suffix, [5] non-proprietary names
  * (";"-separated), [6] dosage forms (","-separated), [7] routes
  * ("; "-separated), [8]/[9] start/end marketing date, [10] marketing
  * category, [11] application number, [12] labeller, [13]/[14]/[15]
  * substances / strengths / units (";"-separated, matched by position),
  * [16] pharmacological classes (","-separated).
  *
  * @param resource $fpin open, readable file handle positioned at the header line
  */
 function product($fpin)
 {
     fgets($fpin);
     // header
     while ($l = fgets($fpin, 100000)) {
         $a = explode("\t", $l);
         if (count($a) != 18) {
             trigger_error("Expected 18 columns, instead found " . count($a));
             continue;
         }
         $voc = parent::getVoc();
         $product_id = parent::getNamespace() . $a[0];
         $product_label = $a[3];
         $product_type_label = ucfirst(strtolower($a[2]));
         // The type IRI is derived from the type column (it used to be built
         // from the proprietary name, giving every product its own "type").
         $product_type = $voc . str_replace(" ", "-", $product_type_label);
         parent::addRDF(
             parent::describeIndividual($product_id, $product_label, $voc . "Product") .
             parent::describeClass($voc . "Product", "NDC Product") .
             parent::triplify($product_id, $voc . "product-type", $product_type) .
             parent::describeIndividual($product_type, $product_type_label, $voc . "Product-Type") .
             parent::describeClass($voc . "Product-Type", "Product Type") .
             parent::triplifyString($product_id, $voc . "product-id", $a[1]) .
             parent::triplifyString($product_id, $voc . "proprietary-name", $a[3]) .
             parent::triplifyString($product_id, $voc . "trade-name-suffix", $a[4])
         );
         if ($a[5]) {
             $b = explode(";", $a[5]);
             foreach ($b as $c) {
                 parent::addRDF(parent::triplifyString($product_id, $voc . "non-proprietary-name", trim($c)));
             }
         }
         if ($a[6]) {
             $b = explode(",", $a[6]);
             foreach ($b as $c) {
                 $dosageform = strtolower($c);
                 $dosageform_id = $voc . str_replace(" ", "-", ucfirst(strtolower($c)));
                 parent::addRDF(
                     parent::describeIndividual($dosageform_id, $dosageform, $voc . "Dosage-Form") .
                     parent::describeClass($voc . "Dosage-Form", "NDC Dosage Form") .
                     parent::triplify($product_id, $voc . "dosage-form", $dosageform_id)
                 );
             }
         }
         if ($a[7]) {
             //  MV
             $b = explode("; ", $a[7]);
             foreach ($b as $c) {
                 $route = strtolower(trim($c));
                 $route_id = $voc . str_replace(" ", "-", ucfirst(strtolower($c)));
                 parent::addRDF(
                     parent::describeIndividual($route_id, $route, $voc . "Route") .
                     parent::describeClass($voc . "Route", "NDC Drug Route") .
                     parent::triplify($product_id, $voc . "route", $route_id)
                 );
             }
         }
         // Marketing dates: reshaped into 4-2-2 character groups joined by "-".
         // NOTE(review): presumably the raw value is YYYYMMDD; confirm against the data.
         // substr() takes (string, offset, length); the arguments used to be
         // passed in the wrong order, which reduced every date to "--".
         foreach (array(8 => "start-marketing-date", 9 => "end-marketing-date") as $col => $date_predicate) {
             if ($a[$col]) {
                 $date = substr($a[$col], 0, 4) . "-" . substr($a[$col], 4, 2) . "-" . substr($a[$col], 6, 2);
                 parent::addRDF(parent::triplifyString($product_id, $voc . $date_predicate, $date));
             }
         }
         if ($a[10]) {
             parent::addRDF(parent::triplifyString($product_id, $voc . "marketing-category", $a[10]));
         }
         if ($a[11]) {
             parent::addRDF(parent::triplifyString($product_id, $voc . "application-number", $a[11]));
         }
         // create a labeller node
         if ($a[12]) {
             $labeller_id = parent::getRes() . md5($a[12]);
             $label = addslashes($a[12]);
             parent::addRDF(
                 parent::describeIndividual($labeller_id, $label, $voc . "Labeller") .
                 parent::describeClass($voc . "Labeller", "NDC Labeller") .
                 parent::triplify($product_id, $voc . "labeller", $labeller_id)
             );
         }
         // the next three are together
         if ($a[13]) {
             // MV
             $substances = explode(";", $a[13]);
             $strengths = explode(";", $a[14]);
             $units = explode(";", $a[15]);
             foreach ($substances as $i => $substance) {
                 // list the active ingredient
                 $ingredient_label = strtolower($substance);
                 // Strength and unit lists may be shorter than the substance list.
                 $strength = isset($strengths[$i]) ? $strengths[$i] : '';
                 $unit = isset($units[$i]) ? $units[$i] : '';
                 $ingredient_id = parent::getRes() . md5($ingredient_label);
                 parent::addRDF(
                     parent::describeIndividual($ingredient_id, $ingredient_label, $voc . "Ingredient") .
                     parent::describeClass($voc . "Ingredient", "NDC Ingredient") .
                     parent::triplify($product_id, $voc . "ingredient", $ingredient_id)
                 );
                 // describe the substance composition
                 $substance_label = "{$strength} {$unit} {$ingredient_label}";
                 $substance_id = parent::getRes() . md5($substance_label);
                 parent::addRDF(
                     parent::describeIndividual($substance_id, $substance_label, $voc . "Substance") .
                     parent::triplifyString($substance_id, $voc . "amount", $strength) .
                     parent::describeClass($voc . "Substance", "NDC Substance") .
                     parent::triplify($product_id, $voc . "has-part", $substance_id)
                 );
                 // Only describe a unit node when a unit was actually supplied.
                 if ($unit !== '') {
                     $unit_id = $voc . md5($unit);
                     parent::addRDF(
                         parent::describeIndividual($unit_id, $unit, $voc . "Unit") .
                         parent::describeClass($voc . "Unit", "NDC Unit") .
                         parent::triplify($substance_id, $voc . "amount_unit", $unit_id)
                     );
                 }
             }
         }
         if ($a[16]) {
             // MV
             $b = explode(",", $a[16]);
             foreach ($b as $c) {
                 $cat_id = $voc . md5($c);
                 parent::addRDF(
                     parent::describeIndividual($cat_id, $c, $voc . "Pharmacological-Class") .
                     parent::describeClass($voc . "Pharmacological-Class", "NDC Pharmacological Class") .
                     parent::triplify($product_id, $voc . "pharmacological-class", $cat_id)
                 );
             }
         }
         parent::WriteRDFBufferToWriteFile();
     }
 }
Ejemplo n.º 5
0
 /**
  * Convert the tab-separated drugs table in the current read file to RDF.
  *
  * The first line is the header and must have 10 columns; otherwise an
  * E_USER_ERROR is raised and FALSE is returned. For each data row the
  * columns used are: [0] drug identifier, [1] name, [2] generic names,
  * [3] trade names, [4] brand mixtures, [5] type, [6] cross references and
  * [9] external vocabulary (ATC codes). Columns [2]-[4], [6] and [9] are
  * comma-separated multi-value fields. The id => name pair of every row is
  * also recorded in $this->drugs.
  *
  * @return bool|null FALSE when the header has an unexpected column count;
  *                   nothing is returned otherwise
  */
 function drugs()
 {
     // ATC codes whose rdfs:label has already been emitted. This must be an
     // array: writing a string key into '' is a string-offset error on
     // current PHP versions.
     $declared = array();
     $h = explode("\t", $this->GetReadFile()->Read(1000));
     // first line is header
     if (count($h) != 10) {
         trigger_error("Change in number of columns for drugs file", E_USER_ERROR);
         return FALSE;
     }
     while ($l = $this->GetReadFile()->Read(200000)) {
         // Pad short rows so the fixed column indexes below are always defined.
         $a = array_pad(explode("\t", $l), 10, '');
         $id = parent::getNamespace() . $a[0];
         $this->drugs[$a[0]] = $a[1];
         parent::addRDF(parent::describeIndividual($id, $a[1], parent::getVoc() . "Drug") . parent::describeClass(parent::getVoc() . "Drug", "PharmGKB Drug"));
         if (trim($a[2])) {
             // generic names
             // Entacapona [INN-Spanish],Entacapone [Usan:Inn],Entacaponum [INN-Latin],entacapone
             $b = explode(',', trim($a[2]));
             foreach ($b as $c) {
                 parent::addRDF(parent::triplifyString($id, parent::getVoc() . "generic_name", str_replace('"', '', $c)));
             }
             parent::addRDF(parent::describeProperty(parent::getVoc() . "generic_name", "Relationship between a PharmGKB drug and a generic name"));
         }
         if (trim($a[3])) {
             // trade names
             //Disorat,OptiPranolol,Trimepranol
             $b = explode(',', trim($a[3]));
             foreach ($b as $c) {
                 parent::addRDF(parent::triplifyString($id, parent::getVoc() . "trade_name", str_replace(array("'", "\""), array("\\\\'", ""), $c)));
             }
             parent::addRDF(parent::describeProperty(parent::getVoc() . "trade_name", "Relationship between a PharmGKB drug and a trade name"));
         }
         if (trim($a[4])) {
             // Brand Mixtures
             // Benzyl benzoate 99+ %,"Dermadex Crm (Benzoic Acid + Benzyl Benzoate + Lindane + Salicylic Acid + Zinc Oxide + Zinc Undecylenate)",
             $b = explode(',', trim($a[4]));
             foreach ($b as $c) {
                 parent::addRDF(parent::triplifyString($id, parent::getVoc() . "brand_mixture", str_replace(array("'", "\""), array("\\\\'", ""), $c)));
             }
             parent::addRDF(parent::describeProperty(parent::getVoc() . "brand_mixture", "Relationship between a PharmGKB drug and a brand mixture"));
         }
         if (trim($a[5])) {
             // Type
             parent::addRDF(parent::triplifyString($id, parent::getVoc() . "drug_class", str_replace(array("'", "\""), array("\\\\'", ""), $a[5])) . parent::describeProperty(parent::getVoc() . "drug_class", "Relationship between a PharmGKB drug and its drug class"));
         }
         if (trim($a[6])) {
             // Cross References
             // drugBank:DB00789,keggDrug:D01707,pubChemCompound:55466,pubChemSubstance:192903,url:http://en.wikipedia.org/wiki/Gadopentetate_dimeglumine
             $b = explode(',', trim(str_replace('"', '', $a[6])));
             foreach ($b as $c) {
                 // parseQName() fills $ns and $id1 from the "prefix:identifier" entry.
                 $this->getRegistry()->parseQName($c, $ns, $id1);
                 $ns = str_replace(array('"', ' '), '', $ns);
                 // Normalise source-specific prefixes to the namespaces used in the output.
                 $ns = str_replace(array('keggcompound', 'keggdrug', 'drugbank', 'uniprotkb', 'clinicaltrials.gov', 'drugsproductdatabase(dpd)', 'nationaldrugcodedirectory', 'therapeutictargetsdatabase', 'fdadruglabelatdailymed'), array('kegg', 'kegg', 'drugbank', 'uniprot', 'clinicaltrials', 'dpd', 'ndc', 'ttd', 'dailymed'), strtolower(str_replace('"', '', $ns)));
                 if ($ns == "url") {
                     // Link to the parsed URL; the drug used to be linked to itself.
                     // NOTE(review): assumes parseQName() leaves the full URL in $id1 - confirm.
                     parent::addRDF(parent::QQuadO_URL($id, "rdfs:seeAlso", $id1));
                 } else {
                     parent::addRDF(parent::triplify($id, parent::getVoc() . "x-" . $ns, $ns . ":" . $id1));
                 }
             }
         }
         if (trim($a[9])) {
             // External Vocabulary
             // ATC:H01AC(Somatropin and somatropin agonists),ATC:V04CD(Tests for pituitary function)
             // ATC:D07AB(Corticosteroids, moderately potent (group II)) => this is why you don't use brackets and commas as separators.
             // NOTE(review): the limit of 2 means at most two chunks are examined per row.
             $b = explode(',', trim($a[9]), 2);
             foreach ($b as $c) {
                 preg_match_all("/ATC:([A-Z0-9]+)\\((.*)\\)\$/", $c, $m);
                 if (isset($m[1][0])) {
                     $atc = "atc:" . $m[1][0];
                     parent::addRDF(parent::triplify($id, parent::getVoc() . "x-atc", $atc));
                     // Emit each ATC label only once per run.
                     if (!isset($declared[$atc])) {
                         $declared[$atc] = '';
                         parent::addRDF(parent::triplifyString($atc, "rdfs:label", $m[2][0]));
                     }
                 }
             }
         }
         parent::WriteRDFBufferToWriteFile();
     }
 }
Ejemplo n.º 6
0
 /**
  * Convert the comma-separated model-organism gene table in the current read
  * file to RDF.
  *
  * The header line must have 8 columns, otherwise a warning is raised and
  * false is returned. Data rows are parsed with str_getcsv(); rows with fewer
  * than 8 fields are reported and skipped. Column layout:
  *   [0] GenAge ID (left-padded with zeros to 4 characters)
  *   [1] symbol
  *   [2] name
  *   [3] organism
  *   [4] entrez gene id
  *   [5] avg lifespan change (max obsv)
  *   [6] lifespan effect
  *   [7] longevity influence
  *
  * @return bool|null false when the header has an unexpected column count;
  *                   nothing is returned otherwise
  */
 function models()
 {
     // Organism name => NCBI taxonomy identifier.
     $tax_ids = array("Caenorhabditis elegans" => "6239", "Mus musculus" => "10090", "Saccharomyces cerevisiae" => "4932", "Drosophila melanogaster" => "7227", "Podospora anserina" => "5145", "Mesocricetus auratus" => "10036", "Schizosaccharomyces pombe" => "4896", "Danio rerio" => "7955");
     $h = explode(",", parent::getReadFile()->read());
     $expected_columns = 8;
     if (($n = count($h)) != $expected_columns) {
         trigger_error("Found {$n} columns in gene file - expecting {$expected_columns}!", E_USER_WARNING);
         return false;
     }
     while ($l = parent::getReadFile()->read(200000)) {
         $data = str_getcsv($l);
         // Blank or truncated rows would otherwise hit undefined indexes below.
         if (($n = count($data)) < $expected_columns) {
             trigger_error("Found {$n} columns in gene row - expecting {$expected_columns}, skipping row", E_USER_WARNING);
             continue;
         }
         $genage = str_pad($data[0], 4, "0", STR_PAD_LEFT);
         $gene_symbol = $data[1];
         $name = $data[2];
         $organism = $data[3];
         $ncbi_gene_id = $data[4];
         $max_percent_obsv_avg_lifespan_change = $data[5];
         $lifespan_effect = $data[6];
         $longevity_influence = $data[7];
         $voc = parent::getVoc();
         $genage_id = parent::getNamespace() . $genage;
         parent::addRDF(parent::describeIndividual($genage_id, $name, $voc . "Aging-Related-Gene") . parent::describeClass($voc . "Aging-Related-Gene", "Aging Related Gene"));
         parent::addRDF(parent::triplifyString($genage_id, $voc . "gene-symbol", parent::safeLiteral($gene_symbol)));
         // Only emit a taxon link for organisms present in the lookup table;
         // an unknown organism used to yield a dangling "ncbitaxon:" reference.
         if (isset($tax_ids[$organism])) {
             parent::addRDF(parent::triplify($genage_id, $voc . "taxon", "ncbitaxon:" . $tax_ids[$organism]));
         } else {
             trigger_error("No NCBI taxonomy id known for organism '{$organism}' (GenAge {$genage})", E_USER_WARNING);
         }
         if ($ncbi_gene_id !== "") {
             parent::addRDF(parent::triplify($genage_id, $voc . "x-ncbigene", "ncbigene:" . $ncbi_gene_id));
         }
         if ($max_percent_obsv_avg_lifespan_change !== "") {
             parent::addRDF(parent::triplifyString($genage_id, $voc . "maximum-percent-observed-average-lifespan-change", parent::safeLiteral($max_percent_obsv_avg_lifespan_change)));
         }
         // A combined effect is split into two separate literal values.
         if ($lifespan_effect == "Increase and Decrease") {
             parent::addRDF(parent::triplifyString($genage_id, $voc . "lifespan-effect", "increase") . parent::triplifyString($genage_id, $voc . "lifespan-effect", "decrease"));
         } else {
             parent::addRDF(parent::triplifyString($genage_id, $voc . "lifespan-effect", strtolower($lifespan_effect)));
         }
         parent::addRDF(parent::triplifyString($genage_id, $voc . "longevity-influence", strtolower($longevity_influence)));
         parent::WriteRDFBufferToWriteFile();
     }
 }