/**
 * Entry point of the MIRIAM parser.
 *
 * Fetches `miriam.xml` into the configured input directory when it is not
 * there yet (or when the "download" parameter is the string "true"),
 * registers it as the read file, opens `miriam.<output_format>` in the
 * configured output directory as the write file, runs `parse()`, flushes the
 * RDF buffer and closes the write file.
 *
 * @return bool always true
 */
function Run() {
    echo "processing miriam database";

    // download and set the read file
    $file = 'miriam.xml';
    $rfile = $this->getParameterValue("download_url");
    $lfile = $this->getParameterValue('indir') . $file;
    if (!file_exists($lfile) || $this->getParameterValue("download") == "true") {
        utils::downloadSingle($rfile, $lfile);
    }
    parent::setReadFile($lfile);

    // set the write file; it is gzip-compressed when the format name contains ".gz"
    $format = parent::getParameterValue('output_format');
    $outfile = "miriam." . $format;
    $gz = strstr($format, ".gz") !== false;
    parent::setWriteFile(parent::getParameterValue("outdir") . $outfile, $gz);

    $this->parse();
    parent::WriteRDFBufferToWriteFile();
    $this->getWriteFile()->Close();
    return true;
}
/**
 * Convert the chemical-gene interaction type table into class descriptions.
 *
 * Reads the current read file line by line, ignoring lines that start with
 * '#'. Each remaining line is split on tabs; the first such line must have
 * exactly 4 fields, otherwise a warning is raised and parsing stops.
 * Field 1 becomes the class identifier (prefixed with the vocabulary
 * namespace), field 0 and field 2 are handed to describeClass() as its second
 * and fifth arguments, and a non-empty field 3 becomes the parent class.
 * The RDF buffer is flushed after every row.
 *
 * @return bool FALSE when the first data row does not have 4 fields, TRUE otherwise
 */
function CTD_chem_gene_ixn_types() {
    $columnsChecked = false;

    while ($line = $this->GetReadFile()->Read()) {
        // comment line
        if ($line[0] == '#') {
            continue;
        }

        $fields = explode("\t", $line);

        // the column count is only verified once, on the first data row
        if (!$columnsChecked) {
            $found = count($fields);
            if ($found != 4) {
                trigger_error("CTD_chem_gene_ixn_types function expects 4 fields, found {$found}!" . PHP_EOL, E_USER_WARNING);
                return FALSE;
            }
            $columnsChecked = true;
        }

        $id = $this->getVoc() . $fields[1];
        $parent = trim($fields[3]);

        // a row without a parent code yields a class with no superclass
        $parentId = empty($parent) ? null : $this->getVoc() . $parent;

        $this->AddRDF(parent::describeClass($id, $fields[0], $parentId, null, $fields[2]));
        parent::WriteRDFBufferToWriteFile();
    }

    return TRUE;
}
/**
 * Convert the tab-separated gene interaction file into RDF.
 *
 * Lines starting with '#' are ignored; rows that do not have exactly 11
 * columns are reported and skipped. Column 0 is the interaction identifier,
 * column 1 the interaction type, column 2 additional information about the
 * interaction, and columns 5 and 8 the two genes involved.
 *
 * Depending on column 2 the row is emitted as
 *  - "No_interaction": a non-interaction individual plus an
 *    owl:NegativeObjectPropertyAssertion between the two genes;
 *  - "N/A" or "Genetic_interaction": a plain <type>-Interaction individual
 *    plus a direct gene-to-gene triple;
 *  - anything else: an individual typed with a subclass named after the
 *    additional information, plus a direct gene-to-gene triple.
 *
 * Rows whose interaction type has no known predicate are skipped with a
 * warning. Previously such a row silently reused the predicate left over
 * from the preceding row (or an undefined variable on the first row).
 *
 * The RDF buffer is flushed after every emitted row.
 *
 * @return void
 */
function gene_interactions() {
    // vocabulary-local predicate name for each supported interaction type
    $predicates = array(
        "Genetic" => "genetically-interacts-with",
        "Physical" => "physically-interacts-with",
        "Predicted" => "predicted-to-interact-with",
        "Regulatory" => "regulates",
    );

    while ($l = parent::getReadFile()->Read()) {
        if ($l[0] == '#') {
            continue;
        }

        $data = explode("\t", $l);
        if (count($data) != 11) {
            trigger_error("Found " . count($data) . " columns, expecting 11");
            continue;
        }

        $interaction = $data[0];
        $interaction_type = str_replace("_", "-", $data[1]);       // used in identifiers
        $interaction_type_label = str_replace("_", " ", $data[1]); // used in labels
        $int_additional_info = $data[2];
        $gene1 = $data[5];
        $gene2 = $data[8];

        // never fall through with a stale or undefined predicate
        if (!isset($predicates[$interaction_type])) {
            trigger_error("Unknown interaction type '" . $interaction_type . "' for interaction " . $interaction . ", skipping row", E_USER_WARNING);
            continue;
        }
        $int_pred = parent::getVoc() . $predicates[$interaction_type];

        $interaction_id = parent::getNamespace() . $interaction;
        $gene1_id = parent::getNamespace() . $gene1;
        $gene2_id = parent::getNamespace() . $gene2;
        $involves = parent::getVoc() . "involves";
        $interaction_class = parent::getVoc() . $interaction_type . "-Interaction";

        if ($int_additional_info == "No_interaction") {
            $interaction_label = "No " . strtolower($interaction_type) . " interaction between " . $gene1 . " and " . $gene2;
            $non_interaction_class = parent::getVoc() . $interaction_type . "-Non-Interaction";
            parent::addRDF(
                parent::describeIndividual($interaction_id, $interaction_label, $non_interaction_class) .
                parent::describeClass($non_interaction_class, $interaction_type_label . " non-interaction") .
                parent::triplify($interaction_id, $involves, $gene1_id) .
                parent::triplify($interaction_id, $involves, $gene2_id)
            );

            // state explicitly that the predicate does NOT hold between the two genes
            $npa_id = parent::getRes() . md5($interaction_id . "negative property assertion");
            $npa_label = "Negative property assertion stating that " . $gene1 . " and " . $gene2 . " do not have a " . $interaction_type_label . " interaction";
            parent::addRDF(
                parent::describeIndividual($npa_id, $npa_label, "owl:NegativeObjectPropertyAssertion") .
                parent::triplify($npa_id, "owl:sourceIndividual", $gene1_id) .
                parent::triplify($npa_id, "owl:targetIndividual", $gene2_id) .
                parent::triplify($npa_id, "owl:assertionProperty", $int_pred)
            );
        } elseif ($int_additional_info == "N/A" || $int_additional_info == "Genetic_interaction") {
            $interaction_label = $interaction_type . " interaction between " . $gene1 . " and " . $gene2;
            parent::addRDF(
                parent::describeIndividual($interaction_id, $interaction_label, $interaction_class) .
                parent::describeClass($interaction_class, $interaction_type_label . " Interaction") .
                parent::triplify($interaction_id, $involves, $gene1_id) .
                parent::triplify($interaction_id, $involves, $gene2_id) .
                parent::triplify($gene1_id, $int_pred, $gene2_id)
            );
        } else {
            // the additional information, when present, names a more specific interaction class
            $has_info = $int_additional_info != "";
            $interaction_label = ($has_info ? $int_additional_info . " " : "") . strtolower($interaction_type) . " interaction between " . $gene1 . " and " . $gene2;
            $type = parent::getVoc() . ($has_info ? $int_additional_info . "-" : "") . $interaction_type . "-Interaction";
            $type_label = ($has_info ? $int_additional_info . " " : "") . $interaction_type_label . " Interaction";
            parent::addRDF(
                parent::describeIndividual($interaction_id, $interaction_label, $type) .
                parent::describeClass($type, $type_label, $interaction_class) .
                // label was misspelled "Interation"; now identical to the label the N/A branch gives this class
                parent::describeClass($interaction_class, $interaction_type_label . " Interaction") .
                parent::triplify($interaction_id, $involves, $gene1_id) .
                parent::triplify($interaction_id, $involves, $gene2_id) .
                parent::triplify($gene1_id, $int_pred, $gene2_id)
            );
        }

        parent::WriteRDFBufferToWriteFile();
    }
}
/**
 * Convert the tab-separated NDC product table into RDF.
 *
 * The first line of the stream is discarded as a header. Every following
 * line must split into exactly 18 tab-separated columns; other rows are
 * reported via trigger_error() and skipped. Columns, as used below:
 *   0 product identifier (becomes the resource id), 1 product id literal,
 *   2 product type, 3 proprietary name, 4 trade name suffix,
 *   5 non-proprietary names (';' separated), 6 dosage forms (',' separated),
 *   7 routes ('; ' separated), 8 / 9 start / end marketing date,
 *   10 marketing category, 11 application number, 12 labeller,
 *   13 / 14 / 15 substances, strengths and units (parallel ';' lists),
 *   16 pharmacological classes (',' separated).
 *
 * The RDF buffer is flushed after every row.
 *
 * Fixes relative to the previous version:
 *  - substr() was called as substr(0, 4, $value), so dates were always "--";
 *  - the product-type resource id was built from the proprietary name
 *    instead of the product type;
 *  - a missing unit entry is tolerated the same way a missing strength is;
 *  - the route id is built from the trimmed route, matching its label.
 *
 * @param resource $fpin open, readable stream positioned at the header line
 * @return void
 */
function product($fpin) {
    fgets($fpin); // header

    while ($l = fgets($fpin, 100000)) {
        $a = explode("\t", $l);
        if (count($a) != 18) {
            trigger_error("Expected 18 coloumns, instead found" . count($a));
            continue;
        }

        $product_id = parent::getNamespace() . $a[0];
        $product_label = $a[3];
        $product_type_label = ucfirst(strtolower($a[2]));
        // the type resource is named after the product type, not after the product itself
        $product_type = parent::getVoc() . str_replace(" ", "-", $product_type_label);

        parent::addRDF(
            parent::describeIndividual($product_id, $product_label, parent::getVoc() . "Product") .
            parent::describeClass(parent::getVoc() . "Product", "NDC Product") .
            parent::triplify($product_id, parent::getVoc() . "product-type", $product_type) .
            parent::describeIndividual($product_type, $product_type_label, parent::getVoc() . "Product-Type") .
            parent::describeClass(parent::getVoc() . "Product-Type", "Product Type") .
            parent::triplifyString($product_id, parent::getVoc() . "product-id", $a[1]) .
            parent::triplifyString($product_id, parent::getVoc() . "proprietary-name", $a[3]) .
            parent::triplifyString($product_id, parent::getVoc() . "trade-name-suffix", $a[4])
        );

        // non-proprietary names
        if ($a[5]) {
            foreach (explode(";", $a[5]) as $c) {
                parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "non-proprietary-name", trim($c)));
            }
        }

        // dosage forms
        if ($a[6]) {
            foreach (explode(",", $a[6]) as $c) {
                $dosageform = strtolower($c);
                $dosageform_id = parent::getVoc() . str_replace(" ", "-", ucfirst(strtolower($c)));
                parent::addRDF(
                    parent::describeIndividual($dosageform_id, $dosageform, parent::getVoc() . "Dosage-Form") .
                    parent::describeClass(parent::getVoc() . "Dosage-Form", "NDC Dosage Form") .
                    parent::triplify($product_id, parent::getVoc() . "dosage-form", $dosageform_id)
                );
            }
        }

        // routes of administration
        if ($a[7]) {
            foreach (explode("; ", $a[7]) as $c) {
                $route = strtolower(trim($c));
                // derive the id from the trimmed value so stray whitespace cannot yield a different id
                $route_id = parent::getVoc() . str_replace(" ", "-", ucfirst($route));
                parent::addRDF(
                    parent::describeIndividual($route_id, $route, parent::getVoc() . "Route") .
                    parent::describeClass(parent::getVoc() . "Route", "NDC Drug Route") .
                    parent::triplify($product_id, parent::getVoc() . "route", $route_id)
                );
            }
        }

        // marketing dates; presumably stored as YYYYMMDD (TODO confirm), emitted as YYYY-MM-DD
        $date_columns = array(8 => "start-marketing-date", 9 => "end-marketing-date");
        foreach ($date_columns as $column => $predicate) {
            if ($a[$column]) {
                $raw = $a[$column];
                $date = substr($raw, 0, 4) . "-" . substr($raw, 4, 2) . "-" . substr($raw, 6, 2);
                parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . $predicate, $date));
            }
        }

        if ($a[10]) {
            parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "marketing-category", $a[10]));
        }
        if ($a[11]) {
            parent::addRDF(parent::triplifyString($product_id, parent::getVoc() . "application-number", $a[11]));
        }

        // create a labeller node, identified by the hash of its name
        if ($a[12]) {
            $labeller_id = parent::getRes() . md5($a[12]);
            $label = addslashes($a[12]);
            parent::addRDF(
                parent::describeIndividual($labeller_id, $label, parent::getVoc() . "Labeller") .
                parent::describeClass(parent::getVoc() . "Labeller", "NDC Labeller") .
                parent::triplify($product_id, parent::getVoc() . "labeller", $labeller_id)
            );
        }

        // the next three columns are parallel lists: substance, strength, unit
        if ($a[13]) {
            $substances = explode(";", $a[13]);
            $strengths = explode(";", $a[14]);
            $units = explode(";", $a[15]);

            foreach ($substances as $i => $substance) {
                // list the active ingredient
                $ingredient_label = strtolower($substance);
                // the strength and unit lists may be shorter than the substance list
                $strength = isset($strengths[$i]) ? $strengths[$i] : '';
                $unit = isset($units[$i]) ? $units[$i] : '';

                $ingredient_id = parent::getRes() . md5($ingredient_label);
                parent::addRDF(
                    parent::describeIndividual($ingredient_id, $ingredient_label, parent::getVoc() . "Ingredient") .
                    parent::describeClass(parent::getVoc() . "Ingredient", "NDC Ingredient") .
                    parent::triplify($product_id, parent::getVoc() . "ingredient", $ingredient_id)
                );

                // describe the substance composition
                $substance_label = "{$strength} {$unit} {$ingredient_label}";
                $substance_id = parent::getRes() . md5($substance_label);
                parent::addRDF(
                    parent::describeIndividual($substance_id, $substance_label, parent::getVoc() . "Substance") .
                    parent::triplifyString($substance_id, parent::getVoc() . "amount", $strength) .
                    parent::describeClass(parent::getVoc() . "Substance", "NDC Substance")
                );

                $unit_id = parent::getVoc() . md5($unit);
                parent::addRDF(
                    parent::describeIndividual($unit_id, $unit, parent::getVoc() . "Unit") .
                    parent::describeClass(parent::getVoc() . "Unit", "NDC Unit") .
                    parent::triplify($substance_id, parent::getVoc() . "amount_unit", $unit_id) .
                    parent::triplify($product_id, parent::getVoc() . "has-part", $substance_id)
                );
            }
        }

        // pharmacological classes
        if ($a[16]) {
            foreach (explode(",", $a[16]) as $c) {
                $cat_id = parent::getVoc() . md5($c);
                parent::addRDF(
                    parent::describeIndividual($cat_id, $c, parent::getVoc() . "Pharmacological-Class") .
                    parent::describeClass(parent::getVoc() . "Pharmacological-Class", "NDC Pharmacological Class") .
                    parent::triplify($product_id, parent::getVoc() . "pharmacological-class", $cat_id)
                );
            }
        }

        parent::WriteRDFBufferToWriteFile();
    }
}
/**
 * Convert the tab-separated PharmGKB drugs file into RDF.
 *
 * The first line is a header and must have exactly 10 columns, otherwise an
 * E_USER_ERROR is raised and FALSE is returned. For each following row:
 * column 0 is the drug identifier, column 1 its name (also recorded in
 * $this->drugs), columns 2-4 are comma separated generic names, trade names
 * and brand mixtures, column 5 the drug class, column 6 comma separated
 * cross references in "prefix:identifier" form, and column 9 external
 * vocabulary entries from which ATC codes are extracted.
 * The RDF buffer is flushed after every row.
 *
 * Fixes relative to the previous version:
 *  - $declared is an array; it used to be initialised to '' and then written
 *    with string keys, which is not valid on a string in modern PHP;
 *  - a "url" cross reference now links to the parsed URL value instead of
 *    pointing the drug back at its own identifier.
 *
 * @return bool|null FALSE when the header has an unexpected number of columns, otherwise nothing
 */
function drugs() {
    // ATC codes whose rdfs:label has already been emitted during this run
    $declared = array();

    $h = explode("\t", $this->GetReadFile()->Read(1000)); // first line is header
    if (count($h) != 10) {
        trigger_error("Change in number of columns for drugs file", E_USER_ERROR);
        return FALSE;
    }

    while ($l = $this->GetReadFile()->Read(200000)) {
        $a = explode("\t", $l);
        $id = parent::getNamespace() . $a[0];
        $this->drugs[$a[0]] = $a[1];

        parent::addRDF(
            parent::describeIndividual($id, $a[1], parent::getVoc() . "Drug") .
            parent::describeClass(parent::getVoc() . "Drug", "PharmGKB Drug")
        );

        if (trim($a[2])) {
            // generic names
            // Entacapona [INN-Spanish],Entacapone [Usan:Inn],Entacaponum [INN-Latin],entacapone
            $b = explode(',', trim($a[2]));
            foreach ($b as $c) {
                parent::addRDF(parent::triplifyString($id, parent::getVoc() . "generic_name", str_replace('"', '', $c)));
            }
            parent::addRDF(parent::describeProperty(parent::getVoc() . "generic_name", "Relationship between a PharmGKB drug and a generic name"));
        }

        if (trim($a[3])) {
            // trade names
            // Disorat,OptiPranolol,Trimepranol
            $b = explode(',', trim($a[3]));
            foreach ($b as $c) {
                parent::addRDF(parent::triplifyString($id, parent::getVoc() . "trade_name", str_replace(array("'", "\""), array("\\\\'", ""), $c)));
            }
            parent::addRDF(parent::describeProperty(parent::getVoc() . "trade_name", "Relationship between a PharmGKB drug and a trade name"));
        }

        if (trim($a[4])) {
            // Brand Mixtures
            // Benzyl benzoate 99+ %,"Dermadex Crm (Benzoic Acid + Benzyl Benzoate + Lindane + Salicylic Acid + Zinc Oxide + Zinc Undecylenate)",
            $b = explode(',', trim($a[4]));
            foreach ($b as $c) {
                parent::addRDF(parent::triplifyString($id, parent::getVoc() . "brand_mixture", str_replace(array("'", "\""), array("\\\\'", ""), $c)));
            }
            parent::addRDF(parent::describeProperty(parent::getVoc() . "brand_mixture", "Relationship between a PharmGKB drug and a brand mixture"));
        }

        if (trim($a[5])) {
            // Type
            parent::addRDF(
                parent::triplifyString($id, parent::getVoc() . "drug_class", str_replace(array("'", "\""), array("\\\\'", ""), $a[5])) .
                parent::describeProperty(parent::getVoc() . "drug_class", "Relationship between a PharmGKB drug and its drug class")
            );
        }

        if (trim($a[6])) {
            // Cross References
            // drugBank:DB00789,keggDrug:D01707,pubChemCompound:55466,pubChemSubstance:192903,url:http://en.wikipedia.org/wiki/Gadopentetate_dimeglumine
            $b = explode(',', trim(str_replace('"', '', $a[6])));
            foreach ($b as $c) {
                // $ns and $id1 are filled in by parseQName
                $this->getRegistry()->parseQName($c, $ns, $id1);
                $ns = str_replace(array('"', ' '), '', $ns);
                // normalise the source's prefixes to the prefixes used in the output
                $ns = str_replace(array('keggcompound', 'keggdrug', 'drugbank', 'uniprotkb', 'clinicaltrials.gov', 'drugsproductdatabase(dpd)', 'nationaldrugcodedirectory', 'therapeutictargetsdatabase', 'fdadruglabelatdailymed'), array('kegg', 'kegg', 'drugbank', 'uniprot', 'clinicaltrials', 'dpd', 'ndc', 'ttd', 'dailymed'), strtolower(str_replace('"', '', $ns)));
                if ($ns == "url") {
                    // NOTE(review): assumes parseQName leaves the full URL in $id1 for "url:http://..." entries - confirm
                    parent::addRDF(parent::QQuadO_URL($id, "rdfs:seeAlso", $id1));
                } else {
                    parent::addRDF(parent::triplify($id, parent::getVoc() . "x-" . $ns, $ns . ":" . $id1));
                }
            }
        }

        if (trim($a[9])) {
            // External Vocabulary
            // ATC:H01AC(Somatropin and somatropin agonists),ATC:V04CD(Tests for pituitary function)
            // ATC:D07AB(Corticosteroids, moderately potent (group II)) => this is why you don't use brackets and commas as separators.
            $b = explode(',', trim($a[9]), 2);
            foreach ($b as $c) {
                preg_match_all("/ATC:([A-Z0-9]+)\\((.*)\\)\$/", $c, $m);
                if (isset($m[1][0])) {
                    $atc = "atc:" . $m[1][0];
                    parent::addRDF(parent::triplify($id, parent::getVoc() . "x-atc", $atc));
                    // label each ATC code only the first time it is seen
                    if (!isset($declared[$atc])) {
                        $declared[$atc] = '';
                        parent::addRDF(parent::triplifyString($atc, "rdfs:label", $m[2][0]));
                    }
                }
            }
        }

        parent::WriteRDFBufferToWriteFile();
    }
}
/**
 * Convert the comma-separated GenAge model-organism gene file into RDF.
 *
 * The header line must split into 8 comma-separated columns, otherwise a
 * warning is raised and false is returned. Data rows are parsed with
 * str_getcsv():
 *   [0] GenAge ID  [1] symbol  [2] name  [3] organism  [4] entrez gene id
 *   [5] avg lifespan change (max obsv)  [6] lifespan effect
 *   [7] longevity influence
 *
 * The GenAge ID is left-padded with zeros to 4 characters to form the
 * resource identifier. A lifespan effect of "Increase and Decrease" is
 * emitted as two separate literals. The RDF buffer is flushed after every
 * emitted row.
 *
 * Fixes relative to the previous version:
 *  - rows with fewer than 8 fields (e.g. a blank line) are reported and
 *    skipped instead of being indexed blindly;
 *  - an organism missing from the taxonomy map no longer yields a taxon
 *    triple with an empty "ncbitaxon:" object; a warning is raised instead.
 *
 * @return bool|null false when the header has an unexpected number of columns, otherwise nothing
 */
function models() {
    // organism name as it appears in the file => NCBI taxonomy identifier
    $tax_ids = array(
        "Caenorhabditis elegans" => "6239",
        "Mus musculus" => "10090",
        "Saccharomyces cerevisiae" => "4932",
        "Drosophila melanogaster" => "7227",
        "Podospora anserina" => "5145",
        "Mesocricetus auratus" => "10036",
        "Schizosaccharomyces pombe" => "4896",
        "Danio rerio" => "7955",
    );

    $h = explode(",", parent::getReadFile()->read());
    $expected_columns = 8;
    if (($n = count($h)) != $expected_columns) {
        trigger_error("Found {$n} columns in gene file - expecting {$expected_columns}!", E_USER_WARNING);
        return false;
    }

    while ($l = parent::getReadFile()->read(200000)) {
        $data = str_getcsv($l);
        if (count($data) < $expected_columns) {
            trigger_error("Found " . count($data) . " fields in gene row - expecting {$expected_columns}, skipping row", E_USER_WARNING);
            continue;
        }

        $genage = str_pad($data[0], 4, "0", STR_PAD_LEFT);
        $gene_symbol = $data[1];
        $name = $data[2];
        $organism = $data[3];
        $ncbi_gene_id = $data[4];
        $max_percent_obsv_avg_lifespan_change = $data[5];
        $lifespan_effect = $data[6];
        $longevity_influence = $data[7];

        $genage_id = parent::getNamespace() . $genage;

        parent::addRDF(
            parent::describeIndividual($genage_id, $name, parent::getVoc() . "Aging-Related-Gene") .
            parent::describeClass(parent::getVoc() . "Aging-Related-Gene", "Aging Related Gene")
        );
        parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "gene-symbol", parent::safeLiteral($gene_symbol)));

        if (isset($tax_ids[$organism])) {
            parent::addRDF(parent::triplify($genage_id, parent::getVoc() . "taxon", "ncbitaxon:" . $tax_ids[$organism]));
        } else {
            trigger_error("No NCBI taxonomy id known for organism '" . $organism . "', omitting taxon of " . $genage_id, E_USER_WARNING);
        }

        if ($ncbi_gene_id !== "") {
            parent::addRDF(parent::triplify($genage_id, parent::getVoc() . "x-ncbigene", "ncbigene:" . $ncbi_gene_id));
        }

        if ($max_percent_obsv_avg_lifespan_change !== "") {
            parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "maximum-percent-observed-average-lifespan-change", parent::safeLiteral($max_percent_obsv_avg_lifespan_change)));
        }

        if ($lifespan_effect == "Increase and Decrease") {
            // a combined effect is recorded as two independent values
            parent::addRDF(
                parent::triplifyString($genage_id, parent::getVoc() . "lifespan-effect", "increase") .
                parent::triplifyString($genage_id, parent::getVoc() . "lifespan-effect", "decrease")
            );
        } else {
            parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "lifespan-effect", strtolower($lifespan_effect)));
        }

        parent::addRDF(parent::triplifyString($genage_id, parent::getVoc() . "longevity-influence", strtolower($longevity_influence)));

        parent::WriteRDFBufferToWriteFile();
    }
}