예제 #1
0
<?php 
// Copyright 2009 Scalable Computing Experts, Inc.
// Author: Tom Clegg
// Abort early unless a Yahoo BOSS API key is supplied through the environment.
if (!getenv("APIKEY")) {
    exit("Please set environment variable APIKEY to your Yahoo BOSS API key.\n");
}

// Switch into public_html before loading the lib/ includes, which are
// referenced by relative path.
chdir('public_html');
require_once 'lib/setup.php';
require_once 'lib/openid.php';
require_once 'lib/yahoo_boss.php';

// Runtime settings: ask for unbuffered output and a 64 MiB memory limit.
ini_set("output_buffering", false);
ini_set("memory_limit", 64 * 1024 * 1024);

// Create/update the yahoo_boss tables, bracketing the call with progress output.
echo "Creating/updating yahoo_boss tables...";
yahoo_boss_create_tables();
echo "\n";

// Log in through openid_login_as_robot() under the "Yahoo! Search Robot" name.
openid_login_as_robot("Yahoo! Search Robot");
if (in_array("--update-all", $_SERVER["argv"])) {
    print "Updating variant_external for existing searches...";
    $q = theDb()->query("SELECT DISTINCT variant_id FROM yahoo_boss_cache");
    if ($q && !theDb()->isError($q)) {
        $n = 0;
        while ($row =& $q->fetchRow()) {
            yahoo_boss_update_external($row["variant_id"]);
            ++$n;
            if ($n % 10 == 0) {
                print ".";
            }
        }
        print "{$n}";
    } else {
        print "(none)";
예제 #2
0
// Copyright 2010 Scalable Computing Experts, Inc.
// Author: Tom Clegg
// Require exactly one argument: the path to the PharmGKB annotations TSV file.
if ($_SERVER["argc"] != 2) {
    die("Usage: " . $_SERVER["argv"][0] . " variantAnnotations.tsv\n");
}

// The lib/ includes are referenced by relative path from public_html.
chdir('public_html');
require_once 'lib/setup.php';
require_once 'lib/genomes.php';
require_once 'lib/openid.php';
require_once 'lib/bp.php';
ini_set("output_buffering", FALSE);
ini_set("memory_limit", 67108864);

print "Creating/updating get-evidence tables...";
genomes_create_tables();
print "\n";
openid_login_as_robot("PharmGKB Importing Robot");

// Read only the header line of the input file to recognise its format.
// NOTE(review): the working directory is already public_html at this point,
// so a relative input path is resolved against public_html -- confirm that
// this is intended.
$fh = fopen($_SERVER["argv"][1], "r");
if ($fh === FALSE) {
    // Without this check a missing/unreadable file would be passed on to
    // fgets()/fclose() and end up reported as an unrecognized format (or as
    // a TypeError on PHP 8).
    die("Can't open " . $_SERVER["argv"][1] . "\n");
}
$columns = fgets($fh);
fclose($fh);

// Only the "Position on hg18 <TAB> RSID <TAB> ..." header layout is supported.
if (!preg_match('/^Position on hg18\\tRSID\\t/', $columns)) {
    die("Unrecognized input format");
}

print "Creating temporary table...";
$q = theDb()->query("CREATE TEMPORARY TABLE pharmgkb (\n  `chr_pos` VARCHAR(16),\n  `rsid` VARCHAR(16),\n  `gene_aa` VARCHAR(64),\n  `genes` VARCHAR(32),\n  `feature` VARCHAR(64),\n  `evidence` VARCHAR(64),\n  `annotation` TEXT,\n  `drugs` VARCHAR(255),\n  `drug_classes` VARCHAR(255),\n  `diseases` VARCHAR(255),\n  `curation_level` VARCHAR(255),\n  `pharmgkb_acc_id` VARCHAR(32),\n  INDEX(`rsid`)\n)");
if (theDb()->isError($q)) {
    // getMessage is a method; the call parentheses are required for the
    // error text to actually be printed.
    print $q->getMessage();
}
print "\n";

// Bulk-load the file into the temporary table, skipping the header row.
print "Importing data...";
$q = theDb()->query("LOAD DATA LOCAL INFILE ? INTO TABLE pharmgkb\n FIELDS TERMINATED BY '\t' OPTIONALLY ENCLOSED BY '\"'\n LINES TERMINATED BY '\n'\n IGNORE 1 LINES", array($_SERVER["argv"][1]));
예제 #3
0
// stdin
// When exactly one command-line argument is given, read the input from that
// file.
// NOTE(review): $fh is not assigned in the visible code when argc != 2;
// presumably it is initialised (e.g. to stdin) above this excerpt -- confirm.
if ($_SERVER["argc"] == 2) {
    $fh = fopen($_SERVER["argv"][1], "r");
}
// Abort if the input handle could not be opened.
if ($fh === FALSE) {
    die("Can't open " . $_SERVER["argv"][1] . "\n");
}
// The lib/ includes are referenced by relative path from public_html.
chdir('public_html');
require_once 'lib/setup.php';
require_once 'lib/genomes.php';
require_once 'lib/openid.php';
require_once 'lib/bp.php';
ini_set("output_buffering", FALSE);
ini_set("memory_limit", 67108864);
genomes_create_tables();
openid_login_as_robot("Genome Importing Robot");
// Temporary staging table for the import; indexed by (variant_id, dataset_id),
// dataset_id and (chr, chr_pos).
theDb()->query("CREATE TEMPORARY TABLE import_genomes_tmp (\n variant_id BIGINT UNSIGNED NOT NULL,\n genome_id BIGINT UNSIGNED NOT NULL,\n chr CHAR(6) NOT NULL,\n chr_pos INT UNSIGNED NOT NULL,\n trait_allele CHAR(1),\n taf TEXT,\n rsid BIGINT UNSIGNED,\n dataset_id VARCHAR(16) NOT NULL,\n zygosity ENUM('heterozygous','homozygous') NOT NULL DEFAULT 'heterozygous',\n INDEX(variant_id,dataset_id),\n INDEX(dataset_id),\n INDEX(chr,chr_pos))");
// Temporary table holding at most one row per dataset_id (UNIQUE constraint).
theDb()->query("CREATE TEMPORARY TABLE imported_datasets (\n dataset_id VARCHAR(16) NOT NULL,\n UNIQUE(dataset_id))");
// Dump current list of variants into import_genomes_tmp table
print "Importing ";
// Counter incremented once per input line by the read loop that follows;
// it drives the progress output.
$ops = 0;
$job2genome = array();
// Maps the abbreviations 'hom'/'het' to the values of the zygosity ENUM
// declared in import_genomes_tmp.
$zygosity = array('hom' => 'homozygous', 'het' => 'heterozygous');
while (($line = fgets($fh)) !== FALSE) {
    ++$ops;
    if ($ops % 10000 == 0) {
        print $ops;
    }
    if ($ops % 1000 == 0) {
        print ".";
    }
예제 #4
0
// Copyright 2009 Scalable Computing Experts, Inc.
// Author: Tom Clegg
// Require exactly one argument: the path to the GWAS CSV file.
if ($_SERVER["argc"] != 2) {
    die("Usage: " . $_SERVER["argv"][0] . " gwas.csv\n");
}

// The lib/ includes are referenced by relative path from public_html.
chdir('public_html');
require_once 'lib/setup.php';
require_once 'lib/genomes.php';
require_once 'lib/openid.php';
require_once 'lib/bp.php';
ini_set("output_buffering", FALSE);
ini_set("memory_limit", 67108864);

print "Creating/updating get-evidence tables...";
genomes_create_tables();
print "\n";
openid_login_as_robot("GWAS Importing Robot");

// Read only the header line; the format detection that follows matches
// against $columns.
$fh = fopen($_SERVER["argv"][1], "r");
if ($fh === FALSE) {
    // Without this check a missing/unreadable file would be passed on to
    // fgets()/fclose() and end up reported as an unrecognized format (or as
    // a TypeError on PHP 8).
    die("Can't open " . $_SERVER["argv"][1] . "\n");
}
$columns = fgets($fh);
fclose($fh);
print "Creating temporary table...";
if (eregi('^date added[^,]*,pubmedid,first author,date,', $columns)) {
    // Date Added to Catalog (since 11/25/08),PubMedID,First Author,Date,Journal,Link,Study,Disease/Trait,Initial Sample Size,Replication Sample Size,Region,Reported Gene(s),Strongest SNP-Risk Allele,SNPs,Risk Allele Frequency,p-Value,p-Value (text),OR or beta,95% CI (text),Platform [SNPs passing QC],CNV
    $inputformat = "genome.gov";
    $q = theDb()->query("CREATE TEMPORARY TABLE gwas (\n  `date_added` date,\n  `pmid` int unsigned,\n  `first_author` varchar(64),\n  `pub_date` VARCHAR(32),\n  `journal` varchar(64),\n  `url` varchar(255),\n  `study` varchar(255),\n  `disease_trait` varchar(255),\n  `initial_sample_size` VARCHAR(255),\n  `replication_sample_size` VARCHAR(255),\n  `region` VARCHAR(255),\n  `genes` VARCHAR(32),\n  `risk_allele` VARCHAR(32),\n  `snps` VARCHAR(32),\n  `risk_allele_frequency` VARCHAR(16),\n  `p_value` VARCHAR(32),\n  `p_value_text` VARCHAR(32),\n  `or_or_beta` VARCHAR(32),\n  `ci_95_text` VARCHAR(32),\n  `platform_SNPs_passing_QC` VARCHAR(32),\n  `cnv` CHAR(1),\n  INDEX(`snps`)\n)");
} else {
    if (eregi('^rs[^,]*, *gene *, *gene/region *, *trait', $columns)) {
        // rs Number(region location) , Gene , Gene/Region , Trait , First Author , Journal , Published Year , PubMed ID , Sample Size (Initial/Replicate) , Risk Allele [Prevalence in control] , OR/Beta [95% CI] , p-Value , platform ,OR
        $inputformat = "hugenet";
        $q = theDb()->query("CREATE TEMPORARY TABLE gwas (\n  `snps` VARCHAR(32),\n  `genes` VARCHAR(32),\n  `region` VARCHAR(255),\n  `disease_trait` varchar(255),\n  `first_author` varchar(64),\n  `journal` varchar(64),\n  `pub_date` VARCHAR(32),\n  `pmid` int unsigned,\n  `sample_size` VARCHAR(255),\n  `risk_allele` VARCHAR(32),\n  `or_or_beta` VARCHAR(32),\n  `p_value` VARCHAR(32),\n  `platform_SNPs_passing_QC` VARCHAR(32),\n  `or_or_beta_is_or` CHAR(1),\n  INDEX(`snps`)\n)");
    } else {
        die("Unrecognized input format");
예제 #5
0
<?php 
// Copyright 2010 Scalable Computing Experts, Inc.
// Author: Tom Clegg
// Require exactly one argument: the path to the Counsyl CSV file.
if ($_SERVER["argc"] != 2) {
    die("Usage: " . $_SERVER["argv"][0] . " counsyl.csv\n");
}

// Load the lib/ includes from inside public_html, then step back out to the
// parent directory.
chdir('public_html');
require_once 'lib/setup.php';
require_once 'lib/genomes.php';
require_once 'lib/openid.php';
require_once 'lib/bp.php';
chdir('..');
ini_set("output_buffering", FALSE);
ini_set("memory_limit", 67108864);
genomes_create_tables();
openid_login_as_robot("Counsyl Test Importing Robot");

// Staging table matching the three columns loaded from the CSV.
print "Creating temporary table...";
$q = theDb()->query("CREATE TEMPORARY TABLE counsyl_a (\n  `gene` VARCHAR(16) NOT NULL,\n  `aa_change` VARCHAR(12) NOT NULL,\n  `type` VARCHAR(12) NOT NULL\n)");
// Report a failed CREATE the same way the later queries in this script do,
// instead of discarding the result.
if (theDb()->isError($q)) {
    print $q->getMessage();
}
print "\n";

// Bulk-load the CSV, skipping its header row, and print the affected-row count.
print "Importing data...";
$q = theDb()->query("LOAD DATA LOCAL INFILE ?\n INTO TABLE counsyl_a\n FIELDS TERMINATED BY ','\n LINES TERMINATED BY '\n'\n IGNORE 1 LINES", array($_SERVER["argv"][1]));
if (theDb()->isError($q)) {
    print $q->getMessage();
}
print theDb()->affectedRows();
print "\n";

// Add the aa_pos / aa_from / aa_to columns to the staging table.
print "Splitting AA field...";
$q = theDb()->query("ALTER TABLE counsyl_a\n ADD aa_pos INT,\n ADD aa_from CHAR(4),\n ADD aa_to CHAR(4)");
if (theDb()->isError($q)) {
    print $q->getMessage();
}
예제 #6
0
#!/usr/bin/php
<?php 
// Copyright 2009 Scalable Computing Experts, Inc.
// Author: Tom Clegg
// Require exactly one argument: the path to the OMIM TSV file.
if ($_SERVER["argc"] != 2) {
    die("Usage: " . $_SERVER["argv"][0] . " omim.tsv\n");
}

// The lib/ includes are referenced by relative path from public_html.
chdir('public_html');
require_once 'lib/setup.php';
require_once 'lib/genomes.php';
require_once 'lib/openid.php';
require_once 'lib/bp.php';
ini_set("output_buffering", FALSE);
ini_set("memory_limit", 67108864);
genomes_create_tables();
openid_login_as_robot("OMIM Importing Robot");

// Every query result below is checked and any database error message is
// printed; previously all results were discarded, so a failed CREATE/LOAD
// went unnoticed. Execution still continues after an error, as before.
print "Creating temporary table...";
$q = theDb()->query("CREATE TEMPORARY TABLE omim_a (\n  `phenotype` VARCHAR(255) NOT NULL,\n  `gene` VARCHAR(12) NOT NULL,\n  `amino_acid` VARCHAR(8) NOT NULL,\n  `codon` INT NOT NULL,\n  `word_count` INT,\n  `allelic_variant_id` VARCHAR(24)\n)");
if (theDb()->isError($q)) {
    print $q->getMessage();
}
print "\n";

// Bulk-load the tab-separated input file into the staging table.
print "Importing data...";
$q = theDb()->query("LOAD DATA LOCAL INFILE ? INTO TABLE omim_a FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n'", array($_SERVER["argv"][1]));
if (theDb()->isError($q)) {
    print $q->getMessage();
}

// Extend the staging table with the derived columns and a lookup index.
// The statements run in the same order as before; the index is added last
// because it refers to the aa_from and aa_to columns added just above it.
$omim_alter_statements = array(
    "ALTER TABLE omim_a ADD variant_id BIGINT UNSIGNED",
    "ALTER TABLE omim_a ADD aa_from CHAR(4)",
    "ALTER TABLE omim_a ADD aa_to CHAR(4)",
    "ALTER TABLE omim_a ADD url VARCHAR(255)",
    "ALTER TABLE omim_a ADD INDEX(gene,aa_from,codon,aa_to)",
);
foreach ($omim_alter_statements as $omim_alter_sql) {
    $q = theDb()->query($omim_alter_sql);
    if (theDb()->isError($q)) {
        print $q->getMessage();
    }
}
print "\n";

// Upper-case the gene name and split amino_acid into aa_from (characters 1-3)
// and aa_to (characters 5-7, or 'Stop' when characters 5-8 read 'TERM').
print "Cleaning up gene/aa encoding...";
$q = theDb()->query("\nUPDATE omim_a\nSET gene = UPPER(gene),\n aa_from = SUBSTRING(amino_acid,1,3),\n aa_to = IF(SUBSTRING(amino_acid,5,4)='TERM','Stop',SUBSTRING(amino_acid,5,3))\n");
if (theDb()->isError($q)) {
    print $q->getMessage();
}
print theDb()->affectedRows();
print "\n";