// The statements down to the first closing brace belong to a block that is
// opened earlier in the file; that brace is its end.
    // Refresh the variant_external entry of every variant that already has
    // a row in yahoo_boss_cache, printing a dot for every tenth variant.
    print "Updating variant_external for existing searches...";
    $q = theDb()->query("SELECT DISTINCT variant_id FROM yahoo_boss_cache");
    if ($q && !theDb()->isError($q)) {
        $n = 0;
        while ($row =& $q->fetchRow()) {
            yahoo_boss_update_external($row["variant_id"]);
            ++$n;
            if ($n % 10 == 0) {
                print ".";
            }
        }
        print "{$n}";
    } else {
        print "(none)";
    }
    print "\n";
}

// Queue every variant that has a gene_disease match or a flat_summary row
// with n_genomes=1, and for which yahoo_boss_cache holds no xml yet.
print "Building queue...";
$q = theDb()->query(
    "CREATE TEMPORARY TABLE yahoo_boss_queue (\n"
    . " variant_id BIGINT UNSIGNED NOT NULL\n"
    . ") AS\n"
    . " SELECT v.variant_id\n"
    . " FROM variants v\n"
    . " LEFT JOIN gene_disease\n"
    . " ON gene=v.variant_gene\n"
    . " LEFT JOIN flat_summary\n"
    . " ON v.variant_id=flat_summary.variant_id AND n_genomes=1\n"
    . " LEFT JOIN yahoo_boss_cache c\n"
    . " ON c.variant_id=v.variant_id\n"
    . " WHERE (gene IS NOT NULL OR flat_summary.variant_id IS NOT NULL)\n"
    . " AND c.xml IS NULL\n"
    . " GROUP BY v.variant_id"
);
if (theDb()->isError($q)) {
    die($q->getMessage());
}
print theDb()->affectedRows();
print "\n";

$q = theDb()->query(
    "SELECT q.variant_id variant_id, v.*\n"
    . " FROM yahoo_boss_queue q\n"
    . " LEFT JOIN variants v\n"
    . " ON v.variant_id=q.variant_id"
);
// Without this check a failed query would reach fetchRow() on an error
// object and abort with a fatal error instead of a readable message.
if (theDb()->isError($q)) {
    die($q->getMessage());
}

// Process the queue one variant at a time and print a progress line for each.
while ($row =& $q->fetchRow()) {
    // NOTE(review): yahoo_boss_lookup() presumably performs and caches the
    // search when no cached result exists -- confirm against its definition.
    $r = yahoo_boss_lookup($row["variant_id"]);
    yahoo_boss_update_external($row["variant_id"]);
    printf(
        "%8d %s %s%d%s (%d)\n",
        $r["hitcount"],
        $row["variant_gene"],
        $row["variant_aa_from"],
        $row["variant_aa_pos"],
        $row["variant_aa_to"],
        $row["variant_id"]
    );
    // Pause one second between variants (presumably to throttle requests
    // to the search service -- TODO confirm).
    sleep(1);
}
/**
 * Rebuild the "Yahoo!" row of variant_external for one variant.
 *
 * Takes the search response returned by yahoo_boss_lookup(), renders every
 * complete <result> element of its XML as an HTML list item, and replaces
 * the variant's existing "Yahoo!" row in variant_external with that HTML.
 * Results whose URL matches snp.med.harvard.edu or
 * evidence.personalgenomes.org are left out; when any were left out, the
 * hitcount stored in yahoo_boss_cache is lowered accordingly.
 *
 * Prints a short message and returns early when there is no search result
 * or the variant cannot be loaded.
 *
 * @param mixed $variant_id  variant_id of the variant to refresh
 * @return null|false  FALSE when the variant row cannot be loaded;
 *                     no value in every other case
 */
function yahoo_boss_update_external($variant_id)
{
    $cache = yahoo_boss_lookup($variant_id);
    if (!$cache) {
        print "No search results\n";
        return;
    }

    $variant = theDb()->getRow(
        "SELECT v.*, vo.rsid rsid FROM variants v\n"
        . "\t\t\t\t LEFT JOIN variant_occurs vo\n"
        . "\t\t\t\t ON v.variant_id=vo.variant_id\n"
        . "\t\t\t\t AND vo.rsid IS NOT NULL\n"
        . "\t\t\t\t WHERE v.variant_id=?\n"
        . "\t\t\t\t GROUP BY v.variant_id",
        array($variant_id)
    );
    if (!$variant || theDb()->isError($variant)) {
        print "No such variant\n";
        return FALSE;
    }

    // Build the query string used for the "see all" link: long and short
    // amino-acid notation plus the rsid when one is known, or the rsid
    // alone for variants without a gene.
    if ($variant["variant_gene"]) {
        $gene_aa_long = $variant["variant_gene"] . " "
            . $variant["variant_aa_from"]
            . $variant["variant_aa_pos"]
            . $variant["variant_aa_to"];
        $gene_aa_short = $variant["variant_gene"] . " "
            . aa_short_form($variant["variant_aa_from"])
            . $variant["variant_aa_pos"]
            . aa_short_form($variant["variant_aa_to"]);
        $search_string = "{$gene_aa_long} OR {$gene_aa_short}";
        if (($rsid = $variant["variant_rsid"]) || ($rsid = $variant["rsid"])) {
            $search_string .= " OR rs{$rsid}";
        }
    } else {
        $search_string = "rs" . $variant["variant_rsid"];
    }
    $user_url = "http://search.yahoo.com/search?p=" . urlencode($search_string);

    $content = "";
    $skipped_hits = 0;
    if ($cache["hitcount"] > 0) {
        preg_match_all('{<result>.*?</result>}is', $cache["xml"], $matches, PREG_PATTERN_ORDER);
        foreach ($matches[0] as $result) {
            // Collect the four tags needed for display, unwrapping CDATA.
            $resulttag = array();
            foreach (array("url", "dispurl", "abstract", "title") as $t) {
                if (!preg_match("{<{$t}>(.*?)</{$t}>}i", $result, $regs)) {
                    // Stop at the first missing tag; carrying on would
                    // rebuild a partial array from FALSE and let an
                    // incomplete result through.
                    $resulttag = FALSE;
                    break;
                }
                $resulttag[$t] = preg_replace('{<\\!\\[CDATA\\[(.*?)\\]\\]>}s', '$1', $regs[1]);
            }
            if (!$resulttag) {
                // Incomplete result: nothing usable to display.
                continue;
            }

            // preg_match() replaces ereg(), which no longer exists as of
            // PHP 7.0; the match stays case-sensitive as before.
            if (preg_match('{snp\\.med\\.harvard\\.edu|evidence\\.personalgenomes\\.org}', $resulttag["url"])) {
                $skipped_hits++;
                continue;
            }

            $content .= "<LI><A href=\"" . $resulttag["url"] . "\">"
                . $resulttag["title"] . "</A><BR />"
                . $resulttag["abstract"]
                . "<BR /><DIV class=\"searchurl\">" . $resulttag["dispurl"] . "</DIV></LI>\n";
        }
    }

    // If we skipped some hits (because they point to this page or
    // Trait-o-matic), subtract them from the cached hitcount so "no
    // web results except this page" gets counted as 0 for
    // statistics/display.
    if ($skipped_hits > 0
        && preg_match('/<resultset_web\\b[^<]*\\sdeephits="?(\\d+)"?/s', $cache["xml"], $regs)
        && $regs[1] >= $skipped_hits) {
        $hitcount = $regs[1] - $skipped_hits;
        if ($hitcount != $cache["hitcount"]) {
            $cache["hitcount"] = $hitcount;
            theDb()->query(
                "UPDATE yahoo_boss_cache SET hitcount=? WHERE variant_id=?",
                array($hitcount, $variant_id)
            );
        }
    }

    // Build html display for variant page
    $content = "<UL><STRONG>Web search results (" . $cache["hitcount"]
        . " hit" . ($cache["hitcount"] == 1 ? "" : "s")
        . " -- <A href=\"" . $user_url . "\">see all</A>)</STRONG>"
        . $content . "</UL>";

    // Replace the previous "Yahoo!" entry for this variant with the new one.
    theDb()->query(
        "DELETE FROM variant_external WHERE variant_id=? AND tag=?",
        array($variant_id, "Yahoo!")
    );
    theDb()->query(
        "INSERT INTO variant_external SET variant_id=?, tag=?, content=?, url=NULL, updated=NOW()",
        array($variant_id, "Yahoo!", $content)
    );
}