Пример #1
0
                 $aGenesAlreadyMappedTo[] = $aTranscript['gene'];
             }
         } else {
             // We don't have the gene that this transcript belongs to in the database yet.
             // Save it for now, we may need to add it later on if the variant can't be mapped to something that already exists in LOVD.
             $aGenesWeCanMapTo[] = $aTranscript['gene'];
         }
     }
 }
 $aGenesWeCanMapTo = array_unique($aGenesWeCanMapTo);
 if ($aVariant['mapping_flags'] & MAPPING_ALLOW_CREATE_GENES && count($aGenesWeCanMapTo)) {
     // We may add extra genes to map this variant to. $aGenesWeCanMapTo contains genes we can map to.
     // Try the genes one by one.
     foreach ($aGenesWeCanMapTo as $sGene) {
         // Get information from HGNC.
         $aGeneInfoFromHgnc = lovd_getGeneInfoFromHgncOld($sGene, array('gd_hgnc_id', 'gd_app_sym', 'gd_app_name', 'gd_pub_chrom_map', 'gd_locus_type', 'gd_pub_eg_id', 'md_mim_id', 'gd_pub_refseq_ids', 'md_refseq_id'), true);
         if (empty($aGeneInfoFromHgnc)) {
             // Couldn't find this gene. Try the next.
             continue;
         }
         list($sHgncID, $sSymbol, $sGeneName, $sChromLocation, $sLocusType, $sEntrez, $sOmim, $sRefseq1, $sRefseq2) = array_values($aGeneInfoFromHgnc);
         // Get LRG if it exists.
         if (!($sRefseqGenomic = lovd_getLRGbyGeneSymbol($sSymbol))) {
             // No LRG, get NG if it exists.
             if (!($sRefseqGenomic = lovd_getNGbyGeneSymbol($sSymbol))) {
                 // Also no NG, use the NC instead.
                 $sRefseqGenomic = $_SETT['human_builds'][$_CONF['refseq_build']]['ncbi_sequences'][$sChromosome];
             }
         }
         // Get UD.
         try {
Пример #2
0
/**
 * Downloads gene information from the HGNC website; only the requested columns are returned.
 *
 * In single-entry mode, the result is an associative array (column name => value).
 * In bulk mode ($sHgncId === true), an array of such arrays is returned, using the gene symbols
 * (with any "~withdrawn" suffix removed) as keys.
 *
 * Where possible, failures are reported through lovd_errorAdd(), but only if that function is
 * defined (i.e., when inc-lib-form.php was included). The function always returns false on failure.
 *
 * @param string|int|bool $sHgncId    HGNC accession number, HGNC approved gene symbol, or boolean
 *                                    true to retrieve ALL genes.
 * @param array           $aCols      HGNC column names to retrieve (e.g. 'gd_hgnc_id', 'gd_app_sym').
 * @param bool            $bRecursion If true, a "symbol withdrawn, see X" entry is followed once by
 *                                    looking up X. Not used in bulk mode.
 *
 * @return array|false Gene data as described above, or false on failure.
 */
function lovd_getGeneInfoFromHgncOld($sHgncId, $aCols, $bRecursion = false)
{
    // Process columns.
    // $aColumns may be extended with columns we need internally, whereas $aCols is used for the
    //  return value and as such should not be changed.
    $aColumns = $aCols;
    $sColumns = '';
    foreach ($aCols as $sColumn) {
        $sColumns .= 'col=' . $sColumn . '&';
    }

    // Make sure we request the right data.
    if ($sHgncId === true) {
        // Boolean true; return bulk data.
        $sWhere = '';
        // Using approved symbols as array keys, so we need to get them from the HGNC.
        if (!in_array('gd_app_sym', $aCols)) {
            $sColumns .= 'col=gd_app_sym&';
            $aColumns[] = 'gd_app_sym';
        }
    } else {
        // Cast first; ctype_digit() interprets small integers (-128..255) as ASCII codes, which
        //  would make an integer HGNC ID like 5 be treated as a gene symbol.
        $sHgncId = (string) $sHgncId;
        if (ctype_digit($sHgncId)) {
            // HGNC database ID.
            $sWhere = 'gd_hgnc_id%3D' . $sHgncId;
        } else {
            // FIXME; implement proper check on gene symbol.
            // Gene symbol; also match SYMBOL~withdrawn to be able to use a deprecated symbol as search key.
            $sWhere = rawurlencode('gd_app_sym IN ("' . $sHgncId . '", "' . $sHgncId . '~withdrawn")');
        }
        // We also surely need gd_app_name to check for and handle withdrawn or deprecated entries.
        if (!in_array('gd_app_name', $aCols)) {
            $sColumns .= 'col=gd_app_name&';
            $aColumns[] = 'gd_app_name';
        }
    }
    $nColumns = count($aColumns);
    // Columns that we requested for internal use only; the caller hasn't asked for them.
    $aUnwantedColumns = array_diff($aColumns, $aCols);

    $aHgncFile = lovd_php_file('http://www.genenames.org/cgi-bin/download?' . $sColumns . 'status_opt=2&where=' . $sWhere . '&order_by=gd_app_sym_sort&limit=&format=text&submit=submit');
    // If the HGNC is having database problems, we get an HTML page.
    if (empty($aHgncFile) || stripos(implode($aHgncFile), '<html') !== false) {
        if (function_exists('lovd_errorAdd')) {
            lovd_errorAdd('', 'Couldn\'t get gene information, probably because the HGNC is having database problems.');
        }
        return false;
    }

    if ($sHgncId === true) {
        // Got bulk data. The first line is the header; drop it.
        $aHGNCgenes = array();
        array_shift($aHgncFile);
        foreach ($aHgncFile as $sGene) {
            $aValues = explode("\t", $sGene);
            if (count($aValues) != $nColumns) {
                // Empty or malformed line; array_combine() would fail on it (ValueError as of PHP 8).
                continue;
            }
            $aGene = array_combine($aColumns, $aValues);
            $sSymbol = str_replace('~withdrawn', '', $aGene['gd_app_sym']);
            if (!empty($aHGNCgenes[$sSymbol]) && $sSymbol != $aGene['gd_app_sym']) {
                // Symbol has been deprecated and then reassigned to another gene, don't overwrite that one.
                continue;
            }
            foreach ($aUnwantedColumns as $sUnwantedColumn) {
                // Don't return columns the caller hasn't asked for.
                unset($aGene[$sUnwantedColumn]);
            }
            $aHGNCgenes[$sSymbol] = $aGene;
        }
        return $aHGNCgenes;
    }

    // Requested single entry.
    if (isset($aHgncFile[1])) {
        // Looks like we've got valid data here.
        $aValues = explode("\t", $aHgncFile[1]);
        if (count($aValues) != $nColumns) {
            // Malformed line; we can't reliably map the values to the columns.
            return false;
        }
        $aGene = array_combine($aColumns, $aValues);

        // We might incorporate one or more of these locus types excludes later, so that we can throw an error without first calling mutalyzer a number of times.
        //$aBadLocusTypes = array('RNA, cluster', 'RNA, transfer', 'RNA, ribosomal', 'transposable element', 'virus integration site', 'phenotype only', 'unknown', 'region', 'complex locus constituent', 'endogenous retrovirus', 'fragile site', 'T cell receptor gene', 'T cell receptor pseudogene');
        $aBadLocusTypes = array('phenotype only');

        if ($aGene['gd_app_name'] == 'entry withdrawn') {
            if (function_exists('lovd_errorAdd')) {
                lovd_errorAdd('hgnc_id', 'Entry ' . htmlspecialchars($sHgncId) . ' no longer exists in the HGNC database.');
            }
            return false;
        } elseif (preg_match('/^symbol withdrawn, see (.+)$/', $aGene['gd_app_name'], $aRegs)) {
            if ($bRecursion) {
                // Follow the replacement symbol; $bRecursion is not passed on, so we follow just once.
                return lovd_getGeneInfoFromHgncOld($aRegs[1], $aCols);
            } elseif (function_exists('lovd_errorAdd')) {
                // The replacement symbol comes from the HGNC; escape it just like the user's input.
                lovd_errorAdd('hgnc_id', 'Entry ' . htmlspecialchars($sHgncId) . ' is deprecated according to the HGNC, please use ' . htmlspecialchars($aRegs[1]) . '.');
            }
            return false;
        } elseif (in_array('gd_pub_chrom_map', $aCols) && $aGene['gd_pub_chrom_map'] == 'reserved') {
            if (function_exists('lovd_errorAdd')) {
                lovd_errorAdd('hgnc_id', 'Entry ' . htmlspecialchars($sHgncId) . ' does not yet have a public association with a chromosomal location');
            }
            return false;
        } elseif (in_array('gd_locus_type', $aCols) && in_array($aGene['gd_locus_type'], $aBadLocusTypes)) {
            if (function_exists('lovd_errorAdd')) {
                lovd_errorAdd('hgnc_id', 'LOVD cannot process this type of gene entry ' . htmlspecialchars($sHgncId) . ' (Locus Type: ' . htmlspecialchars($aGene['gd_locus_type']) . ').');
            }
            return false;
        }

        foreach ($aUnwantedColumns as $sUnwantedColumn) {
            // Don't return columns the caller hasn't asked for.
            unset($aGene[$sUnwantedColumn]);
        }

        // 2016-09-14; 3.0-17; HGNC can return multiple OMIM IDs.
        if (isset($aGene['md_mim_id']) && preg_match('/^(\\d+), /', $aGene['md_mim_id'], $aRegs)) {
            // Just trim the other(s) off.
            $aGene['md_mim_id'] = $aRegs[1];
        }
        return $aGene;

    } elseif (function_exists('lovd_errorAdd')) {
        // No match found, start looking for alias. We could have included an OR in the original search, but I am not
        // sure if that would maybe have other genes pop up while the official gene is then ignored.
        // We only do this search if we can report it, of course (hence the check for lovd_errorAdd()).
        // We need the approved symbols to build the suggestion, so request them if the caller hasn't.
        $sAliasColumns = $sColumns;
        $aAliasColumns = $aColumns;
        if (!in_array('gd_app_sym', $aAliasColumns)) {
            $sAliasColumns .= 'col=gd_app_sym&';
            $aAliasColumns[] = 'gd_app_sym';
        }
        $nAliasColumns = count($aAliasColumns);

        // Replace WHERE.
        $sWhere = rawurlencode('CONCAT(" ", gd_aliases, ",") LIKE "% ' . $sHgncId . ',%"');
        $aHgncFile = lovd_php_file('http://www.genenames.org/cgi-bin/download?' . $sAliasColumns . 'status_opt=2&where=' . $sWhere . '&order_by=gd_app_sym_sort&limit=&format=text&submit=submit');
        // Just quick check if we have a match now...
        if (!empty($aHgncFile) && stripos(implode($aHgncFile), '<html') === false) {
            // Drop the header line.
            unset($aHgncFile[0]);
            $sSymbolList = '';
            foreach ($aHgncFile as $sLine) {
                $aValues = explode("\t", $sLine);
                if (count($aValues) != $nAliasColumns) {
                    // Empty or malformed line.
                    continue;
                }
                $aGene = array_combine($aAliasColumns, $aValues);
                $sSymbolList .= (!$sSymbolList ? '' : ', ') . htmlspecialchars($aGene['gd_app_sym']);
            }
            if ($sSymbolList) {
                // "Prettify" the output by replacing the last , by an "or".
                $sSymbolList = preg_replace('/, ([^ ]+)$/', " or \$1", $sSymbolList);
                lovd_errorAdd('hgnc_id', 'Entry ' . htmlspecialchars($sHgncId) . ' was not found, perhaps you are referring to ' . $sSymbolList . '?');
            } else {
                lovd_errorAdd('hgnc_id', 'Entry ' . htmlspecialchars($sHgncId) . ' was not found in the HGNC database.');
            }
        }
    }
    return false;
}
Пример #3
0
 // Now that we've parsed the transcript accession number, let's see if we need to do anything with its gene.
 if ($sSymbol != 'none' && empty($aGenesChecked[$sSymbol])) {
     // We haven't seen this gene before in this upload.
     // First try to get this gene from the database.
     if ($aGene = $_DB->query('SELECT g.refseq_UD, g.name, IF(IFNULL(t.position_g_mrna_start, 0) = IFNULL(t.position_g_mrna_end, 0), NULL, IF(t.position_g_mrna_start < t.position_g_mrna_end, "+", "-")) AS strand
                               FROM ' . TABLE_GENES . ' AS g LEFT JOIN ' . TABLE_TRANSCRIPTS . ' AS t ON (g.id = t.geneid) WHERE g.id = ? ORDER BY t.id ASC LIMIT 1', array($sSymbol))->fetchAssoc()) {
         // We've got it in the database. Check its columns.
         $aGene['columns'] = $_DB->query('SELECT colid FROM ' . TABLE_SHARED_COLS . ' WHERE geneid = ? AND colid IN("' . implode('", "', $aVOTCols) . '")', array($sSymbol))->fetchAllColumn();
         $aGenesChecked[$sSymbol] = $aGene;
     } elseif (strpos($_POST['autocreate'], 'g') !== false) {
         // We don't have this gene in the database yet. Try to add it instead.
         $_BAR->setMessage('Loading gene information for ' . $sSymbol . '...', 'done');
         if (empty($aGeneInfo)) {
             // Getting all gene information from the HGNC takes a few seconds.
             $_BAR->setMessage('Loading gene data...', 'done');
             $aGeneInfo = lovd_getGeneInfoFromHgncOld(true, array('gd_hgnc_id', 'gd_app_sym', 'gd_app_name', 'gd_pub_chrom_map', 'gd_locus_type', 'gd_pub_eg_id', 'md_mim_id'));
             if (empty($aGeneInfo)) {
                 // We can't get gene information from the HGNC, so we can't add them.
                 // This is a major problem and we can't just continue; the user will have to give permission not to create new gene entries.
                 ob_start();
                 lovd_showInfoTable('Could not get any gene information from the HGNC database! If this problem persists, consider importing the file without creating new gene entries. ' . 'If this is not an option for you, please try again later.', 'stop');
                 $_BAR->setMessage(ob_get_clean(), 'done');
                 exit('</BODY></HTML>');
             }
             // Remove the 'Loading gene data...' message again.
             $_BAR->setMessage('Loading gene information for ' . $sSymbol . '...', 'done');
         }
         // Get HGNC data for this gene.
         while (true) {
             if (empty($aGeneInfo[$sSymbol]) || $aGeneInfo[$sSymbol]['gd_app_name'] == 'entry withdrawn' || $aGeneInfo[$sSymbol]['gd_pub_chrom_map'] == 'reserved') {
                 // Can't use this symbol.