Example #1
0
function lovd_getGeneInfoFromHGNC($sHgncId, $bRecursion = false)
{
    // Uses the HGNC REST API to search and retrieve gene information from the
    // HGNC website.
    // $sHgncId: an HGNC accession number (digits only) or an HGNC approved
    //   gene symbol.
    // $bRecursion: when true and the symbol turns out to be a previous symbol
    //   or an alias of exactly one gene, the information of that gene is
    //   returned instead of reporting an error.
    // Returns the gene's data as an associative array, with "hgnc_id" stripped
    // of its prefix, "omim_id" reduced to one value, and "chromosome" and
    // "chrom_band" derived from "location". Returns false on any failure; when
    // lovd_errorAdd() exists (inc-lib-form.php was included) the reason is
    // reported through it.
    // 2014-01-13; 3.0-09; Tired of problems when their URL change and they don't announce it properly; rewrite to use their new REST service.

    // ctype_digit() interprets small integers as ASCII codes, so always work
    // with the string representation of the input.
    $sHgncId = (string) $sHgncId;
    $bNumeric = ctype_digit($sHgncId);
    $bReport = function_exists('lovd_errorAdd');
    $sAccept = 'Accept: application/json';
    $sSearchURL = 'http://rest.genenames.org/search/';
    // The search term is user input; encode it before putting it in the URL path.
    $sTerm = rawurlencode($sHgncId);

    $aOutput = lovd_php_file($sSearchURL . ($bNumeric ? 'hgnc_id/' : 'symbol/') . $sTerm, false, '', $sAccept);
    if (!$aOutput || !($aOutput = json_decode(implode('', $aOutput), true))) {
        if ($bReport) {
            lovd_errorAdd('', 'Couldn\'t search for gene, probably because the HGNC is having website or database problems. Please try again later.');
        }
        return false;
    }

    $nHGNCID = 0;
    if (!empty($aOutput['response']['numFound'])) {
        // 2014-08-06; 3.0-11; HGNC *again* changed their output, and once again we need to adapt quickly.
        if (isset($aOutput['response']['docs'][0]['hgnc_id'])) {
            $nHGNCID = preg_replace('/[^0-9]+/', '', $aOutput['response']['docs'][0]['hgnc_id']);
        }
    } elseif (!$bNumeric) {
        // Not found as an approved symbol. Check if it's a previous symbol, and
        // if not, if it's an alias. This only makes sense for symbols; for
        // numeric IDs it would just repeat the very same request.
        $aFallbacks = array(
            'prev_symbol' => 'Entry %s is deprecated according to the HGNC, please use %s.',
            'alias_symbol' => 'Entry %s was not found, perhaps you are referring to %s.',
        );
        foreach ($aFallbacks as $sField => $sMessage) {
            $aOutput = lovd_php_file($sSearchURL . $sField . '/' . $sTerm, false, '', $sAccept);
            if (!$aOutput || !($aOutput = json_decode(implode('', $aOutput), true))) {
                // Request failed; stop searching and report the entry as not found below.
                break;
            }
            if (empty($aOutput['response']['numFound'])) {
                continue;
            }

            $aDocs = (isset($aOutput['response']['docs']) && is_array($aOutput['response']['docs']) ? $aOutput['response']['docs'] : array());
            // 2014-08-06; 3.0-11; HGNC *again* changed their output, and once again we need to adapt quickly.
            $sNewID = (isset($aDocs[0]['hgnc_id']) ? preg_replace('/[^0-9]+/', '', $aDocs[0]['hgnc_id']) : '');
            if ($aOutput['response']['numFound'] == 1 && $bRecursion && $sNewID) {
                // Exactly one candidate; continue with that gene's ID.
                return lovd_getGeneInfoFromHGNC($sNewID, $bRecursion);
            }

            if ($bReport) {
                // Build "A", "A or B", or "A, B or C" from the entries that were actually returned.
                $aSymbols = array();
                foreach ($aDocs as $aDoc) {
                    if (isset($aDoc['symbol'])) {
                        $aSymbols[] = htmlspecialchars($aDoc['symbol']);
                    }
                }
                $sLast = array_pop($aSymbols);
                $sSymbols = ($aSymbols ? implode(', ', $aSymbols) . ' or ' : '') . $sLast;
                lovd_errorAdd('hgnc_id', sprintf($sMessage, htmlspecialchars($sHgncId), $sSymbols));
            }
            return false;
        }
    }

    if (!$nHGNCID) {
        if ($bReport) {
            lovd_errorAdd('hgnc_id', 'Entry ' . htmlspecialchars($sHgncId) . ' was not found in the HGNC database.');
        }
        return false;
    }

    // Now that we have an ID, fetch the data. Use HGNC's fetch API.
    $aOutput = lovd_php_file('http://rest.genenames.org/fetch/hgnc_id/' . $nHGNCID, false, '', $sAccept);
    if (!$aOutput || !($aOutput = json_decode(implode('', $aOutput), true))) {
        if ($bReport) {
            lovd_errorAdd('', 'Couldn\'t get gene information, probably because the HGNC is having website or database problems. Please try again later.');
        }
        return false;
    }
    if (empty($aOutput['response']['numFound']) || !isset($aOutput['response']['docs'][0]) || !is_array($aOutput['response']['docs'][0])) {
        if ($bReport) {
            lovd_errorAdd('hgnc_id', 'Couldn\'t fetch gene information, even though search results were returned. This might be a problem at the side of the HGNC, or a bug in LOVD. Please try again later.');
        }
        return false;
    }
    $aGene = $aOutput['response']['docs'][0];

    // The fields checked below might not be defined in the reply; default them
    // to an empty string so the checks don't trigger notices.
    foreach (array('status', 'location', 'locus_group', 'locus_type') as $sCol) {
        if (!isset($aGene[$sCol])) {
            $aGene[$sCol] = '';
        }
    }

    // Check returned entry.
    // We ignore genes from the following locus groups:
    $aBadLocusGroups = array('phenotype', 'withdrawn');
    // We ignore genes from the following locus types (most of these are in group "other"):
    $aBadLocusTypes = array('endogenous retrovirus', 'fragile site', 'immunoglobulin gene', 'region', 'transposable element', 'unknown', 'virus integration site', 'immunoglobulin pseudogene');
    $sError = '';
    if ($aGene['status'] != 'Approved') {
        $sError = 'Entry ' . htmlspecialchars($sHgncId) . ' is not an approved gene symbol according to the HGNC database.';
    } elseif ($aGene['location'] == 'reserved') {
        $sError = 'Entry ' . htmlspecialchars($sHgncId) . ' does not yet have a public association with a chromosomal location.';
    } elseif (in_array($aGene['locus_group'], $aBadLocusGroups)) {
        $sError = 'LOVD cannot process this type of gene entry ' . htmlspecialchars($sHgncId) . ' (Locus Group: ' . htmlspecialchars($aGene['locus_group']) . ').';
    } elseif (in_array($aGene['locus_type'], $aBadLocusTypes)) {
        $sError = 'LOVD cannot process this type of gene entry ' . htmlspecialchars($sHgncId) . ' (Locus Type: ' . htmlspecialchars($aGene['locus_type']) . ').';
    }
    if ($sError) {
        if ($bReport) {
            lovd_errorAdd('hgnc_id', $sError);
        }
        return false;
    }

    foreach (array('omim_id') as $sCol) {
        // Columns presented as arrays (new?), but should contain just one value.
        // 2014-12-23; 3.0-13; Can also not be defined.
        // 2016-09-14; 3.0-17; HGNC can actually return multiple OMIM IDs,
        //  take just the first one.
        if (!isset($aGene[$sCol])) {
            $aGene[$sCol] = '';
        } elseif (is_array($aGene[$sCol])) {
            // An empty array is treated like an undefined value.
            $aGene[$sCol] = (isset($aGene[$sCol][0]) ? $aGene[$sCol][0] : '');
        }
    }

    // 2014-08-06; 3.0-11; HGNC ID suddenly got a prefix, removing the prefix.
    // Fall back to the ID we searched with, should the reply not repeat it.
    $aGene['hgnc_id'] = preg_replace('/[^0-9]+/', '', (isset($aGene['hgnc_id']) ? $aGene['hgnc_id'] : $nHGNCID));

    // 2014-12-23; 3.0-13; Split "location" to "chromosome" and "chrom_band", which makes it easier to create this gene in the database.
    if ($aGene['location'] == 'mitochondria') {
        $aGene['chromosome'] = 'M';
        $aGene['chrom_band'] = '';
    } elseif (preg_match('/^(\\d{1,2}|[XY])(.*)$/', $aGene['location'], $aMatches)) {
        $aGene['chromosome'] = $aMatches[1];
        $aGene['chrom_band'] = $aMatches[2];
    } else {
        // Location doesn't start with a chromosome (e.g., an unplaced gene);
        // leave both fields empty rather than undefined.
        $aGene['chromosome'] = '';
        $aGene['chrom_band'] = '';
    }

    return $aGene;
}
Example #2
0
 if (POST) {
     lovd_errorClean();
     if (empty($_POST['hgnc_id'])) {
         lovd_errorAdd('hgnc_id', 'No HGNC ID or Gene symbol was specified');
     } else {
         // Gene Symbol must be unique.
         // Enforced in the table, but we want to handle this gracefully.
         // When numeric, we search the id_hgnc field. When not, we search the id (gene symbol) field.
         $sSQL = 'SELECT id, id_hgnc FROM ' . TABLE_GENES . ' WHERE id' . (!ctype_digit($_POST['hgnc_id']) ? '' : '_hgnc') . ' = ?';
         $aSQL = array($_POST['hgnc_id']);
         $result = $_DB->query($sSQL, $aSQL)->fetchObject();
         if ($result !== false) {
             lovd_errorAdd('hgnc_id', sprintf('This gene entry (%s, HGNC-ID=%d) is already present in this LOVD installation.', $result->id, $result->id_hgnc));
         } else {
             // This call already makes the needed lovd_errorAdd() calls.
             $aGeneInfo = lovd_getGeneInfoFromHGNC($_POST['hgnc_id']);
             if (!empty($aGeneInfo)) {
                 $sHgncID = $aGeneInfo['hgnc_id'];
                 $sSymbol = $aGeneInfo['symbol'];
                 $sGeneName = $aGeneInfo['name'];
                 $sChromLocation = $aGeneInfo['location'];
                 $sEntrez = $aGeneInfo['entrez_id'];
                 $nOmim = $aGeneInfo['omim_id'];
             }
         }
     }
     if (!lovd_error()) {
         $_T->printHeader();
         $_T->printTitle();
         require ROOT_PATH . 'class/progress_bar.php';
         $sFormNextPage = '<FORM action="' . $sPath . '" id="createGene" method="post">' . "\n" . '          <INPUT type="hidden" name="workID" value="' . $_POST['workID'] . '">' . "\n" . '          <INPUT type="submit" value="Continue &raquo;">' . "\n" . '        </FORM>';