function lovd_getGeneInfoFromHGNC($sHgncId, $bRecursion = false)
{
    // Uses the HGNC REST API to search and retrieve gene information from the
    // HGNC website.
    //
    // $sHgncId    An HGNC accession number (digits only) or an HGNC approved
    //             gene symbol.
    // $bRecursion When true and the given symbol turns out to be a previous
    //             symbol or an alias of exactly one gene, the information of
    //             that gene is returned instead of reporting an error.
    //
    // Returns the HGNC record as an associative array, extended with the keys
    // "chromosome" and "chrom_band" (split from "location"), a digits-only
    // "hgnc_id" and a scalar "omim_id". On error, this function calls
    // lovd_errorAdd() if that function is available. It always returns false
    // on failure.
    //
    // 2014-01-13; 3.0-09; Tired of problems when their URL change and they
    // don't announce it properly; rewrite to use their new REST service.

    // ctype_digit() only behaves predictably on strings (ints are interpreted
    // as ASCII codes, and non-strings are deprecated as of PHP 8.1).
    $sHgncId = (string) $sHgncId;
    $bNumeric = ctype_digit($sHgncId);

    $aOutput = lovd_hgncRequest('search/' . ($bNumeric ? 'hgnc_id' : 'symbol') . '/' . rawurlencode($sHgncId));
    if (!$aOutput) {
        lovd_hgncAddError('', 'Couldn\'t search for gene, probably because the HGNC is having website or database problems. Please try again later.');
        return false;
    }

    $nHGNCID = 0;
    if (!empty($aOutput['response']['numFound'])) {
        // 2014-08-06; 3.0-11; HGNC *again* changed their output, and once again we need to adapt quickly.
        // The ID is returned with a prefix; keep the digits only.
        if (isset($aOutput['response']['docs'][0]['hgnc_id'])) {
            $nHGNCID = preg_replace('/[^0-9]+/', '', $aOutput['response']['docs'][0]['hgnc_id']);
        }
    } elseif (!$bNumeric) {
        // Symbol not found. Check if it's a previous symbol first, then if
        // it's an alias. These searches make no sense for a numeric ID; they
        // would only repeat the exact same request.
        $aFallbacks = array(
            'prev_symbol' => 'Entry %s is deprecated according to the HGNC, please use %s.',
            'alias_symbol' => 'Entry %s was not found, perhaps you are referring to %s.',
        );
        foreach ($aFallbacks as $sField => $sMessage) {
            $aOutput = lovd_hgncRequest('search/' . $sField . '/' . rawurlencode($sHgncId));
            if (!$aOutput) {
                // Request failed; stop searching and report "not found" below.
                break;
            }
            if (empty($aOutput['response']['numFound'])) {
                continue;
            }

            $aDocs = array();
            if (isset($aOutput['response']['docs']) && is_array($aOutput['response']['docs'])) {
                $aDocs = $aOutput['response']['docs'];
            }
            $sTargetId = (!isset($aDocs[0]['hgnc_id']) ? '' : preg_replace('/[^0-9]+/', '', $aDocs[0]['hgnc_id']));

            if ($bRecursion && $aOutput['response']['numFound'] == 1 && $sTargetId !== '') {
                // Unambiguous hit; continue with the current entry. The
                // numeric ID guarantees the recursion ends after one level.
                return lovd_getGeneInfoFromHGNC($sTargetId, $bRecursion);
            }

            lovd_hgncAddError('hgnc_id', sprintf($sMessage, htmlspecialchars($sHgncId), lovd_hgncJoinSymbols($aDocs)));
            return false;
        }
    }

    if (!$nHGNCID) {
        lovd_hgncAddError('hgnc_id', 'Entry ' . htmlspecialchars($sHgncId) . ' was not found in the HGNC database.');
        return false;
    }

    // Now that we have an ID, fetch the data. Use HGNC's fetch API.
    $aOutput = lovd_hgncRequest('fetch/hgnc_id/' . $nHGNCID);
    if (!$aOutput) {
        lovd_hgncAddError('', 'Couldn\'t get gene information, probably because the HGNC is having website or database problems. Please try again later.');
        return false;
    }
    if (empty($aOutput['response']['numFound']) || !isset($aOutput['response']['docs'][0])) {
        lovd_hgncAddError('hgnc_id', 'Couldn\'t fetch gene information, even though search results were returned. This might be a problem at the side of the HGNC, or a bug in LOVD. Please try again later.');
        return false;
    }
    $aGene = $aOutput['response']['docs'][0];

    // Fields we inspect below may be absent from the record; treat those as empty.
    foreach (array('status', 'location', 'locus_group', 'locus_type') as $sCol) {
        if (!isset($aGene[$sCol])) {
            $aGene[$sCol] = '';
        }
    }

    // Check returned entry.
    // We ignore genes from the following locus groups:
    $aBadLocusGroups = array('phenotype', 'withdrawn');
    // We ignore genes from the following locus types (most of these are in group "other"):
    $aBadLocusTypes = array('endogenous retrovirus', 'fragile site', 'immunoglobulin gene', 'region', 'transposable element', 'unknown', 'virus integration site', 'immunoglobulin pseudogene');

    if ($aGene['status'] != 'Approved') {
        lovd_hgncAddError('hgnc_id', 'Entry ' . htmlspecialchars($sHgncId) . ' is not an approved gene symbol according to the HGNC database.');
        return false;
    }
    if ($aGene['location'] == 'reserved') {
        lovd_hgncAddError('hgnc_id', 'Entry ' . htmlspecialchars($sHgncId) . ' does not yet have a public association with a chromosomal location.');
        return false;
    }
    if (in_array($aGene['locus_group'], $aBadLocusGroups)) {
        lovd_hgncAddError('hgnc_id', 'LOVD cannot process this type of gene entry ' . htmlspecialchars($sHgncId) . ' (Locus Group: ' . htmlspecialchars($aGene['locus_group']) . ').');
        return false;
    }
    if (in_array($aGene['locus_type'], $aBadLocusTypes)) {
        lovd_hgncAddError('hgnc_id', 'LOVD cannot process this type of gene entry ' . htmlspecialchars($sHgncId) . ' (Locus Type: ' . htmlspecialchars($aGene['locus_type']) . ').');
        return false;
    }

    foreach (array('omim_id') as $sCol) {
        // Columns presented as arrays (new?), but should contain just one value.
        // 2014-12-23; 3.0-13; Can also not be defined.
        // 2016-09-14; 3.0-17; HGNC can actually return multiple OMIM IDs,
        //  take just the first one.
        if (!isset($aGene[$sCol])) {
            $aGene[$sCol] = '';
        } elseif (is_array($aGene[$sCol])) {
            // An empty list has no first element; store an empty value then.
            $aGene[$sCol] = (!$aGene[$sCol] ? '' : reset($aGene[$sCol]));
        }
    }

    // 2014-08-06; 3.0-11; HGNC ID suddenly got a prefix, removing the prefix.
    $aGene['hgnc_id'] = preg_replace('/[^0-9]+/', '', (isset($aGene['hgnc_id']) ? $aGene['hgnc_id'] : $nHGNCID));

    // 2014-12-23; 3.0-13; Split "location" to "chromosome" and "chrom_band", which makes it easier to create this gene in the database.
    // Locations that are neither mitochondrial nor start with a chromosome
    // (1-2 digits, X or Y) leave both fields empty.
    $aGene['chromosome'] = '';
    $aGene['chrom_band'] = '';
    if ($aGene['location'] == 'mitochondria') {
        $aGene['chromosome'] = 'M';
    } elseif (preg_match('/^(\\d{1,2}|[XY])(.*)$/', $aGene['location'], $aMatches)) {
        $aGene['chromosome'] = $aMatches[1];
        $aGene['chrom_band'] = $aMatches[2];
    }

    return $aGene;
}





function lovd_hgncRequest($sPath)
{
    // Helper of lovd_getGeneInfoFromHGNC(). Requests the given path from the
    // HGNC REST service, asking for JSON, and returns the decoded response as
    // an array. Returns false if the request failed or if the response could
    // not be decoded into a non-empty value.
    $aLines = lovd_php_file('http://rest.genenames.org/' . $sPath, false, '', 'Accept: application/json');
    if (!$aLines) {
        return false;
    }

    $aResponse = json_decode(implode('', $aLines), true);
    return (!$aResponse ? false : $aResponse);
}





function lovd_hgncAddError($sField, $sMessage)
{
    // Helper of lovd_getGeneInfoFromHGNC(). Reports an error through
    // lovd_errorAdd(), but only when that function is available.
    if (function_exists('lovd_errorAdd')) {
        lovd_errorAdd($sField, $sMessage);
    }
}





function lovd_hgncJoinSymbols($aDocs)
{
    // Helper of lovd_getGeneInfoFromHGNC(). Builds an HTML-safe, human
    // readable list ("A", "A or B", "A, B or C") of the gene symbols found in
    // the given HGNC search result documents.
    $aSymbols = array();
    foreach ($aDocs as $aDoc) {
        if (isset($aDoc['symbol'])) {
            $aSymbols[] = htmlspecialchars($aDoc['symbol']);
        }
    }

    $sLast = (string) array_pop($aSymbols);
    return (!$aSymbols ? $sLast : implode(', ', $aSymbols) . ' or ' . $sLast);
}
if (POST) { lovd_errorClean(); if (empty($_POST['hgnc_id'])) { lovd_errorAdd('hgnc_id', 'No HGNC ID or Gene symbol was specified'); } else { // Gene Symbol must be unique. // Enforced in the table, but we want to handle this gracefully. // When numeric, we search the id_hgnc field. When not, we search the id (gene symbol) field. $sSQL = 'SELECT id, id_hgnc FROM ' . TABLE_GENES . ' WHERE id' . (!ctype_digit($_POST['hgnc_id']) ? '' : '_hgnc') . ' = ?'; $aSQL = array($_POST['hgnc_id']); $result = $_DB->query($sSQL, $aSQL)->fetchObject(); if ($result !== false) { lovd_errorAdd('hgnc_id', sprintf('This gene entry (%s, HGNC-ID=%d) is already present in this LOVD installation.', $result->id, $result->id_hgnc)); } else { // This call already makes the needed lovd_errorAdd() calls. $aGeneInfo = lovd_getGeneInfoFromHGNC($_POST['hgnc_id']); if (!empty($aGeneInfo)) { $sHgncID = $aGeneInfo['hgnc_id']; $sSymbol = $aGeneInfo['symbol']; $sGeneName = $aGeneInfo['name']; $sChromLocation = $aGeneInfo['location']; $sEntrez = $aGeneInfo['entrez_id']; $nOmim = $aGeneInfo['omim_id']; } } } if (!lovd_error()) { $_T->printHeader(); $_T->printTitle(); require ROOT_PATH . 'class/progress_bar.php'; $sFormNextPage = '<FORM action="' . $sPath . '" id="createGene" method="post">' . "\n" . ' <INPUT type="hidden" name="workID" value="' . $_POST['workID'] . '">' . "\n" . ' <INPUT type="submit" value="Continue »">' . "\n" . ' </FORM>';