* but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with LOVD. If not, see <http://www.gnu.org/licenses/>.
 *
 *************/

define('ROOT_PATH', '../');
require ROOT_PATH . 'inc-init.php';
require ROOT_PATH . 'inc-lib-variants.php';
session_write_close();

// Collect the request parameters once. Missing or non-string values (e.g.
// "?gene[]=x") are normalised to an empty string, so the checks below never
// read an undefined index and no PHP notice can end up in the JSON response.
$aInput = array();
foreach (array('transcript', 'variant', 'gene', 'reference') as $sKey) {
    $aInput[$sKey] = (isset($_GET[$sKey]) && is_string($_GET[$sKey]) ? $_GET[$sKey] : '');
}

// For protein prediction a transcript identifier and variant description
// are mandatory. Either a mitochondrial gene or a reference sequence
// identifier is also required.
if (empty($aInput['transcript']) || empty($aInput['variant'])
    || (empty($aInput['gene']) && empty($aInput['reference']))) {
    die(json_encode(AJAX_DATA_ERROR));
}

// This check must be done after a possible check for mitochondrial genes.
// Else we might check for a gene name with a mitochondrial gene alias name.
$aGenes = lovd_getGeneList();
if (!in_array($aInput['gene'], $aGenes)) {
    die(json_encode(AJAX_DATA_ERROR));
}

// Requires at least LEVEL_SUBMITTER, anything lower has no $_AUTH whatsoever.
if (!$_AUTH) {
    // If not authorized, die with error message.
    die(json_encode(AJAX_NO_AUTH));
}

// Run the prediction and send the resulting array back as JSON.
$aResult = lovd_getRNAProteinPrediction($aInput['reference'], $aInput['gene'], $aInput['transcript'], $aInput['variant']);
print json_encode($aResult);
/**
 * Predict the RNA and protein description of a variant on a given transcript
 * using the Mutalyzer webservice.
 *
 * The returned array may contain the following keys:
 *  - 'mutalyzer_error': string, set when the input is invalid or the SOAP call
 *    failed. In that case no prediction is made.
 *  - 'mutalyzer_url': string, URL built from $_CONF['mutalyzer_soap_url'] for
 *    showing the prediction in the interface.
 *  - 'error':   array (error code => message) of messages whose code starts
 *    with 'E' (except ERANGE, which is turned into a prediction).
 *  - 'warning': array (code => message) of all other messages.
 *  - 'predict': array with the keys 'RNA' and/or 'protein'.
 *
 * @param string $sReference        Reference sequence accession (UD_, NC_ or NG_);
 *                                  ignored for mitochondrial genes.
 * @param string $sGene             Gene symbol; when it is a key of
 *                                  $_SETT['mito_genes_aliases'] the gene is
 *                                  handled as a mitochondrial gene.
 * @param string $sNCBITranscriptID Transcript identifier.
 * @param string $sVariant          Variant description on the transcript.
 * @return array $aMutalyzerData
 */
function lovd_getRNAProteinPrediction($sReference, $sGene, $sNCBITranscriptID, $sVariant)
{
    global $_CONF, $_SETT;

    // Needs to be a require_once in case other code has already included this,
    // and also for repeated calls to this function.
    require_once ROOT_PATH . 'class/soap_client.php';

    $aMutalyzerData = array();

    // Regex pattern to match a reference accession number in variant description.
    $sRefseqPattern = '(UD_\\d{12}|N(?:C|G)_\\d{6,}\\.\\d{1,2})';

    if (isset($sGene) && isset($_SETT['mito_genes_aliases'][$sGene])) {
        // This is a mitochondrial gene.
        if (empty($sNCBITranscriptID) || empty($sVariant)) {
            $aMutalyzerData['mutalyzer_error'] = 'No valid transcript ID or variant specified.';
            return $aMutalyzerData;
        }

        // Gene is defined in the mito_genes_aliases in file inc-init.php: use the NCBI gene symbol.
        $sNCBITranscriptID = str_replace($sGene, $_SETT['mito_genes_aliases'][$sGene], $sNCBITranscriptID);

        // For mitochondrial genes, Mutalyzer specifies the NCBI transcript ID actually
        // as an NC_ accession number with NCBI gene alias (e.g. 'NC_012920.1(TRNF_v001)')
        // We can use that directly as a reference in the variant description.
        $sFullVariantDescription = $sNCBITranscriptID . ':' . $sVariant;
    } else {
        // Non-mitochondrial gene, use normal reference, transcript ID and variant.
        if (empty($sReference) || empty($sNCBITranscriptID) || empty($sVariant)
            || !preg_match('/^' . $sRefseqPattern . '$/', $sReference)) {
            $aMutalyzerData['mutalyzer_error'] = 'No valid input given for reference, transcript id or variant.';
            return $aMutalyzerData;
        }
        $sFullVariantDescription = $sReference . '(' . $sNCBITranscriptID . '):' . $sVariant;
    }

    // Build URL for protein prediction to be shown in interface.
    $aMutalyzerData['mutalyzer_url'] = str_replace('services', 'check', $_CONF['mutalyzer_soap_url']) .
        '?name=' . urlencode($sFullVariantDescription) . '&standalone=1';

    // Make call to mutalyzer to check variant description.
    $_Mutalyzer = new LOVD_SoapClient();
    try {
        $oOutput = $_Mutalyzer->runMutalyzer(array('variant' => $sFullVariantDescription))->runMutalyzerResult;
    } catch (SoapFault $e) {
        $aMutalyzerData['mutalyzer_error'] = 'Unexpected response from Mutalyzer. Please try again later.';
        return $aMutalyzerData;
    }

    // When transcript is not found, attempt fallback to newer version of transcript
    foreach (getMutalyzerMessages($oOutput) as $oSoapMessage) {
        if (!isset($oSoapMessage->errorcode) || $oSoapMessage->errorcode !== 'EINVALIDTRANSVAR') {
            continue;
        }

        // Invalid transcript variant.
        if (isset($oOutput->legend) && !empty($oOutput->legend->LegendRecord)) {
            // Check if a newer version of the transcript is available from the legend.
            // Pad the result, so a transcript ID without a version yields an
            // empty version (treated as 0 below) instead of an undefined offset.
            list($sAccession, $sVersion) = array_pad(explode('.', $sNCBITranscriptID), 2, '');

            foreach ($oOutput->legend->LegendRecord as $oRecord) {
                if (!isset($oRecord->id)) {
                    continue;
                }
                $aRecordFields = explode('.', $oRecord->id);
                if (count($aRecordFields) != 2) {
                    continue;
                }
                list($sAltAccession, $sAltVersion) = $aRecordFields;

                if ($sAccession == $sAltAccession && intval($sAltVersion) > intval($sVersion)) {
                    // Found a newer version of the transcript. Try to do protein
                    // prediction using that record instead.
                    $aAltMutalyzerOutput = lovd_getRNAProteinPrediction($sReference, $sGene, $oRecord->id, $sVariant);

                    if (!isset($aAltMutalyzerOutput['mutalyzer_error'])
                        && !isset($aAltMutalyzerOutput['error'])
                        && !empty($aAltMutalyzerOutput['predict'])) {
                        // Prediction with alternative transcript record went well, return it
                        // with an added warning.
                        $aAltMutalyzerOutput['warning']['DEPRECATED TRANSCRIPT'] =
                            'The provided transcript is outdated, the given prediction is ' .
                            'based on the latest version of the transcript: ' .
                            $sAltAccession . '.' . $sAltVersion;
                        return $aAltMutalyzerOutput;
                    }
                }
            }

            // Could not find a newer version of the transcript.
            $aMutalyzerData['error'][$oSoapMessage->errorcode] = trim($oSoapMessage->message);
            return $aMutalyzerData;
        }
    }

    // Find protein prediction in mutalyzer output.
    if (isset($oOutput->legend) && !empty($oOutput->legend->LegendRecord)
        && !empty($oOutput->proteinDescriptions->string)) {
        $sMutProteinName = null;

        // Loop over legend records to find transcript name (v-number).
        foreach ($oOutput->legend->LegendRecord as $oRecord) {
            if (isset($oRecord->id) && isset($oRecord->name) && $oRecord->id == $sNCBITranscriptID
                && substr($oRecord->name, -4, 1) == 'v') {
                // Generate protein isoform name (i-number) from transcript name (v-number)
                $sMutProteinName = str_replace('_v', '_i', $oRecord->name);
                break;
            }
        }

        if (isset($sMutProteinName)) {
            // Select protein description based on protein isoform (i-number).
            // Group 1 is the reference accession, group 2 the p. description.
            $sProteinDescriptions = implode('|', $oOutput->proteinDescriptions->string);
            preg_match(
                '/' . $sRefseqPattern . '\\(' . preg_quote($sMutProteinName, '/') . '\\):(p\\..+?)(\\||$)/',
                $sProteinDescriptions,
                $aProteinMatches
            );
            if (isset($aProteinMatches[2])) {
                $aMutalyzerData['predict']['protein'] = $aProteinMatches[2];
            }
        }
    }

    foreach (getMutalyzerMessages($oOutput) as $oSoapMessage) {
        if (!isset($oSoapMessage->errorcode)) {
            // Messages without a code are neither errors nor warnings to us.
            continue;
        }

        if ($oSoapMessage->errorcode === 'ERANGE') {
            // Ignore 'ERANGE' as an actual error, because we can always interpret this as p.(=), p.? or p.0.
            // Strip an optional "reference:" prefix; without a colon the whole
            // description is the DNA change.
            $nColonPosition = strpos($sVariant, ':');
            $sDNAChange = ($nColonPosition === false ? $sVariant : substr($sVariant, $nColonPosition + 1));
            $aVariantRange = explode('_', $sDNAChange);
            $nPositions = count($aVariantRange);

            // Upstream positions look like "-123", downstream positions like "*123".
            $bStartUpstream = (bool) preg_match('/-\\d+/', $aVariantRange[0]);
            $bStartDownstream = (bool) preg_match('/\\*\\d+/', $aVariantRange[0]);
            $bEndUpstream = ($nPositions === 2 && preg_match('/-\\d+/', $aVariantRange[1]));
            $bEndDownstream = ($nPositions === 2 && preg_match('/\\*\\d+/', $aVariantRange[1]));

            // Check what the variant looks like and act accordingly.
            if ($nPositions === 2 && $bStartUpstream && $bEndUpstream) {
                // Variant has 2 positions. Variant has both the start and end positions upstream
                // of the transcript, we can assume that the product will not be affected.
                $sPredictR = 'r.(=)';
                $sPredictP = 'p.(=)';
            } elseif ($nPositions === 2 && $bStartUpstream && $bEndDownstream) {
                // Variant has 2 positions. Variant has an upstream start position and a downstream
                // end position, we can assume that the product will not be expressed.
                $sPredictR = 'r.0?';
                $sPredictP = 'p.0?';
            } elseif ($nPositions === 2 && $bStartDownstream && $bEndDownstream) {
                // Variant has 2 positions. Variant has both the start and end positions downstream
                // of the transcript, we can assume that the product will not be affected.
                $sPredictR = 'r.(=)';
                $sPredictP = 'p.(=)';
            } elseif ($nPositions === 1 && ($bStartUpstream || $bStartDownstream)) {
                // Variant has 1 position and is either upstream or downstream from the transcript,
                // we can assume that the product will not be affected.
                // The parentheses matter: without them "&&" binds tighter than "||" and any variant
                // starting downstream would end up here, regardless of its number of positions.
                $sPredictR = 'r.(=)';
                $sPredictP = 'p.(=)';
            } else {
                // One of the positions of the variant falls within the transcript, so we can not
                // make any assumptions based on that.
                $sPredictR = 'r.?';
                $sPredictP = 'p.?';
            }

            // Fill in our assumption in aData to forge that this information came from Mutalyzer.
            $aMutalyzerData['predict']['protein'] = $sPredictP;
            $aMutalyzerData['predict']['RNA'] = $sPredictR;
            continue;
        }

        if ($oSoapMessage->errorcode === 'WSPLICE') {
            // Mutalyzer now (2012-12-07) returns a WSPLICE for <= 5 nucleotides from the site,
            // even though there internally is a difference between variants in splice sites,
            // and variants close to splice sites.
            // Most likely, they will include two different types of errors in the future.
            $aMutalyzerData['predict']['protein'] = 'p.?';
            $aMutalyzerData['predict']['RNA'] = 'r.spl?';
        }

        // Codes starting with 'E' are errors, everything else is reported as a warning.
        if (substr($oSoapMessage->errorcode, 0, 1) === 'E') {
            $aMutalyzerData['error'][trim($oSoapMessage->errorcode)] = trim($oSoapMessage->message);
        } else {
            $aMutalyzerData['warning'][trim($oSoapMessage->errorcode)] = trim($oSoapMessage->message);
        }
    }

    if ($oOutput->errors === 0 && empty($aMutalyzerData['predict']['RNA'])) {
        // RNA not filled in yet.
        if (!isset($aMutalyzerData['predict']['protein'])) {
            // Non-coding transcript, Mutalyzer does not return a protein field, but also no error.
            // FIXME: Check for intronic variants here, that do not span over an exon, and give them r.(=).
            $aMutalyzerData['predict']['RNA'] = 'r.(?)';
            $aMutalyzerData['predict']['protein'] = '-';
        } elseif ($aMutalyzerData['predict']['protein'] == 'p.?') {
            $aMutalyzerData['predict']['RNA'] = 'r.?';
        } elseif ($aMutalyzerData['predict']['protein'] == 'p.(=)') {
            // FIXME: Not correct in case of substitutions e.g. in the third position of the codon,
            //  not leading to a protein change.
            $aMutalyzerData['predict']['RNA'] = 'r.(=)';
        } else {
            // RNA will default to r.(?).
            $aMutalyzerData['predict']['RNA'] = 'r.(?)';
        }
    }

    return $aMutalyzerData;
}
$nCurator = array_search($aVariant['created_by'], $aManagerList) !== false ? $aVariant['created_by'] : $aManagerList[0]; $_DB->query('INSERT INTO ' . TABLE_CURATES . ' VALUES (?, ?, ?, ?)', array($nCurator, $sSymbol, 1, 1)); // Also activate default custom columns for this gene. lovd_addAllDefaultCustomColumns('gene', $sSymbol, 0); } // Now insert the transcript. $q = $_DB->query('INSERT IGNORE INTO ' . TABLE_TRANSCRIPTS . ' (' . implode(', ', array_keys($aFieldsTranscript)) . ') VALUES (?' . str_repeat(', ?', count($aFieldsTranscript) - 1) . ')', array_values($aFieldsTranscript)); if ($q->rowCount()) { // Get the ID of the newly inserted transcript. $nID = $_DB->lastInsertId(); } else { // This transcript was just added by a concurrent call to the mapping script. Get its ID and map on. $nID = $_DB->query('SELECT id FROM ' . TABLE_TRANSCRIPTS . ' WHERE id_ncbi = ?', array($aFieldsTranscript['id_ncbi']))->fetchColumn(); } // Get the p. description too. $aPrediction = lovd_getRNAProteinPrediction($sRefseqUD, $sSymbol, $aFieldsTranscript['id_ncbi'], $aVariantOnTranscriptSQL[1][7]); $aVariantOnTranscriptSQL[1][8] = empty($aPrediction['predict']['RNA']) ? '' : $aPrediction['predict']['RNA']; $aVariantOnTranscriptSQL[1][9] = empty($aPrediction['predict']['protein']) ? '' : $aPrediction['predict']['protein']; // Map the variant to the newly inserted transcript. $aVariantOnTranscriptSQL[1][1] = $nID; if ($_DB->query($aVariantOnTranscriptSQL[0], $aVariantOnTranscriptSQL[1], false)) { // If the insert succeeded, save some data in the variant array for lovd_fetchDBID(). $aVariant['aTranscripts'][$nID] = array($aFieldsTranscript['id_ncbi'], $sSymbol); $aVariant[$nID . '_VariantOnTranscript/DNA'] = $aVariantOnTranscriptSQL[1][7]; } // Also remember that we've got this gene and transcript now. $aTranscriptsInLOVD[$sSymbol][$nID] = array('id' => $nID, 'id_ncbi' => $aFieldsTranscript['id_ncbi']); } else { // Mutalyzer does not have the transcript we're looking for. 
Don't retry this gene! $aFailedGenes[$sGene] = true; }