Пример #1
0
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with LOVD.  If not, see <http://www.gnu.org/licenses/>.
 *
 *************/
define('ROOT_PATH', '../');
require ROOT_PATH . 'inc-init.php';
require ROOT_PATH . 'inc-lib-variants.php';
// Close the session for writing before the prediction call below, which
// contacts an external web service.
session_write_close();

// Read the request parameters defensively. A parameter that is missing, or that
// is not a plain string (e.g. "?gene[]=x"), is treated as empty. This avoids
// "undefined index" notices (which would end up in the JSON response) when
// only one of 'gene' and 'reference' has been provided.
$aInput = array();
foreach (array('reference', 'gene', 'transcript', 'variant') as $sParam) {
    $aInput[$sParam] = (isset($_GET[$sParam]) && is_string($_GET[$sParam]) ? $_GET[$sParam] : '');
}

// For protein prediction a transcript identifier and variant description
// are mandatory. Either a mitochondrial gene or a reference sequence
// identifier is also required.
// The parentheses make the intended grouping explicit; && binds tighter than ||.
if (empty($aInput['transcript']) || empty($aInput['variant'])
    || (empty($aInput['gene']) && empty($aInput['reference']))) {
    die(json_encode(AJAX_DATA_ERROR));
}

// This check must be done after a possible check for mitochondrial genes.
// Else we might check for a gene name with a mitochondrial gene alias name.
$aGenes = lovd_getGeneList();
if (!in_array($aInput['gene'], $aGenes)) {
    die(json_encode(AJAX_DATA_ERROR));
}

// Requires at least LEVEL_SUBMITTER, anything lower has no $_AUTH whatsoever.
if (!$_AUTH) {
    // If not authorized, die with error message.
    die(json_encode(AJAX_NO_AUTH));
}

$aResult = lovd_getRNAProteinPrediction($aInput['reference'], $aInput['gene'], $aInput['transcript'], $aInput['variant']);
print json_encode($aResult);
Пример #2
0
/**
 * Predict the RNA and protein description of a variant on a given transcript
 * using the Mutalyzer webservice.
 *
 * For genes listed in $_SETT['mito_genes_aliases'] the transcript ID (with the
 * gene symbol replaced by its alias) is used directly as the reference of the
 * variant description. For all other genes the description is built as
 * "<reference>(<transcript>):<variant>".
 *
 * When Mutalyzer reports EINVALIDTRANSVAR and its legend lists a newer version
 * of the same transcript accession, the prediction is retried (recursively)
 * with that newer version.
 *
 * @param string $sReference        Reference sequence accession (UD_, NC_ or NG_
 *                                  with version). Only used for genes that are
 *                                  not in $_SETT['mito_genes_aliases'].
 * @param string $sGene             Gene symbol.
 * @param string $sNCBITranscriptID Transcript identifier.
 * @param string $sVariant          Variant description on DNA level.
 * @return array Any of the following keys may be present:
 *               'mutalyzer_error' (string)  Input was invalid or the SOAP call failed.
 *               'mutalyzer_url'   (string)  URL of the Mutalyzer check page for this variant.
 *               'predict'         (array)   Keys 'protein' and/or 'RNA' with the predictions.
 *               'error'           (array)   Mutalyzer messages whose code starts with 'E', by code.
 *               'warning'         (array)   All other Mutalyzer messages, by code.
 */
function lovd_getRNAProteinPrediction($sReference, $sGene, $sNCBITranscriptID, $sVariant)
{
    global $_CONF, $_SETT;
    // Needs to be a require_once in case other code has already included this, and also for repeated calls to this function.
    require_once ROOT_PATH . 'class/soap_client.php';
    $aMutalyzerData = array();
    // Regex pattern to match a reference accession number in variant description.
    $sRefseqPattern = '(UD_\\d{12}|N(?:C|G)_\\d{6,}\\.\\d{1,2})';

    if (isset($sGene) && isset($_SETT['mito_genes_aliases'][$sGene])) {
        // This is a mitochondrial gene.
        if (empty($sNCBITranscriptID) || empty($sVariant)) {
            $aMutalyzerData['mutalyzer_error'] = 'No valid transcript ID or variant specified.';
            return $aMutalyzerData;
        }
        // Gene is defined in the mito_genes_aliases in file inc-init.php: use the NCBI gene symbol.
        $sNCBITranscriptID = str_replace($sGene, $_SETT['mito_genes_aliases'][$sGene], $sNCBITranscriptID);
        // For mitochondrial genes, Mutalyzer specifies the NCBI transcript ID actually
        // as an NC_ accession number with NCBI gene alias (e.g. 'NC_012920.1(TRNF_v001)')
        // We can use that directly as a reference in the variant description.
        $sFullVariantDescription = $sNCBITranscriptID . ':' . $sVariant;
    } else {
        // Non-mitochondrial gene, use normal reference, transcript ID and variant.
        if (empty($sReference) || empty($sNCBITranscriptID) || empty($sVariant) || !preg_match('/^' . $sRefseqPattern . '$/', $sReference)) {
            $aMutalyzerData['mutalyzer_error'] = 'No valid input given for reference, transcript id or variant.';
            return $aMutalyzerData;
        }
        $sFullVariantDescription = $sReference . '(' . $sNCBITranscriptID . '):' . $sVariant;
    }

    // Build URL for protein prediction to be shown in interface.
    $aMutalyzerData['mutalyzer_url'] = str_replace('services', 'check', $_CONF['mutalyzer_soap_url']) . '?name=' . urlencode($sFullVariantDescription) . '&standalone=1';

    // Make call to mutalyzer to check variant description.
    $_Mutalyzer = new LOVD_SoapClient();
    try {
        $oOutput = $_Mutalyzer->runMutalyzer(array('variant' => $sFullVariantDescription))->runMutalyzerResult;
    } catch (SoapFault $e) {
        $aMutalyzerData['mutalyzer_error'] = 'Unexpected response from Mutalyzer. Please try again later.';
        return $aMutalyzerData;
    }

    // Collect the legend records as an array. PHP's SoapClient decodes a
    // sequence holding a single element as that element itself (not as an
    // array) unless SOAP_SINGLE_ELEMENT_ARRAYS is enabled, so accept both.
    $aLegendRecords = array();
    if (isset($oOutput->legend) && !empty($oOutput->legend->LegendRecord)) {
        $aLegendRecords = (is_array($oOutput->legend->LegendRecord) ? $oOutput->legend->LegendRecord : array($oOutput->legend->LegendRecord));
    }

    // When transcript is not found, attempt fallback to newer version of transcript
    foreach (getMutalyzerMessages($oOutput) as $oSoapMessage) {
        if ($oSoapMessage->errorcode === 'EINVALIDTRANSVAR' && !empty($aLegendRecords)) {
            // Invalid transcript variant.
            // Check if a newer version of the transcript is available from the legend.
            // A transcript ID without a version is treated as version 0, without
            // raising an "undefined offset" notice.
            $aIDParts = explode('.', $sNCBITranscriptID);
            $sAccession = $aIDParts[0];
            $sVersion = (isset($aIDParts[1]) ? $aIDParts[1] : '');
            foreach ($aLegendRecords as $oRecord) {
                if (!isset($oRecord->id)) {
                    continue;
                }
                $aRecordFields = explode('.', $oRecord->id);
                if (count($aRecordFields) != 2) {
                    continue;
                }
                list($sAltAccession, $sAltVersion) = $aRecordFields;
                if ($sAccession == $sAltAccession && intval($sAltVersion) > intval($sVersion)) {
                    // Found a newer version of the transcript. Try to do protein
                    // prediction using that record instead.
                    $aAltMutalyzerOutput = lovd_getRNAProteinPrediction($sReference, $sGene, $oRecord->id, $sVariant);
                    if (!isset($aAltMutalyzerOutput['mutalyzer_error']) && !isset($aAltMutalyzerOutput['error']) && !empty($aAltMutalyzerOutput['predict'])) {
                        // Prediction with alternative transcript record went well, return it
                        // with an added warning.
                        $aAltMutalyzerOutput['warning']['DEPRECATED TRANSCRIPT'] = 'The provided transcript is outdated, the given prediction is based on the latest version of the transcript: ' . $sAltAccession . '.' . $sAltVersion;
                        return $aAltMutalyzerOutput;
                    }
                }
            }
            // Could not find a newer version of the transcript.
            $aMutalyzerData['error'][$oSoapMessage->errorcode] = trim($oSoapMessage->message);
            return $aMutalyzerData;
        }
    }

    // Find protein prediction in mutalyzer output.
    if (!empty($aLegendRecords) && !empty($oOutput->proteinDescriptions->string)) {
        $sMutProteinName = null;
        // Loop over legend records to find transcript name (v-number).
        foreach ($aLegendRecords as $oRecord) {
            if (isset($oRecord->id) && $oRecord->id == $sNCBITranscriptID && substr($oRecord->name, -4, 1) == 'v') {
                // Generate protein isoform name (i-number) from transcript name (v-number)
                $sMutProteinName = str_replace('_v', '_i', $oRecord->name);
                break;
            }
        }
        if (isset($sMutProteinName)) {
            // Select protein description based on protein isoform (i-number).
            // The cast turns a single description (plain string) into a one-element array.
            $sProteinDescriptions = implode('|', (array) $oOutput->proteinDescriptions->string);
            // Group 1 is the reference accession (from $sRefseqPattern), group 2 the p. description.
            // preg_quote() needs the delimiter to escape it as well.
            preg_match('/' . $sRefseqPattern . '\\(' . preg_quote($sMutProteinName, '/') . '\\):(p\\..+?)(\\||$)/', $sProteinDescriptions, $aProteinMatches);
            if (isset($aProteinMatches[2])) {
                $aMutalyzerData['predict']['protein'] = $aProteinMatches[2];
            }
        }
    }

    foreach (getMutalyzerMessages($oOutput) as $oSoapMessage) {
        if ($oSoapMessage->errorcode === 'ERANGE') {
            // Ignore 'ERANGE' as an actual error, because we can always interpret this as p.(=), p.? or p.0.
            // Strip an optional "<reference>:" prefix. strpos() returns false when
            // there is no colon; in that case use the description as it is.
            $nColon = strpos($sVariant, ':');
            $sDNAChange = ($nColon === false ? $sVariant : substr($sVariant, $nColon + 1));
            $aVariantRange = explode('_', $sDNAChange);
            $nPositions = count($aVariantRange);
            // Check what the variant looks like and act accordingly.
            // NOTE(review): the '-<digits>' pattern is not anchored, so it also matches
            // intronic offsets such as "123-5" - confirm that this is intended.
            if ($nPositions === 2 && preg_match('/-\\d+/', $aVariantRange[0]) && preg_match('/-\\d+/', $aVariantRange[1])) {
                // Variant has 2 positions. Variant has both the start and end positions upstream of the transcript, we can assume that the product will not be affected.
                $sPredictR = 'r.(=)';
                $sPredictP = 'p.(=)';
            } elseif ($nPositions === 2 && preg_match('/-\\d+/', $aVariantRange[0]) && preg_match('/\\*\\d+/', $aVariantRange[1])) {
                // Variant has 2 positions. Variant has an upstream start position and a downstream end position, we can assume that the product will not be expressed.
                $sPredictR = 'r.0?';
                $sPredictP = 'p.0?';
            } elseif ($nPositions === 2 && preg_match('/\\*\\d+/', $aVariantRange[0]) && preg_match('/\\*\\d+/', $aVariantRange[1])) {
                // Variant has 2 positions. Variant has both the start and end positions downstream of the transcript, we can assume that the product will not be affected.
                $sPredictR = 'r.(=)';
                $sPredictP = 'p.(=)';
            } elseif ($nPositions === 1 && (preg_match('/-\\d+/', $aVariantRange[0]) || preg_match('/\\*\\d+/', $aVariantRange[0]))) {
                // Variant has 1 position and is either upstream or downstream from the transcript, we can assume that the product will not be affected.
                // The parentheses are required: && binds tighter than ||, so without them
                // any variant starting at a downstream position would end up here,
                // regardless of its number of positions.
                $sPredictR = 'r.(=)';
                $sPredictP = 'p.(=)';
            } else {
                // One of the positions of the variant falls within the transcript, so we can not make any assumptions based on that.
                $sPredictR = 'r.?';
                $sPredictP = 'p.?';
            }
            // Fill in our assumption in aData to forge that this information came from Mutalyzer.
            $aMutalyzerData['predict']['protein'] = $sPredictP;
            $aMutalyzerData['predict']['RNA'] = $sPredictR;
            // Skip the error/warning registration below; ERANGE is not reported.
            continue;
        } elseif ($oSoapMessage->errorcode === 'WSPLICE') {
            // Mutalyzer now (2012-12-07) returns a WSPLICE for <= 5 nucleotides from the site,
            // even though there internally is a difference between variants in splice sites,
            // and variants close to splice sites.
            // Most likely, they will include two different types of errors in the future.
            $aMutalyzerData['predict']['protein'] = 'p.?';
            $aMutalyzerData['predict']['RNA'] = 'r.spl?';
        }
        // Codes starting with 'E' are stored as errors, all other codes as warnings.
        if (isset($oSoapMessage->errorcode) && substr($oSoapMessage->errorcode, 0, 1) === 'E') {
            $aMutalyzerData['error'][trim($oSoapMessage->errorcode)] = trim($oSoapMessage->message);
        } elseif (isset($oSoapMessage->errorcode)) {
            $aMutalyzerData['warning'][trim($oSoapMessage->errorcode)] = trim($oSoapMessage->message);
        }
    }

    if ($oOutput->errors === 0 && empty($aMutalyzerData['predict']['RNA'])) {
        // RNA not filled in yet.
        if (!isset($aMutalyzerData['predict']['protein'])) {
            // Non-coding transcript, Mutalyzer does not return a protein field, but also no error.
            // FIXME: Check for intronic variants here, that do not span over an exon, and give them r.(=).
            $aMutalyzerData['predict']['RNA'] = 'r.(?)';
            $aMutalyzerData['predict']['protein'] = '-';
        } elseif ($aMutalyzerData['predict']['protein'] == 'p.?') {
            $aMutalyzerData['predict']['RNA'] = 'r.?';
        } elseif ($aMutalyzerData['predict']['protein'] == 'p.(=)') {
            // FIXME: Not correct in case of substitutions e.g. in the third position of the codon, not leading to a protein change.
            $aMutalyzerData['predict']['RNA'] = 'r.(=)';
        } else {
            // RNA will default to r.(?).
            $aMutalyzerData['predict']['RNA'] = 'r.(?)';
        }
    }
    return $aMutalyzerData;
}
Пример #3
0
         $nCurator = array_search($aVariant['created_by'], $aManagerList) !== false ? $aVariant['created_by'] : $aManagerList[0];
         $_DB->query('INSERT INTO ' . TABLE_CURATES . ' VALUES (?, ?, ?, ?)', array($nCurator, $sSymbol, 1, 1));
         // Also activate default custom columns for this gene.
         lovd_addAllDefaultCustomColumns('gene', $sSymbol, 0);
     }
     // Now insert the transcript.
     $q = $_DB->query('INSERT IGNORE INTO ' . TABLE_TRANSCRIPTS . ' (' . implode(', ', array_keys($aFieldsTranscript)) . ') VALUES (?' . str_repeat(', ?', count($aFieldsTranscript) - 1) . ')', array_values($aFieldsTranscript));
     if ($q->rowCount()) {
         // Get the ID of the newly inserted transcript.
         $nID = $_DB->lastInsertId();
     } else {
         // This transcript was just added by a concurrent call to the mapping script. Get its ID and map on.
         $nID = $_DB->query('SELECT id FROM ' . TABLE_TRANSCRIPTS . ' WHERE id_ncbi = ?', array($aFieldsTranscript['id_ncbi']))->fetchColumn();
     }
     // Get the p. description too.
     $aPrediction = lovd_getRNAProteinPrediction($sRefseqUD, $sSymbol, $aFieldsTranscript['id_ncbi'], $aVariantOnTranscriptSQL[1][7]);
     $aVariantOnTranscriptSQL[1][8] = empty($aPrediction['predict']['RNA']) ? '' : $aPrediction['predict']['RNA'];
     $aVariantOnTranscriptSQL[1][9] = empty($aPrediction['predict']['protein']) ? '' : $aPrediction['predict']['protein'];
     // Map the variant to the newly inserted transcript.
     $aVariantOnTranscriptSQL[1][1] = $nID;
     if ($_DB->query($aVariantOnTranscriptSQL[0], $aVariantOnTranscriptSQL[1], false)) {
         // If the insert succeeded, save some data in the variant array for lovd_fetchDBID().
         $aVariant['aTranscripts'][$nID] = array($aFieldsTranscript['id_ncbi'], $sSymbol);
         $aVariant[$nID . '_VariantOnTranscript/DNA'] = $aVariantOnTranscriptSQL[1][7];
     }
     // Also remember that we've got this gene and transcript now.
     $aTranscriptsInLOVD[$sSymbol][$nID] = array('id' => $nID, 'id_ncbi' => $aFieldsTranscript['id_ncbi']);
 } else {
     // Mutalyzer does not have the transcript we're looking for. Don't retry this gene!
     $aFailedGenes[$sGene] = true;
 }