<?php
// Page bootstrap: load shared helpers and the site configuration, then start
// output buffering before any markup is sent.
require_once 'libs/lib.php'; // globally used functions

// configuration file generated by install.php.
// A failed include is silenced on purpose: the CONFIG check below handles the
// case where no usable configuration was loaded (presumably config.php is what
// defines CONFIG -- confirm against install.php).
@(include 'config.php');
if (!defined('CONFIG')) {
    exit(setup());
}

clean_token(); // Destroy tokens for more security
clean_add_token();
ob_start();

// The team name is printed both as element text and inside a double-quoted
// attribute, so it is escaped once here for both contexts. The charset matches
// the Content-Type meta tag below; double_encode is off so entities that are
// already stored in the configuration are not encoded a second time.
$team_html = htmlspecialchars(
    isset($config['team']) ? (string) $config['team'] : '',
    ENT_QUOTES,
    'ISO-8859-1',
    false
);
?>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" />
<link rel="stylesheet" type="text/css" href="design/default.css" media="screen,projection" />
<link rel="alternate" type="application/rss+xml" title="<?php echo $team_html; ?> Releases RSS Feed" href="rss.xml" />
<link rel="shortcut icon" href="design/favicon.ico" />
<title><?php echo $team_html; ?> Release Portal</title>
</head>
<body>
<div id="contentwrapper">
/**
 * @brief Align words in two strings using a Smith-Waterman style algorithm
 *
 * Both strings are split into word tokens by tokenise_string() and the two token
 * arrays are aligned by dynamic programming. Tokens are compared case-insensitively
 * after being passed through clean_token(). Aligning words rather than characters
 * saves memory.
 *
 * The first row and column of the score matrix are zero, but cell values are NOT
 * clamped at zero as in the textbook Smith-Waterman recurrence. This is kept as it
 * was so that scores (and any thresholds callers apply to them) do not change.
 *
 * The HTML written to $html is: up to 10 haystack tokens before the alignment,
 * the aligned haystack tokens each wrapped in a highlighted <span> (black text for
 * a token equal to its needle token, grey otherwise), then up to 10 haystack tokens
 * after it. A '…' marks haystack text cut off at either end. Tokens are inserted
 * verbatim: no HTML escaping is done here, so callers must pass HTML-safe text.
 *
 * If nothing aligns (either string yields no tokens, or no matrix cell scores above
 * zero) $html receives only the leading haystack excerpt, with no highlighting.
 *
 * @param str1 First string (haystack)
 * @param str2 Second string (needle)
 * @param html Will contain the alignment between str1 and str2 in HTML format
 *
 * @return The score (0-1) of the alignment, where 1 is a perfect match between str2
 *         and a subsequence of str1. 0 is returned when str2 has no tokens.
 */
function smith_waterman($str1, $str2, &$html)
{
    // Weights
    $match     = 2;
    $mismatch  = -1;
    $deletion  = -1;
    $insertion = -1;

    // Number of haystack tokens shown on each side of the alignment
    $context = 10;

    // Tokenise input strings
    $X = tokenise_string($str1);
    $Y = tokenise_string($str2);

    // Lengths of strings (in tokens)
    $m = count($X);
    $n = count($Y);

    $score     = 0.0;
    $alignment = array();

    // With no tokens on either side there is nothing to align; skipping the whole
    // computation also avoids dividing by zero when the needle is empty.
    if ($m > 0 && $n > 0) {
        // Clean every token once instead of once per matrix cell.
        // NOTE(review): assumes clean_token() is a pure function of its argument.
        $cleanX = array();
        foreach ($X as $token) {
            $cleanX[] = clean_token($token);
        }
        $cleanY = array();
        foreach ($Y as $token) {
            $cleanY[] = clean_token($token);
        }

        // Create and initialise matrix for dynamic programming:
        // column 0 has one cell per haystack token, row 0 one cell per needle token.
        $H = array();
        for ($i = 0; $i <= $m; $i++) {
            $H[$i][0] = 0;
        }
        for ($j = 0; $j <= $n; $j++) {
            $H[0][$j] = 0;
        }

        $max_i = 0;
        $max_j = 0;
        $max_H = 0;

        for ($i = 1; $i <= $m; $i++) {
            for ($j = 1; $j <= $n; $j++) {
                // Tokens are either the same or they are not; no approximate matching
                $same = strcasecmp($cleanX[$i - 1], $cleanY[$j - 1]) == 0;

                $a = $H[$i - 1][$j - 1] + ($same ? $match : $mismatch);
                $b = $H[$i - 1][$j] + $deletion;
                $c = $H[$i][$j - 1] + $insertion;

                $H[$i][$j] = max($a, $b, $c);

                // Remember the first cell holding the highest score
                if ($H[$i][$j] > $max_H) {
                    $max_H = $H[$i][$j];
                    $max_i = $i;
                    $max_j = $j;
                }
            }
        }

        // Best possible score is perfect alignment with no mismatches or gaps
        $maximum_possible_score = $n * $match;
        $score = $max_H / $maximum_possible_score;

        // Traceback to recover alignment. Only possible when some cell scored above
        // zero; otherwise $max_i/$max_j are still 0 and there is no token to start at.
        if ($max_H > 0) {
            // From here on $i and $j are 0-based token indexes into $X and $Y
            $i = $max_i - 1;
            $j = $max_j - 1;

            while ($i != 0 && $j != 0) {
                // Needle tokens that clean to an empty string are not recorded
                if ($cleanY[$j] != '') {
                    array_unshift($alignment, array(
                        'pos'   => $i,
                        'match' => strcasecmp($cleanX[$i], $cleanY[$j]) == 0 ? 1 : 0,
                        'token' => $X[$i]
                    ));
                }

                // Step towards the largest of the three neighbouring cells,
                // preferring the diagonal on ties
                $up   = $H[$i - 1][$j];
                $left = $H[$i][$j - 1];
                $diag = $H[$i - 1][$j - 1];

                if ($up > $left) {
                    if ($up > $diag) {
                        $i -= 1;
                    } else {
                        $i -= 1;
                        $j -= 1;
                    }
                } else {
                    if ($left > $diag) {
                        $j -= 1;
                    } else {
                        $i -= 1;
                        $j -= 1;
                    }
                }
            }

            // Store last token in alignment (the pair at which the loop stopped)
            array_unshift($alignment, array(
                'pos'   => $i,
                'match' => strcasecmp($cleanX[$i], $cleanY[$j]) == 0 ? 1 : 0,
                'token' => $X[$i]
            ));
        }
    }

    // HTML snippet showing alignment
    // Local alignment
    $snippet  = '';
    $last_pos = -1;
    foreach ($alignment as $a) {
        // A haystack position can be recorded more than once in a row; show it once
        if ($a['pos'] != $last_pos) {
            if ($a['match'] == 1) {
                $snippet .= '<span style="color:black;font-weight:bold;background-color:yellow;">';
            } else {
                $snippet .= '<span style="color:rgb(128,128,128);font-weight:bold;background-color:yellow;">';
            }
            $snippet .= $a['token'] . ' ';
            $snippet .= '</span>';
        }
        $last_pos = $a['pos'];
    }

    // Haystack positions of the first and last aligned token. With no alignment
    // the "aligned" range is empty and sits at the start of the haystack.
    if (count($alignment) > 0) {
        $first = $alignment[0]['pos'];
        $last  = $alignment[count($alignment) - 1]['pos'];
    } else {
        $first = 0;
        $last  = -1;
    }

    // Embed this in haystack string
    // Before alignment: tokens $prefix_start .. $first - 1 (token 0 included)
    $prefix_start = max(0, $first - $context);
    $prefix = '';
    for ($p = $prefix_start; $p < $first; $p++) {
        $prefix .= $X[$p] . ' ';
    }
    if ($prefix_start > 0) {
        // Earlier haystack tokens were left out
        $prefix = '…' . $prefix;
    }

    // After alignment: tokens $last + 1 .. $suffix_end - 1
    $suffix_end = min($m, $last + 1 + $context);
    $suffix = '';
    for ($p = $last + 1; $p < $suffix_end; $p++) {
        $suffix .= ' ' . $X[$p];
    }
    if ($suffix_end < $m) {
        // Later haystack tokens were left out
        $suffix .= '…';
    }

    $html = $prefix . $snippet . $suffix;

    return $score;
}