<?php

require __DIR__ . '/../../_common.php';

// Demo output: exercise each pattern/number conversion helper once,
// separating the printed results with a horizontal rule.
echo PatternToNumber('AGT'), '<hr>';
echo PatternToNumberOptimized('GAATGGAACCATTAATCGTG'), '<hr>';
echo NumberToPattern(5437, 8), '<hr>';
echo NumberToPatternOptimized(7551, 7);
/**
 * Collects every k-mer that occurs at least $times times inside some window
 * of $Length characters of $genome, using a sliding-window frequency table.
 *
 * The table is computed once for the first window and then updated in place
 * as the window moves one character at a time: the k-mer leaving on the left
 * is decremented and the k-mer entering on the right is incremented.
 *
 * Relies on ComputingFrequencies(), PatternToNumberOptimized() and
 * NumberToPatternOptimized(), which are defined outside this function.
 * NOTE(review): assumes ComputingFrequencies() returns an array with an entry
 * for every index 0 .. 4^k - 1 -- confirm against its implementation.
 *
 * @param string $genome Text to scan.
 * @param int    $k      Length of the patterns (k-mers) to look for.
 * @param int    $Length Window length L.
 * @param int    $times  Minimum number of occurrences t within one window.
 *
 * @return array Patterns that formed a clump, in ascending order of their
 *               pattern number. Empty when $k < 1 or $Length < $k, because
 *               no k-mer fits inside such a window.
 */
function ClumpFindingOptimized($genome, $k, $Length, $times)
{
    /*
     * BetterClumpFinding(Genome, k, t, L)
     *     FrequentPatterns ← an empty set
     *     for i ← 0 to 4^k − 1
     *         Clump(i) ← 0
     *     Text ← Genome(0, L)
     *     FrequencyArray ← ComputingFrequencies(Text, k)
     *     for i ← 0 to 4^k − 1
     *         if FrequencyArray(i) ≥ t
     *             Clump(i) ← 1
     *     for i ← 1 to |Genome| − L
     *         FirstPattern ← Genome(i − 1, k)
     *         j ← PatternToNumber(FirstPattern)
     *         FrequencyArray(j) ← FrequencyArray(j) − 1
     *         LastPattern ← Genome(i + L − k, k)
     *         j ← PatternToNumber(LastPattern)
     *         FrequencyArray(j) ← FrequencyArray(j) + 1
     *         if FrequencyArray(j) ≥ t
     *             Clump(j) ← 1
     *     for i ← 0 to 4^k − 1
     *         if Clump(i) = 1
     *             Pattern ← NumberToPattern(i, k)
     *             add Pattern to the set FrequentPatterns
     *     return FrequentPatterns
     */

    // A window shorter than k cannot contain a k-mer. Without this guard the
    // substr() start below ($i + $Length - $k) can become zero or negative,
    // and a negative start is counted from the END of the string, so the
    // frequency table would be updated with bogus fragments.
    if ($k < 1 || $Length < $k) {
        return array();
    }

    $patternCount = pow(4, $k);

    // One flag per possible k-mer; set to 1 once the k-mer reaches $times
    // occurrences in any window.
    $Clump = array_fill(0, $patternCount, 0);

    // Frequencies of all k-mers in the first window.
    $FrequencyArray = ComputingFrequencies(substr($genome, 0, $Length), $k);
    for ($index = 0; $index < $patternCount; $index++) {
        if ($FrequencyArray[$index] >= $times) {
            $Clump[$index] = 1;
        }
    }

    // Slide the window one position at a time over the rest of the genome.
    $lastWindowStart = strlen($genome) - $Length;
    for ($start = 1; $start <= $lastWindowStart; $start++) {
        // k-mer that started at the previous window's first character
        // is no longer inside the window.
        $leaving = PatternToNumberOptimized(substr($genome, $start - 1, $k));
        $FrequencyArray[$leaving]--;

        // k-mer ending at the window's new last character has just entered.
        $entering = PatternToNumberOptimized(substr($genome, $start + $Length - $k, $k));
        $FrequencyArray[$entering]++;

        // Only the incremented count can newly reach the threshold.
        if ($FrequencyArray[$entering] >= $times) {
            $Clump[$entering] = 1;
        }
    }

    // Translate the flagged pattern numbers back into k-mer strings.
    $FrequentPatterns = array();
    for ($index = 0; $index < $patternCount; $index++) {
        if ($Clump[$index] === 1) {
            $FrequentPatterns[] = NumberToPatternOptimized($index, $k);
        }
    }

    return $FrequentPatterns;
}