<?php

require __DIR__ . '/../../_common.php';
// Print the result of ComputingFrequencies() for the sample genome with k = 8
// (presumably the k-mer frequency array; the helper lives outside this file).
// ClumpFindingOptimized() indexes and increments that result like an array, and
// echoing an array directly prints only the word "Array" plus a warning, so the
// counts are joined with single spaces. Any non-array result is echoed unchanged.
$frequencies = ComputingFrequencies('GTTGGACCTTTTCTCCCACCCTGCTCTGTCTTTAGGTGGCAAAGTGTAGACGTCAACCATTGAAATCATCTGGGAGGTACAACGTTGCAGTACACGAATGTCTTATGTTTCCCTGTCAGTGGTGCCCTGCGATATTCGTAAAGGCATACATAGGACCTGGCGAAAAGTGAACTAGAATAAGAACATCTGGCTCATATTTAATCTATGCGTTAGAACCAATCGGCTATTGCACGAGAATGACCAAGTTGTGGCGTCTGTCGAGGCAGAGCTTCGGACCTTGACTGACAATTATCATTCGCAACCGTAATTCCGTCCGCTGTACGGGTCGAGCCAGAGTCCATGCGCGGAGTATTGGGCTAGTATCAGACCCGGCAGGAGCGGTACACATCTGAAATTGGCAAGGCGCGGATGTCCAAGTCGTACACCCTCGCCGTCCAACTGGCTCTCACTCGTGGCCATAGGCTTTAGTTCTAAAGAATCAGGATGGGATCCAGTACTGTCTCTCAATCGGGCACGGTACATTAGCTCTTCCAGATGAATTACGCTACAGCTAGCCTCTCGGGCGGGGCAAGCCGCATAACGAGAAGTCCCCGATAAAACCCTGGCGAGAAAGAAGACCGGGA', 8);
echo is_array($frequencies) ? implode(' ', $frequencies) : $frequencies;
// Example #2
// 0
/**
 * Sliding-window clump finding (the textbook "BetterClumpFinding" routine).
 *
 * Reports every k-mer whose count reaches at least $times inside at least one
 * window of $Length consecutive characters of $genome.
 *
 * Outline:
 *   1. Count all k-mers of the first window, Genome(0, L), with
 *      ComputingFrequencies().
 *   2. Flag every index in 0 .. 4^k - 1 whose count is already >= t.
 *   3. Shift the window one character at a time: the k-mer that starts at the
 *      old window start loses one occurrence, the k-mer that ends at the new
 *      window end gains one; flag the gaining k-mer if its count is now >= t.
 *   4. Turn every flagged index back into its pattern.
 *
 * NOTE: the textbook pseudocode takes (Genome, k, t, L); this function takes
 * the window length before the threshold.
 *
 * Assumes ComputingFrequencies() returns counts keyed by the same integer code
 * that PatternToNumberOptimized() produces, and that NumberToPatternOptimized()
 * is its inverse -- those helpers are defined outside this file (TODO confirm).
 *
 * @param string $genome Sequence to scan.
 * @param int    $k      Pattern (k-mer) length.
 * @param int    $Length Window length L.
 * @param int    $times  Minimum occurrences t within a single window.
 *
 * @return array Patterns for the flagged indices, in ascending index order.
 */
function ClumpFindingOptimized($genome, $k, $Length, $times)
{
    // Highest k-mer index; indices run over 0 .. 4^k - 1.
    $lastIndex = pow(4, $k) - 1;

    // Occurrence counts for the very first window.
    $counts = ComputingFrequencies(substr($genome, 0, $Length), $k);

    // $isClump[index] is 1 once that k-mer has reached the threshold in any window.
    $isClump = [];
    for ($code = 0; $code <= $lastIndex; $code++) {
        $isClump[$code] = ($counts[$code] >= $times) ? 1 : 0;
    }

    // $start is the first position of the window that is being left behind.
    $shifts = strlen($genome) - $Length;
    for ($start = 0; $start < $shifts; $start++) {
        // The k-mer at the old window start drops out of the window.
        $leaving = PatternToNumberOptimized(substr($genome, $start, $k));
        --$counts[$leaving];

        // The k-mer that ends at the new window end enters the window.
        $entering = PatternToNumberOptimized(substr($genome, $start + 1 + $Length - $k, $k));
        if (++$counts[$entering] >= $times) {
            $isClump[$entering] = 1;
        }
    }

    // Decode the flagged indices, walking them in ascending order.
    $found = [];
    for ($code = 0; $code <= $lastIndex; $code++) {
        if ($isClump[$code] === 1) {
            $found[] = NumberToPatternOptimized($code, $k);
        }
    }

    return $found;
}