/**
 * Find all k-mer motifs shared by a set of DNA strings.
 *
 * Input layout: $lines[0] is "k d" (two space-separated integers); every
 * following non-empty entry is treated as one DNA string.
 *
 * Candidates are the members of BioUtil::neighbors() for each k-length
 * substring of the first DNA string. A candidate is kept when
 * Arrays::matches() accepts the predicate
 * "BioUtil::approximate_frequency($str, $candidate, $d, true) > 0" over the
 * collected strings.
 * NOTE(review): presumably Arrays::matches() is an "all elements satisfy"
 * check and approximate_frequency() counts occurrences with at most $d
 * mismatches - confirm against those helpers.
 *
 * @param array $lines header line followed by the DNA strings
 * @return array distinct motifs in order of first discovery; empty when the
 *               header line or the DNA strings are missing
 */
public function compute(array $lines)
{
    // Without a header line there is nothing to parse.
    if (!isset($lines[0])) {
        return [];
    }

    list($k, $d) = array_map(function ($val) {
        return (int) $val;
    }, explode(' ', $lines[0]));

    // Collect the non-empty DNA strings that follow the header.
    $strings = [];
    $l = count($lines);
    for ($i = 1; $i < $l; $i++) {
        if (!empty($lines[$i])) {
            $strings[] = $lines[$i];
        }
    }

    // No DNA strings: avoid reading $strings[0] at an undefined offset.
    if (count($strings) === 0) {
        return [];
    }

    $first = $strings[0];
    // Last start index of a k-length window; computed once instead of per iteration.
    $lastStart = strlen($first) - $k;

    $motifs = [];
    // Candidates already evaluated, keyed by the candidate itself for O(1) lookup.
    // Each candidate is tested at most once; this relies on the predicate being
    // deterministic for a given candidate.
    $tested = [];

    for ($i = 0; $i <= $lastStart; $i++) {
        $patt = substr($first, $i, $k);

        foreach (BioUtil::neighbors($patt, $d) as $nb) {
            // Cheap duplicate check first, so the expensive scan below is skipped.
            if (isset($tested[$nb])) {
                continue;
            }
            $tested[$nb] = true;

            $inEveryString = Arrays::matches($strings, function ($str) use ($nb, $d) {
                return BioUtil::approximate_frequency($str, $nb, $d, true) > 0;
            });

            if ($inEveryString) {
                $motifs[] = $nb;
            }
        }
    }

    return $motifs;
}
/**
 * Recursively build the neighborhood of a pattern over the alphabet A, C, T, G.
 *
 * Intended to yield every string of the same length as $pattern that differs
 * from it in at most $d positions.
 * NOTE(review): this relies on suffix() returning the pattern without its
 * first character and on BioUtil::hamming_distance() counting mismatched
 * positions - neither helper is visible here, confirm.
 *
 * @param string $pattern the pattern to expand
 * @param int    $d       maximum number of mismatches allowed
 * @return array list of neighbor strings; [$pattern] when $d < 1
 */
public static function neighbors($pattern, $d)
{
    // No mismatches allowed: the pattern is its own only neighbor.
    if ($d < 1) {
        return [$pattern];
    }

    // An empty pattern has exactly one neighbor of the same (zero) length.
    if ((string) $pattern === '') {
        return [''];
    }

    // A single symbol with at least one mismatch allowed can become any symbol.
    if (strlen($pattern) < 2) {
        return ['A', 'C', 'T', 'G'];
    }

    // The suffix is loop-invariant, so compute it once.
    $tail = suffix($pattern);

    $neighborhood = [];
    foreach (BioUtil::neighbors($tail, $d) as $suff) {
        if (BioUtil::hamming_distance($suff, $tail) < $d) {
            // Mismatch budget left over: the first symbol may be anything.
            foreach (['A', 'C', 'T', 'G'] as $b) {
                $neighborhood[] = $b . $suff;
            }
        } else {
            // Budget exhausted by the suffix: the first symbol must stay as is.
            $neighborhood[] = $pattern[0] . $suff;
        }
    }

    return $neighborhood;
}