public function getAllKeywordsSorted($text) { // split the text into words $words = $this->config->trigger('get_words', $text); // get the candidates $keywords = $this->config->trigger('filter_keywords', $words); // normalize each candidate $normalized = $this->config->trigger('normalize_keywords', $keywords); if (count($keywords) != count($normalized)) { throw new \RuntimeException("{normalize_keywords} event returned invalid data"); } $graph = new PageRank(); $sorted = $graph->sort(array_values($normalized), true); if ($sorted == $normalized) { // PageRank failed, probably because the input was invalid return []; } $top = array_slice($sorted, 0, 10); // build an index of words and positions (so we can collapse compount keywords) $index = []; $pindex = []; // search for coumpounds keywords $prev = []; $phrases = []; foreach ($normalized as $pos => $word) { if (empty($top[$word])) { if (count($prev) > 1 && count($prev) < 4) { $phrases[] = $prev; } $prev = []; continue; } $prev[] = [$pos, $word]; } if (count($prev) > 1 && count($prev) < 4) { $phrases[] = $prev; } foreach ($phrases as $prev) { $start = current($prev)[0]; $end = end($prev)[0]; $zwords = array_slice($words, $start, $end - $start + 1, true); if (count(array_filter($zwords, 'ctype_punct')) > 0) { continue; } $phrase = implode(' ', $zwords); $score = 0; foreach ($prev as $word) { $score += $top[$word[1]]; } $sorted[trim($phrase)] = $score / ($end - $start); } // denormalize each single words foreach ($normalized as $pos => $word) { if (!empty($sorted[$word]) && $word != $words[$pos]) { $sorted[$words[$pos]] = $sorted[$word]; unset($sorted[$word]); } } arsort($sorted); return $sorted; }