Inheritance: implements LanguageDetector\Sort\SortInterface
Example #1
0
 /**
  * Extract the keywords of a text together with their scores.
  *
  * The text is split into words, filtered and normalized through the
  * configured events, and the normalized candidates are ranked by PageRank.
  * Runs of two or three consecutive candidates that all belong to the ten
  * best-ranked entries are additionally stored as compound keywords, and
  * single keywords are mapped back from their normalized form to the form
  * found in the word list.
  *
  * @param string $text Text to analyse; handed to the `get_words` event.
  *
  * @return array Map of keyword (or phrase) => score, highest score first.
  *               Empty when PageRank handed its input back unchanged.
  *
  * @throws \RuntimeException When the `normalize_keywords` event returns a
  *                           different number of items than it was given.
  */
 public function getAllKeywordsSorted($text)
 {
     // split the text into words
     $words = $this->config->trigger('get_words', $text);
     // get the candidates
     $keywords = $this->config->trigger('filter_keywords', $words);
     // normalize each candidate
     $normalized = $this->config->trigger('normalize_keywords', $keywords);
     if (count($keywords) !== count($normalized)) {
         throw new \RuntimeException("{normalize_keywords} event returned invalid data");
     }

     $nodes = array_values($normalized);
     $graph = new PageRank();
     $sorted = $graph->sort($nodes, true);
     // The list given to PageRank is the re-indexed $nodes, so an "unchanged
     // input" result has to be compared against that list as well; comparing
     // only with $normalized misses it whenever its keys are not 0..n-1.
     // NOTE(review): assumes PageRank::sort() returns its input on failure,
     // as the original check implies -- confirm against PageRank.
     if ($sorted == $normalized || $sorted == $nodes) {
         // PageRank failed, probably because the input was invalid
         return [];
     }

     // only the best-ranked entries may take part in a compound keyword
     $top = array_slice($sorted, 0, 10);

     // search for compound keywords: runs of consecutive top-ranked candidates
     $prev = [];
     $phrases = [];
     foreach ($normalized as $pos => $word) {
         if (empty($top[$word])) {
             // the run is broken; keep it only if it is 2 or 3 words long
             if (count($prev) > 1 && count($prev) < 4) {
                 $phrases[] = $prev;
             }
             $prev = [];
             continue;
         }
         $prev[] = [$pos, $word];
     }
     // flush the run that reaches the end of the candidate list
     if (count($prev) > 1 && count($prev) < 4) {
         $phrases[] = $prev;
     }

     foreach ($phrases as $prev) {
         // reset()/end() do not depend on the array's internal pointer state
         $start = reset($prev)[0];
         $end = end($prev)[0];
         // take the original words between the first and last position
         $zwords = array_slice($words, $start, $end - $start + 1, true);
         if (count(array_filter($zwords, 'ctype_punct')) > 0) {
             // punctuation inside the span: not a single phrase
             continue;
         }
         $phrase = implode(' ', $zwords);
         $score = 0;
         foreach ($prev as $word) {
             $score += $top[$word[1]];
         }
         $sorted[trim($phrase)] = $score / ($end - $start);
     }

     // denormalize each single word
     foreach ($normalized as $pos => $word) {
         // Strict string comparison: a loose `!=` compares numeric-looking
         // strings by value ("010" == "10", "1e1" == "10"), which would leave
         // such tokens in their normalized spelling.
         if (!empty($sorted[$word]) && (string) $word !== (string) $words[$pos]) {
             $sorted[$words[$pos]] = $sorted[$word];
             unset($sorted[$word]);
         }
     }

     arsort($sorted);
     return $sorted;
 }