/**
 * Extract the terms found in a piece of text and return just the term strings.
 *
 * Builds a DefaultFilter, an English Tagger and a TermExtractor, runs the
 * extractor over $text, and keeps only the first element of every result
 * entry (the term itself). The occurrence count and word count that the
 * extractor reports alongside each term are discarded.
 *
 * @param string $text             Text handed to TermExtractor::extract().
 * @param int    $min_occurrence   First argument of DefaultFilter: how many
 *                                 times a term must appear to be accepted.
 *                                 Defaults to 5, the value this function
 *                                 previously hard-coded.
 * @param int    $keep_if_strength Second argument of DefaultFilter: keep a
 *                                 term whose word count is at least this,
 *                                 regardless of occurrence. Defaults to 1,
 *                                 the value this function previously
 *                                 hard-coded.
 *
 * @return array Term strings, in the order the extractor produced them.
 */
function process($text, $min_occurrence = 5, $keep_if_strength = 1)
{
    $filter = new \DefaultFilter($min_occurrence, $keep_if_strength);

    // English is the language requested from the Tagger here.
    $tagger = new \Tagger('english');
    // The flag is the "use APC" switch of Tagger::initialize().
    // NOTE(review): the tagger is rebuilt and re-initialised on every call;
    // confirm that is acceptable for hot paths.
    $tagger->initialize(true);

    $extractor = new \TermExtractor($tagger, $filter);

    $res = array();
    foreach ($extractor->extract($text) as $term_info) {
        // Each entry is (term, occurrence, word count); only the term is needed.
        list($term) = $term_info;
        $res[] = $term;
    }

    return $res;
}
//require '../TermExtractor/PermissiveFilter.php'; //$filter = new PermissiveFilter(); // Default - accept terms based on occurrence and word count // min_occurrence - specify the number of times the term must appear in the original text for it to be accepted. // keep_if_strength - keep a term if the term's word count is equal to or greater than this, regardless of occurrence. require '../DefaultFilter.php'; $filter = new DefaultFilter($min_occurrence = 2, $keep_if_strength = 2); // Tagger // ------ // Create Tagger instance. // English is the only supported language at the moment. $tagger = new Tagger('english'); // Initialise the Tagger instance. // Use APC if available to store the dictionary file in memory // (otherwise it gets loaded from disk every time the Tagger is initialised). $tagger->initialize($use_apc = true); // Term Extractor // -------------- // Create TermExtractor instance $extractor = new TermExtractor($tagger, $filter); // Extract terms from the text $terms = $extractor->extract($text); // We're outputting results in plain text... header('Content-Type: text/plain; charset=UTF-8'); // Loop through extracted terms and print each term on a new line foreach ($terms as $term_info) { // index 0: term // index 1: number of occurrences in text // index 2: word count list($term, $occurrence, $word_count) = $term_info; echo "{$term}\n";