Exemplo n.º 1
0
/**
 * Extract the terms found in a piece of text.
 *
 * Builds a DefaultFilter, an English Tagger and a TermExtractor, runs the
 * extractor over $text and collects the first element (the term) of every
 * entry the extractor yields.
 *
 * @param mixed $text Text passed unchanged to TermExtractor::extract().
 *
 * @return array Sequentially indexed list of terms, in the order the extractor produced them.
 */
function process($text)
{
    // Positional arguments: min_occurrence = 5, keep_if_strength = 1.
    $filter = new \DefaultFilter(5, 1);

    $tagger = new \Tagger('english');
    // Positional argument: use_apc = true.
    $tagger->initialize(true);

    $extractor = new \TermExtractor($tagger, $filter);

    $res = array();
    foreach ($extractor->extract($text) as $term_info) {
        // Each entry is (term, occurrence, word count); only the term is kept,
        // so the other two positions are deliberately not unpacked.
        list($term) = $term_info;
        $res[] = $term;
    }

    return $res;
}
Exemplo n.º 2
0
// Filter
// ------
// Alternative filter, currently disabled. Uncomment these two lines (and drop the
// DefaultFilter lines below) to use it instead.
//require '../TermExtractor/PermissiveFilter.php';
//$filter = new PermissiveFilter();
// Default - accept terms based on occurrence and word count.
// min_occurrence - the number of times the term must appear in the original text for it to be accepted.
// keep_if_strength - keep a term if the term's word count is equal to or greater than this, regardless of occurrence.
require '../DefaultFilter.php';
// The `$name = value` expressions below are ordinary assignments used to label the
// positional arguments; as a side effect they also define $min_occurrence and
// $keep_if_strength in the current scope.
$filter = new DefaultFilter($min_occurrence = 2, $keep_if_strength = 2);
// Tagger
// ------
// Create Tagger instance.
// English is the only supported language at the moment.
// NOTE(review): only DefaultFilter.php is required in the visible lines; the Tagger and
// TermExtractor classes are presumably loaded elsewhere (include or autoloader) - confirm.
$tagger = new Tagger('english');
// Initialise the Tagger instance.
// Use APC if available to store the dictionary file in memory
// (otherwise it gets loaded from disk every time the Tagger is initialised).
// As above, `$use_apc = true` is an assignment that labels the argument and defines $use_apc.
$tagger->initialize($use_apc = true);
// Term Extractor
// --------------
// Create TermExtractor instance.
$extractor = new TermExtractor($tagger, $filter);
// Extract terms from the text.
// NOTE(review): $text is not defined in the visible part of this script - presumably it is
// set earlier; confirm before running this snippet on its own.
$terms = $extractor->extract($text);
// We're outputting results in plain text, so declare the content type and charset.
// This header has to be sent before any of the terms are echoed.
header('Content-Type: text/plain; charset=UTF-8');
// Loop through extracted terms and print each term on a new line
foreach ($terms as $term_info) {
    // index 0: term
    // index 1: number of occurrences in text
    // index 2: word count
    list($term, $occurrence, $word_count) = $term_info;
    echo "{$term}\n";