/**
 * Builds the word tokenizer together with its filter configuration.
 *
 * A LowercaseFilter is created first, then a StopWordFilter (with the
 * stop-word list below) and a ShortWordFilter constructed with 2 are
 * attached to it via addFilter(). The resulting filter is handed to a
 * new WordTokenizer.
 *
 * NOTE(review): the exact meaning of the ShortWordFilter argument (2) is
 * defined by that class, which is not visible here - confirm whether the
 * threshold is inclusive or exclusive.
 *
 * @return WordTokenizer Tokenizer wired to the filter configuration above.
 */
public static function createTokenizer()
{
    // Words handed to the StopWordFilter.
    $stopWords = array('the', 'a', 'and', 'is', 'it', 'of', 'to', 'be', 'in');

    $filters = new LowercaseFilter();
    $filters->addFilter(new StopWordFilter($stopWords));
    $filters->addFilter(new ShortWordFilter(2));

    return new WordTokenizer($filters);
}
/**
 * Prepares the tokenizer and classifier, then trains the classifier.
 *
 * Creates a LowercaseFilter with a StopWordFilter attached, stores a
 * WordTokenizer using that filter in $this->tokenizer, stores a Classifier
 * built from that tokenizer in $this->classifier, and finally calls
 * $this->train().
 *
 * NOTE(review): presumably the PHPUnit per-test fixture hook - confirm
 * against the enclosing class, which is not visible here.
 */
public function setUp()
{
    // Words handed to the StopWordFilter.
    $stopWords = array('the', 'a', 'and', 'is', 'it');

    $filters = new LowercaseFilter();
    $filters->addFilter(new StopWordFilter($stopWords));

    $this->tokenizer = new WordTokenizer($filters);
    $this->classifier = new Classifier($this->tokenizer);

    // Training runs last, after both properties have been assigned.
    $this->train();
}