/**
 * Build the query object matching the shape of the given query string.
 *
 * The string is tokenized with a default GeneralTokenizer. Exactly one token
 * produces a SingleTermQuery; any other token count produces a MultiTermQuery.
 *
 * @param string $queryString raw query text
 * @return QueryAbstractFactory
 */
public static function factory($queryString)
{
    $termCount = count((new GeneralTokenizer())->tokenize($queryString));

    return $termCount === 1
        ? new SingleTermQuery($queryString)
        : new MultiTermQuery($queryString);
}
/**
 * Build the query object matching the shape of the given query string.
 *
 * Selection order:
 *  1. A non-empty string that both starts and ends with a quote character
 *     (single or double) produces a QuotedQuery.
 *  2. Otherwise the string is tokenized with a default GeneralTokenizer;
 *     exactly one token produces a SingleTermQuery.
 *  3. Any other token count produces a MultiTermQuery.
 *
 * @param string $queryString raw query text
 * @return QueryAbstractFactory
 */
public static function factory($queryString)
{
    $quoteChars = ['"', "'"];
    $length = strlen($queryString);

    // Guard on length first: reading offset 0 / -1 of an empty string raises
    // "Uninitialized string offset" warnings.
    // NOTE(review): the opening and closing quote are not required to match,
    // and a lone quote character also counts as quoted — confirm this is intended.
    if ($length > 0
        && in_array($queryString[0], $quoteChars, true)
        && in_array($queryString[$length - 1], $quoteChars, true)
    ) {
        return new QuotedQuery($queryString);
    }

    // Tokenize only when needed; the quoted branch never looks at the tokens.
    $tokenizer = new GeneralTokenizer();
    $tokens = $tokenizer->tokenize($queryString);

    if (count($tokens) === 1) {
        return new SingleTermQuery($queryString);
    }

    return new MultiTermQuery($queryString);
}
/**
 * Return an array of tokenized words.
 *
 * Reads either the single file identified by $fileId or, when $fileId is
 * empty, every file returned by getFileIds(). Each file is loaded from the
 * package installation path, tokenized, and the tokens are merged into one
 * array in file order.
 *
 * Files that cannot be read are skipped (file_get_contents() still emits its
 * own warning) instead of handing `false` to the tokenizer.
 *
 * @param string|null $fileId file to read, or empty to read all files
 * @param \TextAnalysis\Tokenizers\TokenizerAbstract|null $tokenizer tokenizer to use;
 *        a GeneralTokenizer is created when none is given
 * @return array
 */
public function getWords($fileId = null, $tokenizer = null)
{
    if (!$tokenizer) {
        $tokenizer = new GeneralTokenizer();
    }

    $fileIds = empty($fileId) ? $this->getFileIds() : [$fileId];
    $basePath = $this->getPackage()->getInstallationPath();

    // Collect per-file token arrays and merge once at the end, rather than
    // re-copying the growing word list on every iteration.
    $tokenChunks = [];
    foreach ($fileIds as $filename) {
        $content = file_get_contents($basePath . $filename);
        if ($content === false) {
            continue;
        }

        $tokenChunks[] = $tokenizer->tokenize($content);
        unset($content);
    }

    if (!$tokenChunks) {
        return [];
    }

    // One variadic array_merge call gives the same result as merging pairwise.
    return call_user_func_array('array_merge', $tokenChunks);
}
/**
 * A GeneralTokenizer constructed with PHP_EOL as its argument should yield
 * four tokens for a text made of four PHP_EOL-separated segments.
 */
public function testLineTokenizer()
{
    $segments = ["This ", " has", " some", " words"];
    $text = implode(PHP_EOL, $segments);

    $lineTokenizer = new GeneralTokenizer(PHP_EOL);
    $tokens = $lineTokenizer->tokenize($text);

    $this->assertCount(4, $tokens);
}