/**
  * Create the query object that fits the given query string.
  *
  * The string is tokenized with GeneralTokenizer; exactly one token
  * produces a SingleTermQuery, any other token count produces a
  * MultiTermQuery. The original, untokenized string is handed to the
  * query constructor in both cases.
  *
  * @param string $queryString
  * @return QueryAbstractFactory
  */
 public static function factory($queryString)
 {
     $termCount = count((new GeneralTokenizer())->tokenize($queryString));

     // Anything other than exactly one token is treated as multi-term.
     if ($termCount !== 1) {
         return new MultiTermQuery($queryString);
     }

     return new SingleTermQuery($queryString);
 }
 /**
  * Create the query object that fits the given query string.
  *
  * A non-empty string whose first and last characters are both quote
  * characters (" or ') produces a QuotedQuery. Otherwise the string is
  * tokenized with GeneralTokenizer: exactly one token produces a
  * SingleTermQuery, any other token count produces a MultiTermQuery.
  * The original, untokenized string is handed to the query constructor
  * in every case.
  *
  * @param string $queryString
  * @return QueryAbstractFactory
  */
 public static function factory($queryString)
 {
     $quoteChars = ['"', "'"];
     $length = strlen($queryString);

     // Guard the offset reads: accessing offset 0 or -1 of an empty string
     // raises an "Uninitialized string offset" warning.
     if ($length > 0
         && in_array($queryString[0], $quoteChars, true)
         && in_array($queryString[$length - 1], $quoteChars, true)
     ) {
         return new QuotedQuery($queryString);
     }

     // Tokenize only once we know the token count is actually needed.
     $tokenizer = new GeneralTokenizer();
     $tokens = $tokenizer->tokenize($queryString);

     if (count($tokens) === 1) {
         return new SingleTermQuery($queryString);
     }

     return new MultiTermQuery($queryString);
 }
 // Example #3
 // 0
 /**
  * Return an array of tokenized words.
  *
  * Reads each selected file from the package installation path, tokenizes
  * its content and returns all tokens merged into a single array, in file
  * order. Files whose content cannot be read are skipped.
  *
  * @param string|null $fileId File to tokenize; when empty, every id
  *                            returned by getFileIds() is processed
  * @param \TextAnalysis\Tokenizers\TokenizerAbstract|null $tokenizer
  *                            Tokenizer to use; defaults to a new GeneralTokenizer
  * @return array
  */
 public function getWords($fileId = null, $tokenizer = null)
 {
     if (!$tokenizer) {
         $tokenizer = new GeneralTokenizer();
     }

     $fileIds = empty($fileId) ? $this->getFileIds() : [$fileId];

     // Collect one token list per file and merge once at the end; calling
     // array_merge() inside the loop re-copies the growing result each time.
     $tokenLists = [];
     foreach ($fileIds as $filename) {
         $content = file_get_contents($this->getPackage()->getInstallationPath() . $filename);
         if ($content === false) {
             // file_get_contents() has already emitted a warning; there is
             // nothing to tokenize for this file.
             continue;
         }
         $tokenLists[] = $tokenizer->tokenize($content);
         unset($content);
     }

     if (empty($tokenLists)) {
         return [];
     }

     return call_user_func_array('array_merge', $tokenLists);
 }
 /**
  * A GeneralTokenizer constructed with PHP_EOL must yield four tokens for
  * a text made of four fragments joined by PHP_EOL.
  */
 public function testLineTokenizer()
 {
     // Fragments keep their surrounding spaces so the input is identical
     // to "This <EOL> has<EOL> some<EOL> words".
     $text = implode(PHP_EOL, ["This ", " has", " some", " words"]);

     $lineTokenizer = new GeneralTokenizer(PHP_EOL);
     $tokens = $lineTokenizer->tokenize($text);

     $this->assertCount(4, $tokens);
 }