/**
 * Builds the engine and publishes it, together with its document store and
 * index, through the global variables $engine, $store and $index.
 *
 * The store is an SQLDocumentStore created from Env::getPDO() and the shared
 * tokenizer; the index is a MemcachedDocumentIndex created from the values of
 * ENV::get('MEMCACHED_HOST') and ENV::get('MEMCACHED_PORT').
 */
function setup()
{
    global $index, $engine, $store;

    $tokenizer = new PorterTokenizer();

    // Alternative store backend, currently disabled:
    // $store = new MongoDBDocumentStore(ENV::get('MONGO_HOST'), ENV::get('MONGO_PORT'));
    $store = new SQLDocumentStore(Env::getPDO(), $tokenizer);

    $index = new MemcachedDocumentIndex(
        ENV::get('MEMCACHED_HOST'),
        ENV::get('MEMCACHED_PORT')
    );

    $ranker = new TFIDFDocumentRanker();

    // Assemble the configuration step by step, in the same order as before.
    $builder = Config::createBuilder();
    $builder = $builder->index($index);
    $builder = $builder->store($store);
    $builder = $builder->tokenizer($tokenizer);
    $builder = $builder->ranker($ranker);

    $engine = new Engine($builder->build());
}
/**
 * Indexes every article of the test dataset, then checks keyword extraction
 * and how clearing the index and the store affects the engine size.
 *
 * The dataset is read from the path returned by Env::get('TEST_DATASET_PATH')
 * and must decode to a JSON object exposing a "data" member whose entries
 * have "title" and "content" members.
 */
public function testFindKeywords()
{
    $engine = new Engine(Config::createBuilder()->testConfig()->build());

    // Fail with an explicit message when the fixture is missing or malformed,
    // instead of with unrelated warnings about a null dataset further down.
    $path = Env::get('TEST_DATASET_PATH');
    $json = file_get_contents($path);
    $this->assertTrue($json !== false, "Unable to read test dataset at '{$path}'");

    $dataset = json_decode($json);
    $this->assertTrue(
        isset($dataset->data),
        'Test dataset is not a JSON object with a "data" member (json error code ' . json_last_error() . ')'
    );

    foreach ($dataset->data as $article) {
        $engine->addDocument(new Document($article->title, $article->content));
    }

    $results = $engine->findKeywords('In computer engineering, computer architecture is the conceptual design and fundamental operational structure of a computer system.');

    // Guard the $results[0] access below so an empty ranking is reported as such.
    $this->assertNotEmpty($results, 'No keywords were returned for the query');
    $this->assertEquals('computer', $results[0]['keyword']);
    $this->assertEquals(count($dataset->data), $engine->size());

    // Clearing only the index must leave a non-zero size ...
    $engine->clear('index');
    $this->assertNotEquals(0, $engine->size());

    // ... while clearing the store must bring the size down to zero.
    $engine->clear('store');
    $this->assertEquals(0, $engine->size());
}
/**
 * Extracts the keywords of a query.
 *
 * The query is tokenized, the ranker is initialised with the tokens and the
 * current size of the engine, and for every token the number of results the
 * index returns for it is handed to the ranker before the ranker is asked
 * for the keywords.
 *
 * @param $query string to identify keywords in
 * @return array of keywords, ordered by the ranker class
 */
public function findKeywords($query)
{
    $tokenizer = $this->config->getTokenizer();
    $ranker = $this->config->getRanker();

    $terms = $tokenizer->tokenize($query);
    $ranker->init($terms, $this->size());

    foreach ($terms as $term) {
        $hits = $this->index->search($term);
        $ranker->cacheTokenFrequency($term, count($hits));
    }

    $ranked = $ranker->findKeywords($terms);

    // Nothing more to do unless the tokenizer produced stems.
    if (!($tokenizer instanceof Tokenizer\StemTokenizer)) {
        return $ranked;
    }

    // Replace each stemmed keyword with the word the tokenizer reports for it,
    // keeping the entries under their original keys.
    foreach ($ranked as $position => $entry) {
        $entry['keyword'] = $tokenizer->getWord($entry['keyword']);
        $ranked[$position] = $entry;
    }

    return $ranked;
}
/**
 * Building a configuration without a ranker is expected to raise an exception.
 *
 * @expectedException \Exception
 * @expectedExceptionMessage Document Ranker not defined
 */
public function testMissingRanker()
{
    $builder = Config::createBuilder();
    $builder = $builder->index($this->index);
    $builder = $builder->stopWords($this->stopWords);
    $builder = $builder->store($this->store);
    $builder = $builder->tokenizer($this->tokenizer);

    // ranker() is deliberately never called; the result of build() is not needed.
    $builder->build();
}