예제 #1
0
/**
 * Builds the search engine and publishes its parts through global variables.
 *
 * Creates a PorterTokenizer, an SQLDocumentStore, a MemcachedDocumentIndex and
 * a TFIDFDocumentRanker, combines them into a Config via the config builder and
 * constructs an Engine from it. The store, index and engine are assigned to the
 * globals $store, $index and $engine respectively.
 *
 * @return void
 */
function setup()
{
    global $index, $engine, $store;

    $tokenizer = new PorterTokenizer();

    // The same tokenizer instance is handed to the store and to the engine config.
    $store = new SQLDocumentStore(Env::getPDO(), $tokenizer);
    // Alternative store, currently disabled:
    // $store = new MongoDBDocumentStore(Env::get('MONGO_HOST'), Env::get('MONGO_PORT'));

    // The class is referred to as "Env" everywhere: PHP resolves class names
    // case-insensitively, but autoloaders on case-sensitive filesystems may not.
    $index = new MemcachedDocumentIndex(Env::get('MEMCACHED_HOST'), Env::get('MEMCACHED_PORT'));
    $ranker = new TFIDFDocumentRanker();

    $config = Config::createBuilder()
        ->index($index)
        ->store($store)
        ->tokenizer($tokenizer)
        ->ranker($ranker)
        ->build();

    $engine = new Engine($config);
}
예제 #2
0
 /**
  * Loads the test dataset into an engine built from the test config, checks
  * the top keyword found for a sample sentence, and verifies the reported
  * engine size before and after clearing the index and then the store.
  */
 public function testFindKeywords()
 {
     $config = Config::createBuilder()->testConfig()->build();
     $engine = new Engine($config);

     // Dataset location comes from the TEST_DATASET_PATH environment entry.
     $datasetJson = file_get_contents(Env::get('TEST_DATASET_PATH'));
     $dataset = json_decode($datasetJson);
     foreach ($dataset->data as $entry) {
         $document = new Document($entry->title, $entry->content);
         $engine->addDocument($document);
     }

     $sentence = 'In computer engineering, computer architecture is the conceptual design and fundamental operational structure of a computer system.';
     $keywords = $engine->findKeywords($sentence);
     $topKeyword = $keywords[0]['keyword'];
     $this->assertEquals('computer', $topKeyword);

     // Every dataset entry should be counted by the engine.
     $expectedSize = count($dataset->data);
     $this->assertEquals($expectedSize, $engine->size());

     // Clearing only the index is expected to leave the size non-zero.
     $engine->clear('index');
     $this->assertNotEquals(0, $engine->size());

     // Clearing the store is expected to bring the size down to zero.
     $engine->clear('store');
     $this->assertEquals(0, $engine->size());
 }
예제 #3
0
 /**
  * Extracts keywords from a free-text query.
  *
  * The query is split with the configured tokenizer. For every token, the
  * number of results returned by an index search is passed to the configured
  * ranker, which then produces the keyword list. When the tokenizer is a
  * StemTokenizer, each entry's 'keyword' value is replaced with the result of
  * the tokenizer's getWord() lookup.
  *
  * @param string $query text to identify keywords in
  * @return array keyword entries, in the order returned by the ranker
  */
 public function findKeywords($query)
 {
     $tokenizer = $this->config->getTokenizer();
     $ranker = $this->config->getRanker();

     $queryTokens = $tokenizer->tokenize($query);
     $ranker->init($queryTokens, $this->size());

     // Give the ranker the index hit count of each token.
     foreach ($queryTokens as $queryToken) {
         $ranker->cacheTokenFrequency($queryToken, count($this->index->search($queryToken)));
     }

     $ranked = $ranker->findKeywords($queryTokens);

     // Tokenizers that do not stem need no post-processing.
     if (!($tokenizer instanceof Tokenizer\StemTokenizer)) {
         return $ranked;
     }

     // Tokens were stemmed: look up the original word for each keyword.
     return array_map(function ($entry) use ($tokenizer) {
         $entry['keyword'] = $tokenizer->getWord($entry['keyword']);
         return $entry;
     }, $ranked);
 }
예제 #4
0
 /**
  * Building a config that has an index, stop words, a store and a tokenizer
  * but no ranker is expected to throw.
  *
  * @expectedException        \Exception
  * @expectedExceptionMessage Document Ranker not defined
  */
 public function testMissingRanker()
 {
     // Configure everything except the ranker.
     $builder = Config::createBuilder()
         ->index($this->index)
         ->stopWords($this->stopWords)
         ->store($this->store)
         ->tokenizer($this->tokenizer);

     $config = $builder->build();
 }