/**
 * Return best match (from database)
 *
 * Selects up to 10 candidate keywords from the `mst_misspell_index` table,
 * ordered by an SQL relevancy made of the length difference to the query,
 * the full-text match of the query trigram and the stored frequency. The
 * candidates are then re-scored in PHP and the highest scoring one is returned.
 *
 * The returned array has two keys:
 *  - `keyword`: the best candidate passed through toSameRegister() together
 *    with the query, or the trimmed query itself when nothing was found;
 *  - `diff`: the PHP-side score of that candidate (Damerau similarity plus the
 *    frequency scaled so that the most frequent candidate adds 10), or 100
 *    when the query is empty or there are no candidates.
 *
 * @param string $query
 * @return array
 */
public function getBestMatch($query)
{
    $query = trim($query);

    if (!$query) {
        return ['keyword' => $query, 'diff' => 100];
    }

    $len = intval($this->text->strlen($query));
    $trigram = $this->text->getTrigram($this->text->strtolower($query));

    $tableName = $this->resource->getTableName('mst_misspell_index');

    // The trigram is built from user input, so it must never be concatenated
    // raw into the statement: let the adapter quote and escape it as a literal.
    $against = $this->connection->quote((string) $trigram);

    $relevance = '(-ABS(LENGTH(keyword) - ' . $len . ') + MATCH (trigram) AGAINST(' . $against . '))';
    $relevancy = new \Zend_Db_Expr($relevance . ' + frequency AS relevancy');

    $select = $this->connection->select();
    $select->from($tableName, ['keyword', $relevancy, 'frequency'])
        ->order('relevancy desc')
        ->limit(10);

    $keywords = $this->connection->fetchAll($select);

    // Start from a tiny positive value so the scaling below never divides by zero.
    $maxFreq = 0.0001;
    foreach ($keywords as $keyword) {
        $maxFreq = max($keyword['frequency'], $maxFreq);
    }

    $preResults = [];
    foreach ($keywords as $keyword) {
        $preResults[$keyword['keyword']] = $this->damerau->similarity($query, $keyword['keyword'])
            + $keyword['frequency'] * (10 / $maxFreq);
    }

    // Highest score first, keys (keywords) preserved.
    arsort($preResults);

    $keys = array_keys($preResults);

    if (count($keys) > 0) {
        $diff = $preResults[$keys[0]];

        // PHP converts numeric-string array keys to integers; cast back so the
        // keyword is always handled and returned as a string.
        $keyword = $this->toSameRegister((string) $keys[0], $query);

        $result = ['keyword' => $keyword, 'diff' => $diff];
    } else {
        $result = ['keyword' => $query, 'diff' => 100];
    }

    return $result;
}
/**
 * Execute full indexation
 *
 * For every table returned by getTables(), the configured columns of each row
 * are joined into one space-separated string and passed to split(), which
 * accumulates words and their counts into $results. The `mst_misspell_index`
 * table is then emptied and refilled with one row per word (keyword, trigram
 * and frequency relative to the number of distinct words), inserted in
 * batches. Finally the `mst_misspell_suggest` table is emptied.
 *
 * @return void
 */
public function executeFull()
{
    $results = [];

    foreach ($this->getTables() as $table => $columns) {
        if (!count($columns)) {
            continue;
        }

        foreach ($columns as $idx => $col) {
            $columns[$idx] = '`' . $col . '`';
        }

        // CONCAT() yields NULL as soon as one argument is NULL, which would drop
        // the whole row from the index. CONCAT_WS() skips NULL arguments and
        // produces the same space-separated text for non-NULL values.
        $fromColumns = new \Zend_Db_Expr(
            "CONCAT_WS(' ', " . implode(', ', $columns) . ') as data_index'
        );

        $select = $this->connection->select();
        $select->from($table, $fromColumns);

        $result = $this->connection->query($select);
        while ($row = $result->fetch()) {
            $data = $row['data_index'];
            $this->split($data, $results);
        }
    }

    $indexTable = $this->resource->getTableName('mst_misspell_index');
    $indexColumns = ['keyword', 'trigram', 'frequency'];

    $this->connection->delete($indexTable);

    // Number of distinct words; constant for the whole loop below.
    $total = count($results);

    $rows = [];
    foreach ($results as $word => $freq) {
        $rows[] = [
            'keyword'   => $word,
            'trigram'   => $this->text->getTrigram($word),
            'frequency' => $freq / $total,
        ];

        // Flush in batches to keep the size of a single INSERT bounded.
        if (count($rows) > 1000) {
            $this->connection->insertArray($indexTable, $indexColumns, $rows);
            $rows = [];
        }
    }

    if (count($rows) > 0) {
        $this->connection->insertArray($indexTable, $indexColumns, $rows);
    }

    // Empty the suggest table as the last step of a full reindex.
    $this->connection->delete($this->resource->getTableName('mst_misspell_suggest'));
}
/**
 * Checks that getTrigram() returns the expected value for each data set
 * supplied by the provider.
 *
 * @param null|string $expected
 * @param string $data
 * @dataProvider prepareGetTrigramProvider
 */
public function testGetTrigram($expected, $data)
{
    $actual = $this->model->getTrigram($data);

    $this->assertEquals($expected, $actual);
}