/**
 * Suggest a replacement for a keyword using a prepared query against the
 * "suggest" index.
 *
 * The keyword is turned into a trigram string by BuildTrigrams() and sent as a
 * quoted "..."/1 full-text expression. Candidates are limited to entries whose
 * `len` attribute lies within LENGTH_THRESHOLD of the keyword's byte length,
 * ordered by `myrank` (match weight + threshold - length difference) and then
 * by `freq`, and capped at TOP_COUNT rows.
 *
 * @param string $keyword Keyword to find a suggestion for.
 * @param object $ln      Connection handle exposing prepare(); presumably a PDO
 *                        connection speaking SphinxQL (the binds use
 *                        PDO::PARAM_* constants) -- confirm against the caller.
 *
 * @return string|false false when the query yields no rows; otherwise the first
 *                      candidate within LEVENSHTEIN_THRESHOLD edits of the
 *                      keyword, or the keyword itself when none qualifies.
 */
function MakeSuggestion($keyword, $ln)
{
    $matchExpr = '"' . BuildTrigrams($keyword) . '"/1';
    $keywordLen = strlen($keyword);
    $tolerance = LENGTH_THRESHOLD;

    // The weight expression is spelled differently when SPHINX_20 is set.
    $weightExpr = (SPHINX_20 == true) ? '@weight' : 'weight()';

    $sql = "SELECT *, {$weightExpr} as w, w+:delta-ABS(len-:len) as myrank FROM suggest"
        . " WHERE MATCH(:match) AND len BETWEEN :lowlen AND :highlen\r\n\t\t\t"
        . "ORDER BY myrank DESC, freq DESC\r\n\t\t\t"
        . "LIMIT 0,:topcount OPTION ranker=wordcount";
    $stmt = $ln->prepare($sql);

    // Placeholder => [value, PDO type], bound in this order.
    $bindings = [
        ':match'    => [$matchExpr, PDO::PARAM_STR],
        ':len'      => [$keywordLen, PDO::PARAM_INT],
        ':delta'    => [$tolerance, PDO::PARAM_INT],
        ':lowlen'   => [$keywordLen - $tolerance, PDO::PARAM_INT],
        ':highlen'  => [$keywordLen + $tolerance, PDO::PARAM_INT],
        ':topcount' => [TOP_COUNT, PDO::PARAM_INT],
    ];
    foreach ($bindings as $placeholder => $spec) {
        $stmt->bindValue($placeholder, $spec[0], $spec[1]);
    }
    $stmt->execute();

    $candidates = $stmt->fetchAll();
    if (!$candidates) {
        return false;
    }

    // Rows arrive best-ranked first; accept the first one that is also close
    // enough to the keyword in Levenshtein distance.
    foreach ($candidates as $row) {
        $candidate = $row["keyword"];
        if (levenshtein($keyword, $candidate) > LEVENSHTEIN_THRESHOLD) {
            continue;
        }
        return $candidate;
    }

    return $keyword;
}
/**
 * Suggest a replacement for a keyword using the SphinxClient API against the
 * "suggest" index.
 *
 * The keyword is turned into a trigram string by BuildTrigrams() and sent as a
 * quoted "..."/1 extended-syntax query with the word-count ranker. Candidates
 * are filtered to entries whose `len` attribute lies within LENGTH_THRESHOLD of
 * the keyword's byte length, sorted by `myrank` (weight + threshold - length
 * difference) and then by `freq`, and capped at TOP_COUNT matches.
 *
 * When SUGGEST_DEBUG is truthy, every candidate is printed together with its
 * Levenshtein distance to the keyword.
 *
 * @param string $keyword Keyword to find a suggestion for.
 *
 * @return string|false false when the query fails or yields no matches;
 *                      otherwise the first candidate within
 *                      LEVENSHTEIN_THRESHOLD edits of the keyword, or the
 *                      keyword itself when none qualifies.
 */
function MakeSuggestion($keyword)
{
    $trigrams = BuildTrigrams($keyword);
    $query = "\"{$trigrams}\"/1";
    $len = strlen($keyword);
    $delta = LENGTH_THRESHOLD;

    $cl = new SphinxClient();
    $cl->SetMatchMode(SPH_MATCH_EXTENDED2);
    $cl->SetRankingMode(SPH_RANK_WORDCOUNT);
    $cl->SetFilterRange("len", $len - $delta, $len + $delta);
    $cl->SetSelect("*, @weight+{$delta}-abs(len-{$len}) AS myrank");
    $cl->SetSortMode(SPH_SORT_EXTENDED, "myrank DESC, freq DESC");
    $cl->SetArrayResult(true);

    // Pull the top-N best trigram matches and run them through Levenshtein.
    $cl->SetLimits(0, TOP_COUNT);
    $res = $cl->Query($query, "suggest");

    // empty() also tolerates a result that carries no "matches" key at all,
    // which a plain !$res["matches"] would report as an undefined index.
    if (!$res || empty($res["matches"])) {
        return false;
    }

    if (SUGGEST_DEBUG) {
        print "--- DEBUG START ---\n";
        foreach ($res["matches"] as $match) {
            $w = $match["attrs"]["keyword"];

            // "myrank" is optional in the result; check for it explicitly
            // instead of silencing the lookup with @.
            $myrank = isset($match["attrs"]["myrank"]) ? $match["attrs"]["myrank"] : null;
            $rankInfo = $myrank ? ", myrank={$myrank}" : "";

            // FIXME? add costs?
            // FIXME: levenshtein() is byte-based and does not handle UTF-8.
            $levdist = levenshtein($keyword, $w);

            // Array keys inside {$...} must be quoted: bare attrs/freq would be
            // resolved as constants (a fatal Error on PHP 8).
            print "id={$match['id']}, weight={$match['weight']}, freq={$match['attrs']['freq']}{$rankInfo}, word={$w}, levdist={$levdist}\n";
        }
        print "--- DEBUG END ---\n";
    }

    // Further restrict trigram matches with a sane Levenshtein distance limit;
    // matches arrive best-ranked first, so the first hit wins.
    foreach ($res["matches"] as $match) {
        $suggested = $match["attrs"]["keyword"];
        if (levenshtein($keyword, $suggested) <= LEVENSHTEIN_THRESHOLD) {
            return $suggested;
        }
    }

    return $keyword;
}