コード例 #1
0
function MakeSuggestion($keyword, $ln)
{
    $trigrams = BuildTrigrams($keyword);
    $query = "\"{$trigrams}\"/1";
    $len = strlen($keyword);
    $delta = LENGTH_THRESHOLD;
    $weight = 'weight()';
    if (SPHINX_20 == true) {
        $weight = '@weight';
    }
    $stmt = $ln->prepare("SELECT *, {$weight} as w, w+:delta-ABS(len-:len) as myrank FROM suggest WHERE MATCH(:match) AND len BETWEEN :lowlen AND :highlen\r\n\t\t\tORDER BY myrank DESC, freq DESC\r\n\t\t\tLIMIT 0,:topcount OPTION ranker=wordcount");
    $stmt->bindValue(':match', $query, PDO::PARAM_STR);
    $stmt->bindValue(':len', $len, PDO::PARAM_INT);
    $stmt->bindValue(':delta', $delta, PDO::PARAM_INT);
    $stmt->bindValue(':lowlen', $len - $delta, PDO::PARAM_INT);
    $stmt->bindValue(':highlen', $len + $delta, PDO::PARAM_INT);
    $stmt->bindValue(':topcount', TOP_COUNT, PDO::PARAM_INT);
    $stmt->execute();
    if (!($rows = $stmt->fetchAll())) {
        return false;
    }
    // further restrict trigram matches with a sane Levenshtein distance limit
    foreach ($rows as $match) {
        $suggested = $match["keyword"];
        if (levenshtein($keyword, $suggested) <= LEVENSHTEIN_THRESHOLD) {
            return $suggested;
        }
    }
    return $keyword;
}
コード例 #2
0
ファイル: suggest.php プロジェクト: gagoel/sphinxsearch
function MakeSuggestion($keyword)
{
    $trigrams = BuildTrigrams($keyword);
    $query = "\"{$trigrams}\"/1";
    $len = strlen($keyword);
    $delta = LENGTH_THRESHOLD;
    $cl = new SphinxClient();
    $cl->SetMatchMode(SPH_MATCH_EXTENDED2);
    $cl->SetRankingMode(SPH_RANK_WORDCOUNT);
    $cl->SetFilterRange("len", $len - $delta, $len + $delta);
    $cl->SetSelect("*, @weight+{$delta}-abs(len-{$len}) AS myrank");
    $cl->SetSortMode(SPH_SORT_EXTENDED, "myrank DESC, freq DESC");
    $cl->SetArrayResult(true);
    // pull top-N best trigram matches and run them through Levenshtein
    $cl->SetLimits(0, TOP_COUNT);
    $res = $cl->Query($query, "suggest");
    if (!$res || !$res["matches"]) {
        return false;
    }
    if (SUGGEST_DEBUG) {
        print "--- DEBUG START ---\n";
        foreach ($res["matches"] as $match) {
            $w = $match["attrs"]["keyword"];
            $myrank = @$match["attrs"]["myrank"];
            if ($myrank) {
                $myrank = ", myrank={$myrank}";
            }
            // FIXME? add costs?
            // FIXME! does not work with UTF-8.. THIS! IS!! PHP!!!
            $levdist = levenshtein($keyword, $w);
            print "id={$match['id']}, weight={$match['weight']}, freq={$match[attrs][freq]}{$myrank}, word={$w}, levdist={$levdist}\n";
        }
        print "--- DEBUG END ---\n";
    }
    // further restrict trigram matches with a sane Levenshtein distance limit
    foreach ($res["matches"] as $match) {
        $suggested = $match["attrs"]["keyword"];
        if (levenshtein($keyword, $suggested) <= LEVENSHTEIN_THRESHOLD) {
            return $suggested;
        }
    }
    return $keyword;
}