Exemple #1
0
 /**
  * Converts a keyword/frequency dump read from stdin into SQL on stdout.
  *
  * Each input line is expected to look like "<keyword> <frequency>". The
  * output starts with "TRUNCATE suggest;" followed by multi-row
  * "INSERT INTO suggest VALUES" statements of at most 10000 rows each.
  *
  * A line is skipped when:
  *  - the frequency is missing or is not a plain non-negative integer,
  *  - the frequency is below FREQ_THRESHOLD,
  *  - the keyword is shorter than 2 bytes,
  *  - the keyword contains "_", "'" or "\" (the latter two would break the
  *    quoted SQL literal),
  *  - the keyword has already been emitted.
  *
  * The sphinx config values are exposed as upper-cased global constants
  * (FREQ_THRESHOLD, LENGTH_THRESHOLD, ...). Once all output is written the
  * Slim application is stopped via $app->stop().
  */
 public function buildDictionarySql()
 {
     $app = \Slim\Slim::getInstance();
     foreach (['freq_threshold', 'suggest_dubug', 'length_threshold', 'levenshtein_threshold', 'top_count'] as $var) {
         $constant = strtoupper($var);
         // define() on an existing constant only raises a warning, so skip it
         // when this method (or other code) has already defined the value.
         if (!defined($constant)) {
             define($constant, intval($app->config['sphinx'][$var]));
         }
     }
     $in = fopen("php://stdin", "r");
     $out = fopen("php://stdout", "w+");
     $used_keywords = array();
     fwrite($out, "TRUNCATE suggest;\n");
     // Number of rows written into the INSERT statement currently open.
     $m = 0;
     // Compare against false explicitly: a final line consisting of "0"
     // without a trailing newline is falsy but is still valid input.
     while (($line = fgets($in, 1024)) !== false) {
         $parts = explode(" ", trim($line));
         $keyword = trim($parts[0]);
         $freq = isset($parts[1]) ? $parts[1] : '';
         // The frequency is written unquoted into the SQL, so only accept
         // pure digit strings; this also rejects lines with no frequency.
         if (!ctype_digit($freq)) {
             continue;
         }
         $freq = (int) $freq;
         if (strlen($keyword) < 2
             || $freq < FREQ_THRESHOLD
             || strpos($keyword, "_") !== false
             || strpos($keyword, "'") !== false
             || strpos($keyword, "\\") !== false
             || isset($used_keywords[$keyword])
         ) {
             continue;
         }
         $trigrams = buildTrigrams($keyword);
         $used_keywords[$keyword] = true;
         // First row of a batch opens a new INSERT; later rows are comma-separated.
         fwrite($out, !$m ? "INSERT INTO suggest VALUES\n" : ",\n");
         fwrite($out, "( 0, '{$keyword}', '{$trigrams}', {$freq} )");
         $m++;
         // Terminate the statement every 10000 rows to keep each INSERT bounded.
         if ($m % 10000 == 0) {
             fwrite($out, ";\n");
             $m = 0;
         }
     }
     // Close a partially filled batch.
     if ($m) {
         fwrite($out, ";");
     }
     fwrite($out, "\n");
     // php://stdin and php://stdout hand out duplicated descriptors, so
     // closing them releases only this method's copies.
     fclose($in);
     fclose($out);
     $app->stop();
 }
/**
 * Looks up a replacement suggestion for a keyword via a trigram match.
 *
 * The keyword's trigrams (from buildTrigrams()) are matched against the
 * slimpdsuggest index, restricted to entries whose length is within
 * LENGTH_THRESHOLD of the keyword's byte length, and limited to TOP_COUNT rows.
 *
 * @param string $keyword Keyword to find a suggestion for.
 * @param mixed  $ln      Connection object exposing prepare(); used with PDO parameter types.
 *
 * @return string|false False when the query yields no rows; otherwise the first
 *                      returned keyword within LEVENSHTEIN_THRESHOLD edits of
 *                      $keyword, or $keyword itself when none qualifies.
 */
function MakeSuggestion($keyword, $ln)
{
    $matchExpr = '"' . buildTrigrams($keyword) . '"/1';
    $keywordLength = strlen($keyword);
    $tolerance = LENGTH_THRESHOLD;

    $statement = $ln->prepare("\n\t\t\tSELECT *, weight() as w, w+:delta-ABS(len-:len) as myrank\n\t\t\tFROM slimpdsuggest\n\t\t\tWHERE MATCH(:match) AND len BETWEEN :lowlen AND :highlen\n\t\t\tORDER BY myrank DESC, freq DESC\n\t\t\tLIMIT 0,:topcount OPTION ranker=wordcount");

    // Placeholder name, value and PDO type for every bound parameter.
    $bindings = array(
        array(':match', $matchExpr, PDO::PARAM_STR),
        array(':len', $keywordLength, PDO::PARAM_INT),
        array(':delta', $tolerance, PDO::PARAM_INT),
        array(':lowlen', $keywordLength - $tolerance, PDO::PARAM_INT),
        array(':highlen', $keywordLength + $tolerance, PDO::PARAM_INT),
        array(':topcount', TOP_COUNT, PDO::PARAM_INT),
    );
    foreach ($bindings as $binding) {
        list($placeholder, $value, $type) = $binding;
        $statement->bindValue($placeholder, $value, $type);
    }
    $statement->execute();

    $candidates = $statement->fetchAll();
    if (!$candidates) {
        return false;
    }

    // Trigram matching is fuzzy; accept only the first candidate whose edit
    // distance to the keyword stays within the configured limit.
    foreach ($candidates as $candidate) {
        $candidateKeyword = $candidate["keyword"];
        if (levenshtein($keyword, $candidateKeyword) <= LEVENSHTEIN_THRESHOLD) {
            return $candidateKeyword;
        }
    }

    return $keyword;
}