/// Builds a corrected version of a search phrase by replacing words that
/// matched few or no documents with the result of MakeSuggestion().
///
/// $words  - list of per-keyword entries; each entry is read for its
///           'keyword' and 'docs' fields
/// $query  - the original query string; it is split on single spaces and
///           each piece is looked up case-insensitively (via strtolower)
/// $ln_sph - passed through unchanged as the second argument of
///           MakeSuggestion()
///
/// Returns the query with substitutions applied, or false when:
///   - $words is empty or not an array,
///   - no word falls below the rarity threshold, or
///   - the query consists of a single word and no differing suggestion
///     was found for it.
function MakePhaseSuggestion($words, $query, $ln_sph)
{
    // Nothing to analyse; this also prevents a division by zero below.
    if (!is_array($words) || count($words) == 0) {
        return false;
    }

    $wordCount = count($words);
    $totalDocs = 0;
    foreach ($words as $word) {
        $totalDocs += $word['docs'];
    }

    // Rarity threshold: the sum of all 'docs' values divided by the squared
    // number of words.
    $threshold = $totalDocs / ($wordCount * $wordCount);

    // Collect keywords that matched nothing or fell below the threshold.
    $rare = array();
    foreach ($words as $word) {
        if ($word['docs'] == 0 || $word['docs'] < $threshold) {
            $rare[] = $word['keyword'];
        }
    }
    if (count($rare) == 0) {
        return false;
    }

    $suggested = array();
    foreach ($rare as $keyword) {
        $replacement = MakeSuggestion($keyword, $ln_sph);
        // Compare as strings: a loose comparison would treat numeric-looking
        // words such as "0123" and "123" as equal and drop the suggestion.
        if ($replacement && (string)$replacement !== (string)$keyword) {
            $suggested[$keyword] = $replacement;
        }
    }

    // A single-word query with no usable suggestion has nothing to offer.
    if ($wordCount == 1 && empty($suggested)) {
        return false;
    }

    $phrase = explode(' ', $query);
    foreach ($phrase as $k => $token) {
        $lookup = strtolower($token);
        if (isset($suggested[$lookup])) {
            $phrase[$k] = $suggested[$lookup];
        }
    }

    return implode(' ', $phrase);
}
$myrank = ", myrank={$myrank}"; } $levdist = levenshtein($keyword, $w); print "id={$match['id']}, weight={$match['weight']}, freq={$match[attrs][freq]}{$myrank}, word={$w}, levdist={$levdist}\n"; } print "--- DEBUG END ---\n"; } // further restrict trigram matches with a sane Levenshtein distance limit foreach ($res["matches"] as $match) { $suggested = $match["attrs"]["keyword"]; if (levenshtein($keyword, $suggested) <= LEVENSHTEIN_THRESHOLD) { return $suggested; } } return $keyword; } /// main if ($_SERVER["argc"] < 2) { die("usage:\n" . "php suggest.php --builddict\treads stopwords from stdin, prints SQL dump of the dictionary to stdout\n" . "php suggest.php --query WORD\tqueries Sphinx, prints suggestion\n"); } if ($_SERVER["argv"][1] == "--builddict") { $in = fopen("php://stdin", "r"); $out = fopen("php://stdout", "w+"); BuildDictionarySQL($out, $in); } if ($_SERVER["argv"][1] == "--query") { mysql_connect("localhost", "root", "") or die("mysql_connect() failed: " . mysql_error()); mysql_select_db("test") or die("mysql_select_db() failed: " . mysql_error()); $keyword = $_SERVER["argv"][2]; printf("keyword: %s\nsuggestion: %s\n", $keyword, MakeSuggestion($keyword)); }