/**
 * Returns a short snippet of $text built around the first occurrence of
 * $query. For now this is a pretty dumb function, it could definitely be
 * improved in the future.
 *
 * With an empty query the snippet is simply the beginning of the text (up to
 * 250 bytes). Otherwise the snippet spans from about 70 bytes before the
 * first case-insensitive match to about 200 bytes after the start of the
 * match. If the query does not occur in the text at all, the beginning of
 * the text (up to 200 bytes) is returned. A '...' marker is added on every
 * side where text was cut off.
 *
 * Lengths are measured in bytes, but the cut points are moved by up to three
 * bytes so that a multi-byte UTF-8 character is never split in half.
 *
 * NOTE: This method is VERY basic and it will not get amazing results, but
 * it's a start.
 * NOTE: This seems to not work correctly if $query contains diacritics.
 *
 * ATTENTION: This doesn't currently work properly if tags are in here, it
 * just cuts them in half.
 *
 * @param {String} $text The original text from which we want to extract
 *     the snippet.
 * @param {String} $query The query text.
 * @return {String} The cut down string.
 */
function getSnippet($text, $query)
{
    if ($query == '') {
        $length = strlen($text);
        $end = min($length, 250);
        // If the cut lands on a UTF-8 continuation byte (10xxxxxx), back off
        // to the start of that character. A character has at most three
        // continuation bytes, so the loop is capped at three steps.
        for ($i = 0; $i < 3 && $end > 0 && $end < $length && (ord($text[$end]) & 0xC0) === 0x80; $i++) {
            $end--;
        }
        $suffix = $end < $length ? '...' : '';
        return substr($text, 0, $end) . $suffix;
    }

    $text = correctDiacritics($text);
    $query = correctDiacritics($query);
    $length = strlen($text);

    $first_pos = stripos($text, $query);
    if ($first_pos === false) {
        // No match: anchor the snippet at the beginning of the text instead
        // of relying on false being silently coerced to 0 in the arithmetic.
        $first_pos = 0;
    }

    $start = max(0, $first_pos - 70);
    $end = min($length, $first_pos + 200);

    // Move the start forward past any continuation bytes so the snippet does
    // not begin in the middle of a character; never move past the match.
    for ($i = 0; $i < 3 && $start < $first_pos && (ord($text[$start]) & 0xC0) === 0x80; $i++) {
        $start++;
    }
    // Move the end backward for the same reason.
    for ($i = 0; $i < 3 && $end > $start && $end < $length && (ord($text[$end]) & 0xC0) === 0x80; $i++) {
        $end--;
    }

    $prefix = $start > 0 ? '...' : '';
    $suffix = $end < $length ? '...' : '';
    return $prefix . substr($text, $start, $end - $start) . $suffix;
}
// Example #2
// 0
/**
 * Searches the electoral_colleges table for rows whose description contains
 * any meaningful word of the query, groups the matching rows by their
 * 'name_cdep' value, scores each group and returns the 25 best groups.
 *
 * Words of one or two letters, the stop words listed in $ignore_words and
 * numbers are not used to build the SQL filter. Rows returned by SQL are
 * additionally filtered through countMatches(), so rows where the query only
 * matched part of a word are dropped.
 *
 * NOTE: This relies on the legacy mysql_* extension (removed in PHP 7.0) and
 * on its implicit default connection.
 *
 * @param {String} $query The raw search text.
 * @return {Array} A list of at most 25 entries ordered by
 *     collegeResultCompare(). Each entry is an array with the keys 'score',
 *     'description' (list of highlighted descriptions), 'matched_words',
 *     'name_cdep', 'name_senat' and 'id'. Empty when no usable word is left
 *     in the query or when the database query fails.
 */
function getCollegeSearch($query)
{
    $ignore_words = array("str", "strada", "ale", "aleea", "din", "bld", "bulevardul", "nr", "numarul", "piata", "pta", "orasul", "comuna", "satul", "sat");
    $query = getStringWithoutDiacritics($query);

    // Split BEFORE escaping: the words are also used for PHP-side matching
    // and highlighting below, where SQL escape sequences must not appear.
    // Empty tokens (produced by consecutive spaces) are dropped so that they
    // never reach strpos() as an empty needle.
    $words = array();
    foreach (explode(" ", $query) as $token) {
        if ($token !== '') {
            $words[] = $token;
        }
    }

    $likes = array();
    foreach ($words as $word) {
        // Ignore one and two letter words, stopwords, and numbers;
        if (strlen($word) > 2 && !in_array(strtolower($word), $ignore_words) && (int) $word == 0) {
            $escaped = mysql_real_escape_string($word);
            if ($escaped === false) {
                // Escaping failed (e.g. no database connection); a query
                // cannot be built safely.
                return array();
            }
            // '%word%' already covers the prefix ('word%') and suffix
            // ('%word') cases because % also matches the empty string.
            $likes[] = "description LIKE '%{$escaped}%'";
        }
    }
    if (count($likes) == 0) {
        return array();
    }

    $where = implode(" OR ", $likes);
    $s = mysql_query("\n      SELECT *\n      FROM electoral_colleges\n      WHERE {$where}");
    if ($s === false) {
        // The query failed; there is nothing to rank.
        return array();
    }

    $result = array();
    while ($r = mysql_fetch_array($s)) {
        // We drop the descriptions where the match is not a full word, but instead
        // only a subset of a word. So for example... if the search was "ion" and
        // the description was "ionescu", we drop it.
        //
        // We have to do it this way because MYSQL native regexp matching does not
        // handle diacritics well, whereas LIKE works perfectly.
        $clean_description = strtolower(getStringWithoutDiacritics($r['description']));
        $match_count = countMatches($words, $clean_description, $ignore_words);
        if ($match_count == 0) {
            continue;
        }

        $key = $r['name_cdep'];
        $description = highlightWords(correctDiacritics($r['description']), $words);
        if (!array_key_exists($key, $result)) {
            // First row seen for this college: create its result entry.
            $result[$key] = array();
            $result[$key]['score'] = 0;
            $result[$key]['description'] = array();
            $result[$key]['matched_words'] = array();
            $result[$key]['name_cdep'] = $r['name_cdep'];
            $result[$key]['name_senat'] = $r['name_senat'];
            $result[$key]['id'] = $r['id'];
        }
        $result[$key]['description'][] = $description;
        $result[$key]['matched_words'] = setMatchedWords($result[$key]['matched_words'], $r['description'], $words);

        // Bonus points depending on how the description starts.
        // NOTE(review): these checks run on the highlighted description; if
        // highlightWords() wraps the first word in markup the bonus would be
        // lost - confirm against highlightWords().
        if (startsWith($description, "Municipiul")) {
            $result[$key]['score'] += 2;
        }
        if (startsWith($description, "Localitate componentă") || startsWith($description, "Orașul")) {
            $result[$key]['score'] += 1;
        }
        // Every match beyond the first one is worth two extra points.
        $result[$key]['score'] += ($match_count - 1) * 2;
    }

    foreach ($result as $key => $value) {
        // Two points for every query word that occurs in the college name.
        $lower_key = strtolower($key);
        foreach ($words as $word) {
            if (strpos($lower_key, strtolower($word)) !== false) {
                $result[$key]['score'] += 2;
            }
        }
        $result[$key]['score'] += count($result[$key]['matched_words']);
    }

    usort($result, "collegeResultCompare");
    return array_slice($result, 0, 25);
}