/**
 * Moves a byte offset backwards until it no longer points into the middle of
 * a multi-byte UTF-8 character.
 *
 * Bytes of the form 10xxxxxx (0x80-0xBF) are UTF-8 continuation bytes; an
 * offset pointing at one of them would make substr() split the character.
 * Offsets at or beyond the end of the string are returned unchanged.
 *
 * @param {String} $text The string that is going to be cut.
 * @param {Number} $pos The byte offset at which the cut is planned.
 * @return {Number} The nearest offset <= $pos that starts a character.
 */
function getSnippetAlignToCharStart($text, $pos) {
  $length = strlen($text);
  while ($pos > 0 && $pos < $length && (ord($text[$pos]) & 0xC0) === 0x80) {
    $pos--;
  }
  return $pos;
}

/**
 * Returns a snippet of $text around the first occurrence of $query.
 *
 * - If $query is empty, the snippet is the first (at most) 250 bytes of
 *   $text.
 * - Otherwise both strings are passed through correctDiacritics() and the
 *   snippet runs from 70 bytes before the first case-insensitive match to
 *   200 bytes after the start of that match.
 * - If the query does not occur in the text, the snippet is the first (at
 *   most) 200 bytes.
 *
 * A '...' marker is added on each side where text was cut off. Cut positions
 * are snapped back to a UTF-8 character boundary so that a multi-byte
 * character (e.g. a diacritic) is never split in half.
 *
 * NOTE: This method is VERY basic and it will not get amazing results, but
 * it's a start.
 * NOTE: stripos() compares bytes, so the case-insensitive match is only
 * reliable for ASCII letters; a $query containing diacritics may not be
 * found when its case differs from the text.
 *
 * ATTENTION: This doesn't currently work properly if tags are in here, it
 * just cuts them in half.
 *
 * @param {String} $text The original text from which we want to extract
 *     the snippet.
 * @param {String} $query The query text.
 * @return {String} The cut down string.
 */
function getSnippet($text, $query) {
  if ($query == '') {
    $end = getSnippetAlignToCharStart($text, min(strlen($text), 250));
    $suffix = $end < strlen($text) ? '...' : '';
    return substr($text, 0, $end) . $suffix;
  }

  $text = correctDiacritics($text);
  $query = correctDiacritics($query);
  $length = strlen($text);

  // stripos() returns false when there is no match; in that case the
  // snippet is anchored at the beginning of the text.
  $first_pos = stripos($text, $query);
  if ($first_pos === false) {
    $first_pos = 0;
  }

  $start = getSnippetAlignToCharStart($text, max(0, $first_pos - 70));
  $end = getSnippetAlignToCharStart($text, min($length, $first_pos + 200));

  $prefix = $start > 0 ? '...' : '';
  $suffix = $end < $length ? '...' : '';

  return $prefix . substr($text, $start, $end - $start) . $suffix;
}
/**
 * Searches the electoral_colleges table for rows whose description contains
 * the significant words of a free-text query, groups the matching rows by
 * their 'name_cdep' value, scores each group and returns at most 25 groups.
 *
 * Each returned element is an array with the keys:
 *   - 'score'         accumulated relevance score,
 *   - 'description'   list of highlighted descriptions of the matching rows,
 *   - 'matched_words' value maintained by setMatchedWords(),
 *   - 'name_cdep', 'name_senat', 'id' copied from the first matching row.
 *
 * The groups are ordered by usort() with collegeResultCompare (defined
 * elsewhere; presumably best score first - confirm against that function).
 *
 * @param {String} $query The free-text search query typed by the user.
 * @return {Array} At most 25 result groups, or an empty array when the query
 *     contains no usable word or the database query fails.
 */
function getCollegeSearch($query) {
  $ignore_words = array("str", "strada", "ale", "aleea", "din", "bld",
                        "bulevardul", "nr", "numarul", "piata", "pta",
                        "orasul", "comuna", "satul", "sat");

  $query = getStringWithoutDiacritics($query);

  // Split the raw query and drop the empty strings produced by consecutive
  // spaces. The words are deliberately NOT SQL-escaped here: they are also
  // used for matching and highlighting below, where escape backslashes would
  // corrupt them. Escaping happens only when the SQL is built.
  $words = array_values(array_filter(explode(" ", $query), 'strlen'));

  $likes = array();
  foreach ($words as $word) {
    // Ignore one and two letter words, stopwords, and numbers.
    // NOTE(review): (int) $word == 0 also discards words that merely start
    // with a non-zero digit (e.g. "12abc"); kept as in the original.
    if (strlen($word) > 2 &&
        !in_array(strtolower($word), $ignore_words, true) &&
        (int) $word == 0) {
      // First neutralise the LIKE metacharacters (\, % and _), then escape
      // the result for use inside a quoted SQL string literal.
      $pattern = mysql_real_escape_string(addcslashes($word, '\\%_'));
      // '%word%' already covers the prefix and suffix cases, so a single
      // LIKE per word selects the same rows as the former three.
      $likes[] = "description LIKE '%{$pattern}%'";
    }
  }

  if (count($likes) == 0) {
    return array();
  }

  $where = implode(" OR ", $likes);
  $s = mysql_query("
    SELECT *
    FROM electoral_colleges
    WHERE {$where}");

  // mysql_query() returns false on failure; there is nothing to iterate.
  if ($s === false) {
    return array();
  }

  $result = array();
  while ($r = mysql_fetch_array($s)) {
    // We drop the descriptions where the match is not a full word, but
    // instead only a subset of a word. So for example... if the search was
    // "ion" and the description was "ionescu", we drop it.
    //
    // We have to do it this way because MYSQL native regexp matching does
    // not handle diacritics well, whereas LIKE works perfectly.
    $clean_description =
        strtolower(getStringWithoutDiacritics($r['description']));
    $match_count = countMatches($words, $clean_description, $ignore_words);
    if ($match_count == 0) {
      continue;
    }

    $key = $r['name_cdep'];
    $description = highlightWords(correctDiacritics($r['description']),
                                  $words);

    if (!array_key_exists($key, $result)) {
      $result[$key] = array(
        'score' => 0,
        'description' => array(),
        'matched_words' => array(),
        'name_cdep' => $r['name_cdep'],
        'name_senat' => $r['name_senat'],
        'id' => $r['id'],
      );
    }

    $result[$key]['description'][] = $description;
    $result[$key]['matched_words'] = setMatchedWords(
        $result[$key]['matched_words'], $r['description'], $words);

    // Bonus depending on how the (highlighted) description starts.
    if (startsWith($description, "Municipiul")) {
      $result[$key]['score'] += 2;
    }
    if (startsWith($description, "Localitate componentă") ||
        startsWith($description, "Orașul")) {
      $result[$key]['score'] += 1;
    }

    // Every match beyond the first one in the same description adds 2.
    $result[$key]['score'] += ($match_count - 1) * 2;
  }

  foreach ($result as $key => $value) {
    $lower_key = strtolower($key);
    // Each query word that occurs in the group key (name_cdep) adds 2.
    // NOTE(review): this uses every non-empty query word, including short
    // words and stopwords, exactly as the original did.
    foreach ($words as $word) {
      if (strpos($lower_key, strtolower($word)) !== false) {
        $result[$key]['score'] += 2;
      }
    }
    $result[$key]['score'] += count($result[$key]['matched_words']);
  }

  usort($result, "collegeResultCompare");

  return array_slice($result, 0, 25);
}