/**
 * Builds a copy of a list of strings in which every entry has been passed
 * through getStringWithoutDiacritics(). The input is left untouched and the
 * returned array is re-indexed from zero (original keys are not kept).
 *
 * @param {Array} $array The strings that should be stripped of diacritics.
 * @return {Array} A new, sequentially indexed array holding the stripped
 *     strings in the same order as the input.
 */
function getArrayWithoutDiacritics($array)
{
    $stripped = array();
    foreach ($array as $original) {
        $plain = getStringWithoutDiacritics($original);
        $stripped[] = $plain;
    }
    return $stripped;
}
 /**
  * Merges the individual parts of a name string into the list of all known
  * names. The name is trimmed with Person::TRIM_CHARS, split into parts by
  * Person::getIndividualParts(), and every non-empty part is stripped of
  * diacritics and lowercased before being added. Parts that are already in
  * the list are skipped, and the list is re-sorted as strings afterwards.
  *
  * @param {Array} $allNames A reference to the array containing the existing
  *     names. This array is modified in place by this method.
  * @param {string} $newName The string with all the new names (or old names)
  *     that will be added to the current person.
  * @return {Array} The updated, sorted list of names (the same contents that
  *     $allNames holds after the call).
  */
 public function addNameToAllNames(&$allNames, $newName)
 {
     $partsName = Person::getIndividualParts(trim($newName, Person::TRIM_CHARS));
     foreach ($partsName as $elem) {
         // Trim once and reuse the result for both the emptiness check and
         // the normalisation below.
         $trimmed = trim($elem, Person::TRIM_CHARS);
         if ($trimmed === '') {
             continue;
         }
         $clean = strtolower(getStringWithoutDiacritics($trimmed));
         // Strict comparison: a loose in_array() treats numeric-looking
         // strings such as "1e2" and "100" as the same entry.
         if (!in_array($clean, $allNames, true)) {
             $allNames[] = $clean;
         }
     }
     sort($allNames, SORT_STRING);
     return $allNames;
 }
/**
 * Extracts the county short name from a given full college name. So for
 * example from "D3 Arges" this will extract "AG".
 *
 * The name is lowercased with strtolower_ro() and stripped of diacritics, then
 * matched against "<d|s><number> <county>"; the county part is looked up in a
 * fixed table of county names to their short codes.
 *
 * @param {string} $college_name The full college name, e.g. "D3 Arges".
 * @return {string|null} The county short code, or null when the name does not
 *     have the expected shape or the county is not in the table.
 */
function getCollegeCountyShort($college_name)
{
    $name = getStringWithoutDiacritics(strtolower_ro($college_name));
    $county_hash = array(
        "alba" => "AB",
        "arad" => "AR",
        "arges" => "AG",
        "bacau" => "BC",
        "bihor" => "BH",
        "bistrita nasaud" => "BN",
        "bistrita-nasaud" => "BN",
        "botosani" => "BT",
        "brasov" => "BV",
        "braila" => "BR",
        "buzau" => "BZ",
        "calarasi" => "CL",
        "caras-severin" => "CS",
        "caras severin" => "CS",
        "cluj" => "CJ",
        "constanta" => "CT",
        "covasna" => "CV",
        "dambovita" => "DB",
        "dolj" => "DJ",
        "galati" => "GL",
        "giurgiu" => "GR",
        "gorj" => "GJ",
        "harghita" => "HR",
        "hunedoara" => "HD",
        "ialomita" => "IL",
        "iasi" => "IS",
        "ilfov" => "IF",
        "maramures" => "MM",
        "mehedinti" => "MH",
        "mures" => "MS",
        "neamt" => "NT",
        "olt" => "OT",
        "prahova" => "PH",
        "satu mare" => "SM",
        "salaj" => "SJ",
        "sibiu" => "SB",
        "suceava" => "SV",
        "teleorman" => "TR",
        "timis" => "TM",
        "tulcea" => "TL",
        "vaslui" => "VS",
        "valcea" => "VL",
        "vrancea" => "VN",
        "bucuresti" => "B",
    );

    // preg_match() returns 0 (no match) or false (error); in both cases
    // $matches[3] does not exist, so bail out instead of reading it.
    if (!preg_match("/(d|s)(\\d+) (.*)/", $name, $matches)) {
        return null;
    }

    $county = $matches[3];
    // Unknown county names yield null rather than an undefined-index notice.
    return isset($county_hash[$county]) ? $county_hash[$county] : null;
}
/**
 * Turns a search needle into a regex fragment that tolerates Romanian
 * diacritics. The needle is first stripped of diacritics, then every
 * lowercase "t", "s" and "a" is replaced by an alternation group listing the
 * plain letter and its diacritic forms.
 *
 * NOTE(review): only lowercase t/s/a are expanded and the needle is not
 * regex-quoted here; confirm callers lowercase and quote as needed.
 *
 * @param {string} $needle The text to search for.
 * @return {string} The needle with t/s/a expanded into alternation groups.
 */
function prepareNeedleForDiacriticsRegex($needle)
{
    $plain = getStringWithoutDiacritics($needle);

    // str_replace() with arrays applies the pairs in order, each one on the
    // result of the previous replacement.
    $letters = array('t', 's', 'a');
    $groups = array('(t|ț|Ț)', '(s|ș|Ș)', '(a|ă|â|Ă|Â)');

    return str_replace($letters, $groups, $plain);
}
// Example #5
// 0
/**
 * Searches the electoral_colleges table for rows whose description contains
 * the significant words of a free-text query, groups the matching rows by
 * their name_cdep value, scores each group and returns the best 25.
 *
 * A word is significant when it is longer than two characters, is not in the
 * stopword list below and does not start with a non-zero number. Rows found by
 * the SQL LIKE filter are kept only when countMatches() reports at least one
 * match against the diacritics-free, lowercased description.
 *
 * NOTE: this relies on the legacy mysql_* extension and its implicit
 * connection, so the query is built with escaping rather than bound
 * parameters.
 *
 * @param {string} $query The free-text search string typed by the user.
 * @return {Array} At most 25 entries ordered by collegeResultCompare(). Each
 *     entry is an array with the keys 'score', 'description' (list of
 *     highlighted descriptions), 'matched_words', 'name_cdep', 'name_senat'
 *     and 'id'. An empty array is returned when the query has no significant
 *     words or the database query fails.
 */
function getCollegeSearch($query)
{
    $ignore_words = array("str", "strada", "ale", "aleea", "din", "bld", "bulevardul", "nr", "numarul", "piata", "pta", "orasul", "comuna", "satul", "sat");
    $query = getStringWithoutDiacritics($query);

    // Split on spaces and drop the empty tokens produced by repeated, leading
    // or trailing spaces; an empty word is useless for matching and triggers
    // an "empty needle" warning in strpos() further down.
    $words = array();
    foreach (explode(" ", $query) as $token) {
        if ($token !== '') {
            $words[] = $token;
        }
    }

    $likes = array();
    foreach ($words as $word) {
        // Ignore one and two letter words, stopwords, and numbers;
        if (strlen($word) > 2 && !in_array(strtolower($word), $ignore_words, true) && (int) $word == 0) {
            // Escape only the copy that goes into the SQL text, so $words
            // stays free of backslashes for matching and highlighting.
            // addcslashes() neutralises the LIKE wildcards (% and _) and the
            // LIKE escape character itself; mysql_real_escape_string() then
            // makes the result safe inside the quoted string literal.
            $pattern = mysql_real_escape_string(addcslashes($word, '\\%_'));
            // '%word%' already matches everything 'word%' and '%word' would.
            $likes[] = "description LIKE '%{$pattern}%'";
        }
    }
    if (count($likes) == 0) {
        return array();
    }
    $where = implode(" OR ", $likes);
    $s = mysql_query("\n      SELECT *\n      FROM electoral_colleges\n      WHERE {$where}");
    if ($s === false) {
        // The query failed; there is nothing to fetch.
        return array();
    }
    $result = array();
    while ($r = mysql_fetch_array($s)) {
        // We drop the descriptions where the match is not a full word, but instead
        // only a subset of a word. So for example... if the search was "ion" and
        // the description was "ionescu", we drop it.
        //
        // We have to do it this way because MYSQL native regexp matching does not
        // handle diacritics well, whereas LIKE works perfectly.
        $clean_description = strtolower(getStringWithoutDiacritics($r['description']));
        $match_count = countMatches($words, $clean_description, $ignore_words);
        if ($match_count == 0) {
            continue;
        }
        $key = $r['name_cdep'];
        $description = highlightWords(correctDiacritics($r['description']), $words);
        if (!array_key_exists($key, $result)) {
            $result[$key] = array();
            $result[$key]['score'] = 0;
            $result[$key]['description'] = array();
            $result[$key]['matched_words'] = array();
            $result[$key]['name_cdep'] = $r['name_cdep'];
            $result[$key]['name_senat'] = $r['name_senat'];
            $result[$key]['id'] = $r['id'];
        }
        $result[$key]['description'][] = $description;
        $result[$key]['matched_words'] = setMatchedWords($result[$key]['matched_words'], $r['description'], $words);
        if (startsWith($description, "Municipiul")) {
            $result[$key]['score'] += 2;
        }
        if (startsWith($description, "Localitate componentă") || startsWith($description, "Orașul")) {
            $result[$key]['score'] += 1;
        }
        // Every matched word beyond the first one is worth two extra points.
        $result[$key]['score'] += ($match_count - 1) * 2;
    }
    foreach ($result as $key => $value) {
        foreach ($words as $word) {
            // Bonus when the query word appears in the group key (name_cdep).
            if (strpos(strtolower($key), strtolower($word)) !== false) {
                $result[$key]['score'] += 2;
            }
        }
        $result[$key]['score'] += count($result[$key]['matched_words']);
    }
    // usort() discards the name_cdep keys; the value is still available in
    // each entry under 'name_cdep'.
    usort($result, "collegeResultCompare");
    return array_slice($result, 0, 25);
}