/**
 * Builds a copy of the given array in which every element has been passed
 * through getStringWithoutDiacritics(). The input array is left untouched.
 *
 * @param {Array} $array The strings that should be stripped of diacritics.
 * @return {Array} A new, sequentially indexed array holding the stripped
 *     strings in the same order as the input.
 */
function getArrayWithoutDiacritics($array) {
  $stripped = array();

  foreach ($array as $value) {
    array_push($stripped, getStringWithoutDiacritics($value));
  }

  return $stripped;
}
/**
 * Adds the individual parts of a name string to the list of all names.
 *
 * The new name is trimmed with Person::TRIM_CHARS and split into parts by
 * Person::getIndividualParts(). Each non-empty part is trimmed again, stripped
 * of diacritics, lowercased and appended to the list unless an identical
 * string is already present. The list is then sorted as strings.
 *
 * @param {Array} $allNames A reference to the array containing the existing
 *     names. This array is modified in place by this method.
 * @param {string} $newName The string holding the names to add for the
 *     current person.
 * @return {Array} The updated, sorted list of names (same contents as
 *     $allNames after the call).
 */
public function addNameToAllNames(&$allNames, $newName) {
  $parts = Person::getIndividualParts(trim($newName, Person::TRIM_CHARS));

  foreach ($parts as $part) {
    $trimmed = trim($part, Person::TRIM_CHARS);
    if ($trimmed === '') {
      continue;
    }

    $clean = strtolower(getStringWithoutDiacritics($trimmed));

    // Strict comparison: with loose comparison numeric-looking strings such
    // as "1e1" and "10" would be treated as the same name.
    if (!in_array($clean, $allNames, true)) {
      $allNames[] = $clean;
    }
  }

  sort($allNames, SORT_STRING);
  return $allNames;
}
/**
 * Extracts the county short code from a given full college name. For example,
 * from "D3 Arges" this extracts "AG".
 *
 * The name is lowercased with strtolower_ro() and stripped of diacritics, then
 * matched against the shape "d<digits> <county>" or "s<digits> <county>". The
 * county part is looked up in a fixed table of county names.
 *
 * @param {string} $college_name The full college name, e.g. "D3 Arges".
 * @return {string|null} The county short code, or null when the name does not
 *     have the expected shape or the county is not in the table.
 */
function getCollegeCountyShort($college_name) {
  $name = getStringWithoutDiacritics(strtolower_ro($college_name));

  $county_hash = array(
    "alba" => "AB",
    "arad" => "AR",
    "arges" => "AG",
    "bacau" => "BC",
    "bihor" => "BH",
    "bistrita nasaud" => "BN",
    "bistrita-nasaud" => "BN",
    "botosani" => "BT",
    "brasov" => "BV",
    "braila" => "BR",
    "buzau" => "BZ",
    "calarasi" => "CL",
    "caras-severin" => "CS",
    "caras severin" => "CS",
    "cluj" => "CJ",
    "constanta" => "CT",
    "covasna" => "CV",
    "dambovita" => "DB",
    "dolj" => "DJ",
    "galati" => "GL",
    "giurgiu" => "GR",
    "gorj" => "GJ",
    "harghita" => "HR",
    "hunedoara" => "HD",
    "ialomita" => "IL",
    "iasi" => "IS",
    "ilfov" => "IF",
    "maramures" => "MM",
    "mehedinti" => "MH",
    "mures" => "MS",
    "neamt" => "NT",
    "olt" => "OT",
    "prahova" => "PH",
    "satu mare" => "SM",
    "salaj" => "SJ",
    "sibiu" => "SB",
    "suceava" => "SV",
    "teleorman" => "TR",
    "timis" => "TM",
    "tulcea" => "TL",
    "vaslui" => "VS",
    "valcea" => "VL",
    "vrancea" => "VN",
    "bucuresti" => "B"
  );

  // preg_match() returns 0 (no match) or false (error); in both cases
  // $matches[3] would be undefined, so bail out instead of reading it.
  if (!preg_match("/(d|s)(\\d+) (.*)/", $name, $matches)) {
    return null;
  }

  // Surrounding whitespace would otherwise make a valid county miss the table.
  $county = trim($matches[3]);

  return isset($county_hash[$county]) ? $county_hash[$county] : null;
}
/**
 * Turns a search needle into a regex fragment that also matches the Romanian
 * diacritic variants of its letters.
 *
 * The needle is first stripped of diacritics; afterwards every t, s, a and i
 * (in either case) is replaced by an alternation group containing the plain
 * letter and its diacritic forms in both cases, e.g. "t" becomes "(t|ț|Ț)".
 *
 * NOTE(review): the "i" expansion assumes getStringWithoutDiacritics() folds
 * "î" to "i" in the same way it folds ț/ș/ă/â - confirm against that helper.
 * NOTE(review): other characters of the needle are not regex-escaped here;
 * callers are presumably responsible for that - verify.
 *
 * @param {string} $needle The text to search for.
 * @return {string} The needle with diacritic-capable letters expanded into
 *     alternation groups.
 */
function prepareNeedleForDiacriticsRegex($needle) {
  $needle = getStringWithoutDiacritics($needle);

  // Single-pass translation table: strtr() never re-scans text it has already
  // substituted, so the letters inside one group cannot be expanded again.
  $alternatives = array(
    't' => '(t|ț|Ț)',
    'T' => '(T|ț|Ț)',
    's' => '(s|ș|Ș)',
    'S' => '(S|ș|Ș)',
    'a' => '(a|ă|â|Ă|Â)',
    'A' => '(A|ă|â|Ă|Â)',
    'i' => '(i|î|Î)',
    'I' => '(I|î|Î)'
  );

  return strtr($needle, $alternatives);
}
/**
 * Searches the electoral_colleges table for rows whose description contains
 * words from a free-text query, and returns the best matches grouped by
 * college.
 *
 * The query is stripped of diacritics, escaped and split on spaces. Words of
 * one or two characters, stopwords and numbers are not searched for. Matching
 * rows are grouped by their name_cdep value and scored:
 *   - +2 when the highlighted description starts with "Municipiul";
 *   - +1 when it starts with "Localitate componentă" or "Orașul";
 *   - +2 for every matched word beyond the first (as counted by
 *     countMatches());
 *   - +2 for every query word contained in the name_cdep key;
 *   - +1 for every entry in the group's matched_words.
 *
 * NOTE(review): this relies on the legacy ext/mysql functions and their
 * implicit connection; that extension no longer exists in PHP 7+.
 *
 * @param {string} $query The free-text search typed by the user.
 * @return {Array} At most 25 entries ordered by collegeResultCompare(), each
 *     with the keys score, description, matched_words, name_cdep, name_senat
 *     and id. Empty when there is nothing to search for or the query fails.
 */
function getCollegeSearch($query) {
  $ignore_words = array("str", "strada", "ale", "aleea", "din", "bld",
                        "bulevardul", "nr", "numarul", "piata", "pta",
                        "orasul", "comuna", "satul", "sat");

  $query = getStringWithoutDiacritics($query);
  $query = mysql_real_escape_string($query);
  $words = explode(" ", $query);

  $likes = array();
  foreach ($words as $word) {
    // Ignore one and two letter words, stopwords, and numbers.
    if (strlen($word) > 2 &&
        !in_array(strtolower($word), $ignore_words, true) &&
        (int) $word == 0) {
      // "%" and "_" are wildcards inside LIKE; escape them so that user input
      // is matched literally instead of matching arbitrary rows.
      $pattern = addcslashes($word, '%_');

      // A single '%word%' pattern already covers the prefix-only and
      // suffix-only cases, so no extra LIKE clauses are needed.
      $likes[] = "description LIKE '%{$pattern}%'";
    }
  }

  if (count($likes) == 0) {
    return array();
  }

  $where = implode(" OR ", $likes);
  $s = mysql_query("\n SELECT *\n FROM electoral_colleges\n WHERE {$where}");

  // mysql_query() returns false on failure; there is nothing to fetch then.
  if ($s === false) {
    return array();
  }

  $result = array();
  while ($r = mysql_fetch_array($s)) {
    // We drop the descriptions where the match is not a full word, but instead
    // only a subset of a word. So for example... if the search was "ion" and
    // the description was "ionescu", we drop it.
    //
    // We have to do it this way because MYSQL native regexp matching does not
    // handle diacritics well, whereas LIKE works perfectly.
    $clean_description =
        strtolower(getStringWithoutDiacritics($r['description']));

    // Computed once per row; used both for filtering and for scoring below.
    $match_count = countMatches($words, $clean_description, $ignore_words);
    if ($match_count == 0) {
      continue;
    }

    $key = $r['name_cdep'];
    $description = highlightWords(correctDiacritics($r['description']), $words);

    if (!array_key_exists($key, $result)) {
      $result[$key] = array();
      $result[$key]['score'] = 0;
      $result[$key]['description'] = array();
      $result[$key]['matched_words'] = array();
      $result[$key]['name_cdep'] = $r['name_cdep'];
      $result[$key]['name_senat'] = $r['name_senat'];
      $result[$key]['id'] = $r['id'];
    }

    $result[$key]['description'][] = $description;
    $result[$key]['matched_words'] = setMatchedWords(
        $result[$key]['matched_words'], $r['description'], $words);

    if (startsWith($description, "Municipiul")) {
      $result[$key]['score'] += 2;
    }
    if (startsWith($description, "Localitate componentă") ||
        startsWith($description, "Orașul")) {
      $result[$key]['score'] += 1;
    }

    $result[$key]['score'] += ($match_count - 1) * 2;
  }

  foreach (array_keys($result) as $key) {
    $lower_key = strtolower($key);

    foreach ($words as $word) {
      // Consecutive spaces in the query produce empty words. An empty needle
      // triggers a warning on older PHP versions and matches everything on
      // PHP 8, so skip it explicitly.
      if ($word === '') {
        continue;
      }

      if (strpos($lower_key, strtolower($word)) !== false) {
        $result[$key]['score'] += 2;
      }
    }

    $result[$key]['score'] += count($result[$key]['matched_words']);
  }

  usort($result, "collegeResultCompare");
  return array_slice($result, 0, 25);
}