/**
 * Compares the n-grams of an input text with the stored language profiles and
 * ranks the "language:encoding" pairs that match closely enough.
 *
 * The profiles are read on the first call from the *.txt files located in
 * internationalization_database/language_detection/language_profiles/ and are
 * kept in a static variable afterwards. Each profile file is passed through
 * api_utf8_decode() for 'UTF-8' and for every encoding that
 * _api_non_utf8_encodings() lists for the language of the file.
 *
 * For every candidate profile a distance ("delta") is accumulated: an n-gram
 * found in the profile adds the absolute difference between its rank in the
 * input and its position in the profile, a missing n-gram adds 400 points.
 * A profile is abandoned as soon as its delta exceeds $max_delta, and it is
 * reported only when the final delta is lower than $max_delta - 400.
 *
 * The value $max_delta = 80000 is good enough for speed and detection accuracy.
 * If you set the value of $max_delta too low, no language will be recognized.
 * $max_delta = 400 * 350 = 140000 is the best detection with lowest speed.
 *
 * @param array $n_grams		The n-grams of the input text (key = rank, value = n-gram).
 * @param string $encoding		Only profiles whose encoding equals this one (see api_equal_encodings()) are compared.
 * @param int $max_delta		(optional) The maximal allowed distance between the input and a profile.
 * @return array				Deltas keyed by "language:encoding", sorted ascending; an empty array
 * when $n_grams is not a non-empty array or when no profile is close enough.
 */
function &_api_compare_n_grams(&$n_grams, $encoding, $max_delta = LANGUAGE_DETECT_MAX_DELTA)
{
    static $language_profiles;
    // The function returns by reference, so every exit path has to return a variable.
    $result = array();
    if (!isset($language_profiles)) {
        // Starting with an empty array keeps ksort() and the comparison loop valid
        // even when the profile directory is missing or unreadable.
        $language_profiles = array();
        // Reading the language profile files from the internationalization database.
        $exceptions = array('.', '..', 'CVS', '.htaccess', '.svn', '_svn', 'index.html');
        $path = str_replace("\\", '/', dirname(__FILE__) . '/internationalization_database/language_detection/language_profiles/');
        $handle = is_dir($path) ? @opendir($path) : false;
        if ($handle !== false) {
            $non_utf8_encodings =& _api_non_utf8_encodings();
            while (($dir_entry = @readdir($handle)) !== false) {
                if (api_in_array_nocase($dir_entry, $exceptions)) {
                    continue;
                }
                // Only files that really end with ".txt" are language profiles.
                if (substr($dir_entry, -4) !== '.txt') {
                    continue;
                }
                // $path already ends with a slash.
                $dir_entry_full_path = $path . $dir_entry;
                if (is_dir($dir_entry_full_path)) {
                    continue;
                }
                $data = @file_get_contents($dir_entry_full_path);
                if ($data === false) {
                    continue;
                }
                $language = basename($dir_entry_full_path, '.txt');
                $encodings = array('UTF-8');
                if (!empty($non_utf8_encodings[$language]) && is_array($non_utf8_encodings[$language])) {
                    $encodings = array_merge($encodings, $non_utf8_encodings[$language]);
                }
                foreach ($encodings as $enc) {
                    $data_enc = api_utf8_decode($data, $enc);
                    if (empty($data_enc)) {
                        continue;
                    }
                    $key = $language . ':' . $enc;
                    // Flipping turns the list of n-grams into a map: n-gram => position in the profile.
                    $language_profiles[$key] = array(
                        'data' => array_flip(explode("\n", $data_enc)),
                        'language' => $language,
                        'encoding' => $enc
                    );
                }
            }
            // The handle is closed only when it has actually been opened.
            closedir($handle);
            ksort($language_profiles);
        }
    }
    if (!is_array($n_grams) || empty($n_grams)) {
        return $result;
    }
    // Comparison between the input n-grams and the language profiles.
    // Iterating by value: nothing is modified, so no references are left dangling.
    foreach ($language_profiles as $key => $language_profile) {
        if (!api_is_language_supported($language_profile['language']) || !api_equal_encodings($encoding, $language_profile['encoding'])) {
            continue;
        }
        // This is a summary measurement for matching between the input text and the current language profile.
        $delta = 0;
        // Searching each n-gram from the input text into the language profile.
        foreach ($n_grams as $rank => $n_gram) {
            if (isset($language_profile['data'][$n_gram])) {
                // The n-gram has been found, the difference between places in both
                // arrays is calculated (so called delta-points are adopted for
                // measuring distances between n-gram ranks).
                $delta += abs($rank - $language_profile['data'][$n_gram]);
            } else {
                // The n-gram has not been found in the profile. We add then
                // a large enough "distance" in delta-points.
                $delta += 400;
            }
            // Abort: This language already differs too much.
            if ($delta > $max_delta) {
                break;
            }
        }
        // Include only non-aborted languages in result array.
        if ($delta < $max_delta - 400) {
            $result[$key] = $delta;
        }
    }
    asort($result);
    return $result;
}
/**
 * Returns the most-probably used non-UTF-8 encoding for the given language.
 * This is the first (leading) value in the list of non-UTF-8 encodings of the
 * language, which is the one actually used by the system at the moment.
 * @param string $language (optional)	The specified language; when it is empty or not supported, the user interface language is used instead.
 * @return string|null					The correspondent encoding to the specified language, or null when no encoding is listed for it.
 * Note: See the file chamilo/main/inc/lib/internationalization_database/non_utf8_encodings.php
 * if you wish to revise the leading non-UTF-8 encoding for your language.
 */
function api_get_non_utf8_encoding($language = null)
{
    // The emptiness test goes first, so that the support check is never asked about an empty value.
    if (empty($language) || !api_is_language_supported($language)) {
        $language = api_get_interface_language(false, true);
    }
    $language = api_purify_language_id($language);
    $encodings =& _api_non_utf8_encodings();
    // isset() guards the lookup: a language that is missing from the table
    // must yield null instead of an "undefined index" notice.
    if (isset($encodings[$language]) && is_array($encodings[$language]) && !empty($encodings[$language][0])) {
        return $encodings[$language][0];
    }
    return null;
}