/**
 * Compares the n-grams of an input text with the stored language profiles.
 *
 * On the first call the profile files (*.txt) are read from the
 * internationalization_database/language_detection/language_profiles/
 * directory located next to this file. Every profile is decoded from UTF-8
 * into UTF-8 itself and into each non-UTF-8 encoding registered for the
 * language, and the results are cached in a static variable for later calls.
 *
 * The value $max_delta = 80000 is good enough for speed and detection accuracy.
 * If you set the value of $max_delta too low, no language will be recognized.
 * $max_delta = 400 * 350 = 140000 is the best detection with lowest speed.
 *
 * @param array $n_grams	The n-grams of the input text, indexed by their rank.
 * @param string $encoding	Only profiles whose encoding equals this one are compared.
 * @param int $max_delta	The distance limit; a profile is abandoned as soon as its delta exceeds it.
 * @return array			Deltas indexed by 'language:encoding' keys, sorted ascending (best match first).
 * 							An empty array is returned when the input is empty or nothing matches.
 */
function &_api_compare_n_grams(&$n_grams, $encoding, $max_delta = LANGUAGE_DETECT_MAX_DELTA)
{
    static $language_profiles;
    if (!isset($language_profiles)) {
        // Start from an empty array, so that the sorting and the comparison loop
        // below stay valid (and loading is not retried) when no profile can be read.
        $language_profiles = array();
        // Reading the language profile files from the internationalization database.
        $exceptions = array('.', '..', 'CVS', '.htaccess', '.svn', '_svn', 'index.html');
        // Note: the path already ends with a slash.
        $path = str_replace("\\", '/', dirname(__FILE__) . '/internationalization_database/language_detection/language_profiles/');
        $non_utf8_encodings =& _api_non_utf8_encodings();
        $handle = is_dir($path) ? @opendir($path) : false;
        if ($handle) {
            while (($dir_entry = @readdir($handle)) !== false) {
                if (api_in_array_nocase($dir_entry, $exceptions)) {
                    continue;
                }
                if (strpos($dir_entry, '.txt') === false) {
                    continue;
                }
                $dir_entry_full_path = $path . $dir_entry;
                if (@filetype($dir_entry_full_path) === 'dir') {
                    continue;
                }
                $data = @file_get_contents($dir_entry_full_path);
                if ($data === false) {
                    continue;
                }
                $language = basename($dir_entry_full_path, '.txt');
                $encodings = array('UTF-8');
                if (!empty($non_utf8_encodings[$language])) {
                    $encodings = array_merge($encodings, $non_utf8_encodings[$language]);
                }
                foreach ($encodings as $enc) {
                    $data_enc = api_utf8_decode($data, $enc);
                    if (empty($data_enc)) {
                        continue;
                    }
                    // One profile per language/encoding pair. Flipping the list of lines
                    // gives a map n-gram => rank, which allows constant-time lookups.
                    $key = $language . ':' . $enc;
                    $language_profiles[$key] = array(
                        'data' => array_flip(explode("\n", $data_enc)),
                        'language' => $language,
                        'encoding' => $enc,
                    );
                }
            }
            // The handle is closed only when it has really been opened.
            closedir($handle);
        }
        ksort($language_profiles);
    }
    // The function returns by reference, so a variable (not a literal) must be returned.
    $result = array();
    if (!is_array($n_grams) || empty($n_grams)) {
        return $result;
    }
    // Comparison between the input n-grams and the language profiles.
    foreach ($language_profiles as $key => &$language_profile) {
        if (!api_is_language_supported($language_profile['language']) || !api_equal_encodings($encoding, $language_profile['encoding'])) {
            continue;
        }
        // This is a summary measurement for matching between the input text and the current language profile.
        $delta = 0;
        // Searching each n-gram from the input text into the language profile.
        foreach ($n_grams as $rank => &$n_gram) {
            if (isset($language_profile['data'][$n_gram])) {
                // The n-gram has been found, the difference between places in both
                // arrays is calculated (so called delta-points are adopted for
                // measuring distances between n-gram ranks).
                $delta += abs($rank - $language_profile['data'][$n_gram]);
            } else {
                // The n-gram has not been found in the profile. We add then
                // a large enough "distance" in delta-points.
                $delta += 400;
            }
            // Abort: This language already differs too much.
            if ($delta > $max_delta) {
                break;
            }
        }
        // Breaking the reference prevents accidental writes into the caller's array.
        unset($n_gram);
        // Include only non-aborted languages in result array. The delta has to stay
        // at least one "not found" penalty (400 points) below the limit.
        if ($delta < $max_delta - 400) {
            $result[$key] = $delta;
        }
    }
    // Breaking the reference prevents accidental writes into the cached profiles.
    unset($language_profile);
    asort($result);
    return $result;
}
/**
 * Returns a list of valid encodings for setting the platform character set.
 *
 * The list is built from the first encoding registered for every language in
 * the table returned by _api_non_utf8_encodings(). 'UTF-8' always comes first,
 * followed by the ISO-* encodings, the WINDOWS-* encodings and all the others;
 * each group is free of duplicates and is sorted in natural order.
 *
 * @return array	List of valid encodings, preferably IANA-registered.
 */
function api_get_valid_encodings()
{
    // A copy is taken on purpose (no reference binding), so that the fallback
    // below never overwrites the table owned by _api_non_utf8_encodings().
    $encodings = _api_non_utf8_encodings();
    if (!is_array($encodings)) {
        // The fallback has the same shape as the regular table: language => list of encodings.
        $encodings = array('english' => array('ISO-8859-15'));
    }
    $iso_encodings = array();
    $windows_encodings = array();
    $other_encodings = array();
    foreach ($encodings as $value) {
        // Malformed entries (not a list, or a list without a first item) are skipped.
        if (!is_array($value) || !isset($value[0])) {
            continue;
        }
        // Only the first encoding listed for a language is taken into account.
        $encoding = api_refine_encoding_id(trim($value[0]));
        if (empty($encoding)) {
            continue;
        }
        if (strpos($encoding, 'ISO-') === 0) {
            $iso_encodings[] = $encoding;
        } elseif (strpos($encoding, 'WINDOWS-') === 0) {
            $windows_encodings[] = $encoding;
        } else {
            $other_encodings[] = $encoding;
        }
    }
    $iso_encodings = array_unique($iso_encodings);
    $windows_encodings = array_unique($windows_encodings);
    $other_encodings = array_unique($other_encodings);
    natsort($iso_encodings);
    natsort($windows_encodings);
    natsort($other_encodings);
    // array_merge() renumbers the integer keys, so the result is a plain list.
    return array_merge(array('UTF-8'), $iso_encodings, $windows_encodings, $other_encodings);
}