Example #1
0
/**
 * Detects the encoding of an XML-formatted text.
 *
 * The checks run in this order: an encoding declaration matched by
 * _PCRE_XML_ENCODING wins; otherwise a text that validates as UTF-8 is
 * reported as 'UTF-8'; otherwise the fallback encoding is used.
 *
 * @param string $string            The input XML-formatted text.
 * @param string $default_encoding  The encoding to return when nothing can be detected.
 *                                  When empty, the value of _api_mb_internal_encoding() is used instead.
 * @return string                   The detected encoding. Declared and fallback values are passed
 *                                  through api_refine_encoding_id().
 * @todo The second parameter is to be eliminated. See api_detect_encoding_html().
 */
function api_detect_encoding_xml($string, $default_encoding = null)
{
    // An explicit declaration inside the document takes precedence over everything else.
    $declaration = array();
    if (preg_match(_PCRE_XML_ENCODING, $string, $declaration)) {
        return api_refine_encoding_id($declaration[1]);
    }

    if (api_is_valid_utf8($string)) {
        return 'UTF-8';
    }

    // Nothing detectable: use the caller's default, or the internal encoding when none was given.
    $fallback = empty($default_encoding) ? _api_mb_internal_encoding() : $default_encoding;

    return api_refine_encoding_id($fallback);
}
 /**
  * Detects the encoding of a given manifest (an XML text).
  *
  * The declared encoding of a manifest may be wrong or missing, so the
  * declaration is only trusted when it names a non-UTF-8 encoding. In every
  * other case the encoding is guessed from the manifest's text content.
  *
  * @param string $xml    The input XML text (taken by reference; it is not modified here).
  * @return string        The detected encoding of the input XML.
  */
 private function detect_manifest_encoding(&$xml)
 {
     // A text that validates as UTF-8 is reported as such, whatever it declares.
     if (api_is_valid_utf8($xml)) {
         return 'UTF-8';
     }

     $declared = preg_match(_PCRE_XML_ENCODING, $xml, $declaration)
         ? api_refine_encoding_id($declaration[1])
         : '';

     // The text is not valid UTF-8 at this point, so a UTF-8 declaration is ignored
     // while any other declared encoding is accepted.
     if (!empty($declared) && !api_is_utf8($declared)) {
         return $declared;
     }

     // Build a sample from the <langstring> and <title> elements.
     $sample = '';
     if (preg_match_all('/<langstring[^>]*>(.*)<\\/langstring>/m', $xml, $langstrings)) {
         $sample = implode("\n", $langstrings[1]);
     }
     if (preg_match_all('/<title[^>]*>(.*)<\\/title>/m', $xml, $titles)) {
         $sample .= "\n" . implode("\n", $titles[1]);
     }

     // Without any usable sample, run the detection on the whole manifest.
     return api_detect_encoding(empty($sample) ? $xml : $sample);
 }
/**
 * Reads the 'videoconf.inc.php' language file of the given language and
 * returns its variables as a string of XML fragments, one line per variable:
 * <labelfield><labelid>NAME</labelid><labelvalue>VALUE</labelvalue></labelfield>
 *
 * Only lines of the form `$NAME = "VALUE";` are picked up. Values that are
 * not valid UTF-8 are converted from the language's non-UTF-8 encoding and
 * have their HTML entities decoded.
 *
 * When the language folder or file is missing or unreadable, or when no
 * variable could be extracted, the English file is used instead.
 *
 * NOTE(review): values are written into the XML without escaping, so a value
 * containing '<' or '&' yields malformed XML. Left as is because callers may
 * rely on the raw output - confirm before changing.
 *
 * @param string $language  The language folder name under SYS_LANG_PATH.
 * @return string           The generated XML fragments, or an empty string when even
 *                          the English file yields nothing.
 */
function get_language_file_as_xml($language = 'english')
{
    $path = api_get_path(SYS_LANG_PATH) . $language . '/';
    $file = $path . 'videoconf.inc.php';
    $xml = '';

    if (is_dir($path) && is_readable($path) && is_file($file) && is_readable($file)) {
        $non_utf8_encoding = api_get_non_utf8_encoding($language);

        // file() returns false on failure; treat that as an empty file so that
        // the English fallback below applies instead of iterating over false.
        $list = file($file);
        if ($list === false) {
            $list = array();
        }

        foreach ($list as $line) {
            // Strip the line terminator first: with "\r\n" endings the trailing "\r"
            // would otherwise prevent the '$' anchor from matching after '";'.
            $line = rtrim($line, "\r\n");

            $items = array();
            if (!preg_match('/^\\$([^\\s]*)\\s*=\\s*"(.*)";$/', $line, $items)) {
                continue;
            }

            $string = $items[2];
            if (!api_is_valid_utf8($string)) {
                $string = api_html_entity_decode(api_utf8_encode($string, $non_utf8_encoding), ENT_QUOTES, 'UTF-8');
            }
            $xml .= '<labelfield><labelid>' . $items[1] . '</labelid><labelvalue>' . stripslashes($string) . '</labelvalue></labelfield>' . "\n";
        }
    }

    // Single fallback point: anything that produced no output falls back to English.
    if ($xml === '' && $language != 'english') {
        return get_language_file_as_xml('english');
    }

    return $xml;
}
/**
 * Detects the encoding of a plain text.
 *
 * A text that validates as UTF-8 is reported as 'UTF-8'; anything else is
 * handed over to mb_detect_encoding() and its result is returned unchanged.
 *
 * @param string $string                The input text.
 * @param string $language (optional)   The language of the input text, if known.
 *                                      It is accepted but not used by this implementation.
 * @return string                       The detected encoding.
 */
function api_detect_encoding($string, $language = null)
{
    // The UTF-8 validity check comes first; mbstring is consulted only when it fails.
    return api_is_valid_utf8($string) ? 'UTF-8' : mb_detect_encoding($string);
}
/**
 * Detects the encoding of a plain text.
 *
 * A text that validates as UTF-8 is reported as 'UTF-8'. Otherwise every
 * supported non-UTF-8 encoding from api_get_valid_encodings() is scored by
 * n-gram comparison, and the candidate with the lowest delta below
 * LANGUAGE_DETECT_MAX_DELTA is chosen. A candidate must also be confirmed by
 * the comparison result and survive a round-trip conversion of the text.
 * When the language is known, a "broken" UTF-8 text can still be reported
 * as 'UTF-8' if it scores better than the candidates found so far.
 *
 * @param string $string                The input text.
 * @param string $language (optional)   The language of the input text, provided if it is known.
 * @return string|null                  The detected encoding, or null when nothing matched.
 */
function api_detect_encoding($string, $language = null)
{
    // Testing against valid UTF-8 first.
    if (api_is_valid_utf8($string)) {
        return 'UTF-8';
    }

    $result = null;
    $delta_points_min = LANGUAGE_DETECT_MAX_DELTA;

    // Testing non-UTF-8 encodings.
    // Iterate by value: a by-reference loop variable would stay bound to the last
    // array element, and the later "$encoding = 'UTF-8'" would then overwrite it.
    $encodings = api_get_valid_encodings();
    foreach ($encodings as $encoding) {
        if (!api_is_encoding_supported($encoding) || api_is_utf8($encoding)) {
            continue;
        }

        // Intermediate variables are kept on purpose so that each helper receives a variable.
        $sample = api_substr($string, 0, LANGUAGE_DETECT_MAX_LENGTH, $encoding);
        $n_grams = _api_generate_n_grams($sample, $encoding);
        $result_array = _api_compare_n_grams($n_grams, $encoding);
        if (empty($result_array)) {
            continue;
        }

        // Take the first entry of the result; each() is no longer available in PHP 8.
        reset($result_array);
        $key = key($result_array);
        $delta_points = current($result_array);

        if ($delta_points < $delta_points_min) {
            // The part of the key after ':' is read as the encoding.
            $pos = strpos($key, ':');
            $result_encoding = api_refine_encoding_id(substr($key, $pos + 1));
            if (api_equal_encodings($encoding, $result_encoding)) {
                // Accept the candidate only if converting to UTF-8 and back reproduces the input.
                if ($string == api_utf8_decode(api_utf8_encode($string, $encoding), $encoding)) {
                    $delta_points_min = $delta_points;
                    $result = $encoding;
                }
            }
        }
    }

    // "Broken" UTF-8 texts are to be detected as UTF-8.
    // This functionality is enabled when language of the text is known.
    $language = api_purify_language_id((string) $language);
    if (!empty($language)) {
        $encoding = 'UTF-8';
        $sample = api_substr($string, 0, LANGUAGE_DETECT_MAX_LENGTH, $encoding);
        $n_grams = _api_generate_n_grams($sample, $encoding);
        $result_array = _api_compare_n_grams($n_grams, $encoding);
        if (!empty($result_array)) {
            reset($result_array);
            $key = key($result_array);
            $delta_points = current($result_array);

            if ($delta_points < $delta_points_min) {
                // The key is split at ':' into a language part and an encoding part.
                $pos = strpos($key, ':');
                $result_encoding = api_refine_encoding_id(substr($key, $pos + 1));
                $result_language = substr($key, 0, $pos);
                if ($language == $result_language && api_is_utf8($result_encoding)) {
                    $delta_points_min = $delta_points;
                    $result = $encoding;
                }
            }
        }
    }

    return $result;
}