/**
 * Work out which character set the parsed document uses and cache the result.
 *
 * Sources are consulted in this order until one yields a non-empty value:
 *  1. the charset parameter of the Content-Type header returned by
 *     get_last_retrieve_url_contents_content_type(), when that function is defined;
 *  2. the content attribute of the first meta[http-equiv=Content-Type] element
 *     (ISO-8859-1 is assumed when the attribute carries no charset parameter);
 *  3. mb_detect_encoding() over $this->root->plaintext, restricted to UTF-8 and
 *     CP1252, when the mbstring extension is available;
 *  4. a hard-coded UTF-8 fallback.
 *
 * ISO-8859-1, Latin1 and Latin-1 are reported as CP1252.
 * Progress is logged through the global $debugObject when it is an object.
 *
 * @return string The charset name, which is also stored in $this->_charset.
 */
protected function parse_charset()
{
    global $debugObject;

    // Parameter names are case-insensitive and the value may be quoted or be
    // followed by further parameters, so capture only the bare charset token.
    $charsetPattern = '/charset\s*=\s*["\']?([^\s"\';,]+)/i';

    $charset = null;

    if (function_exists('get_last_retrieve_url_contents_content_type')) {
        $contentTypeHeader = get_last_retrieve_url_contents_content_type();

        // Only run the regex on an actual string; preg_match() on null is
        // deprecated since PHP 8.1.
        if (is_string($contentTypeHeader) && preg_match($charsetPattern, $contentTypeHeader, $matches)) {
            $charset = $matches[1];
            if (is_object($debugObject)) {
                $debugObject->debugLog(2, 'header content-type found charset of: ' . $charset);
            }
        }
    }

    if (empty($charset)) {
        $el = $this->root->find('meta[http-equiv=Content-Type]', 0);
        if (!empty($el)) {
            $fullvalue = $el->content;
            if (is_object($debugObject)) {
                $debugObject->debugLog(2, 'meta content-type tag found' . $fullvalue);
            }

            if (!empty($fullvalue)) {
                if (preg_match($charsetPattern, $fullvalue, $matches)) {
                    $charset = $matches[1];
                } else {
                    // A meta tag without an explicit charset: fall back to
                    // ISO-8859-1 as the typical value for such pages.
                    if (is_object($debugObject)) {
                        $debugObject->debugLog(2, 'meta content-type tag couldn\'t be parsed. using iso-8859 default.');
                    }
                    $charset = 'ISO-8859-1';
                }
            }
        }
    }

    // Nothing declared anywhere: try to guess from the text itself.
    if (empty($charset)) {
        $detected = false;

        // mbstring is an optional extension; without it we go straight to the default.
        if (function_exists('mb_detect_encoding')) {
            $detected = mb_detect_encoding($this->root->plaintext . "ascii", array("UTF-8", "CP1252"));
            if (is_object($debugObject)) {
                $debugObject->debugLog(2, 'mb_detect found: ' . $detected);
            }
        }

        // Detection unavailable or inconclusive: assume UTF-8 so parsing can continue.
        if ($detected === false) {
            if (is_object($debugObject)) {
                $debugObject->debugLog(2, 'since mb_detect failed - using default of utf-8');
            }
            $detected = 'UTF-8';
        }

        $charset = $detected;
    }

    // CP1252 is a superset of these encodings, so report it in their place.
    if (in_array(strtolower($charset), array('iso-8859-1', 'latin1', 'latin-1'), true)) {
        if (is_object($debugObject)) {
            $debugObject->debugLog(2, 'replacing ' . $charset . ' with CP1252 as its a superset');
        }
        $charset = 'CP1252';
    }

    if (is_object($debugObject)) {
        $debugObject->debugLog(1, 'EXIT - ' . $charset);
    }

    return $this->_charset = $charset;
}
/**
 * Detect the document's character set and remember it in $this->_charset.
 *
 * Lookup order (first non-empty result wins):
 *  - charset parameter of the header string returned by
 *    get_last_retrieve_url_contents_content_type(), if that function exists;
 *  - charset parameter of the content attribute on the first
 *    meta[http-equiv=Content-Type] element, or ISO-8859-1 when that attribute
 *    is present but names no charset;
 *  - mb_detect_encoding() on $this->root->plaintext (candidates: UTF-8, CP1252),
 *    skipped when mbstring is not loaded;
 *  - UTF-8 as the last resort.
 *
 * The ISO-8859-1 / Latin1 / Latin-1 names are finally mapped to CP1252.
 * Each step is reported to the global $debugObject when one is installed.
 *
 * @return string The resolved charset (same value as $this->_charset).
 */
protected function parse_charset()
{
    global $debugObject;

    $hasDebug = is_object($debugObject);

    // Case-insensitive; stops at quotes, whitespace, ';' or ',' so that values
    // like charset="utf-8" or "charset=utf-8; x=y" yield just "utf-8".
    $pattern = '/charset\s*=\s*["\']?([^\s"\';,]+)/i';

    $charset = null;

    // Step 1: transport-level Content-Type header, if the helper is available.
    if (function_exists('get_last_retrieve_url_contents_content_type')) {
        $header = get_last_retrieve_url_contents_content_type();
        $found = is_string($header) ? preg_match($pattern, $header, $matches) : 0;
        if ($found) {
            $charset = $matches[1];
            if ($hasDebug) {
                $debugObject->debugLog(2, 'header content-type found charset of: ' . $charset);
            }
        }
    }

    // Step 2: <meta http-equiv="Content-Type" content="...">.
    if (empty($charset)) {
        $el = $this->root->find('meta[http-equiv=Content-Type]', 0);
        if (!empty($el)) {
            $fullvalue = $el->content;
            if ($hasDebug) {
                $debugObject->debugLog(2, 'meta content-type tag found' . $fullvalue);
            }
            if (!empty($fullvalue)) {
                $found = preg_match($pattern, $fullvalue, $matches);
                if ($found) {
                    $charset = $matches[1];
                } else {
                    if ($hasDebug) {
                        $debugObject->debugLog(2, 'meta content-type tag couldn\'t be parsed. using iso-8859 default.');
                    }
                    $charset = 'ISO-8859-1';
                }
            }
        }
    }

    // Step 3: content sniffing, then the UTF-8 default.
    if (empty($charset)) {
        if (function_exists('mb_detect_encoding')) {
            $charset = mb_detect_encoding($this->root->plaintext . "ascii", array("UTF-8", "CP1252"));
            if ($hasDebug) {
                $debugObject->debugLog(2, 'mb_detect found: ' . $charset);
            }
        } else {
            // mbstring missing: treat it the same as a failed detection.
            $charset = false;
        }

        if ($charset === false) {
            if ($hasDebug) {
                $debugObject->debugLog(2, 'since mb_detect failed - using default of utf-8');
            }
            $charset = 'UTF-8';
        }
    }

    // Step 4: widen Latin-1 style names to their superset CP1252.
    $lowered = strtolower($charset);
    if ($lowered === 'iso-8859-1' || $lowered === 'latin1' || $lowered === 'latin-1') {
        if ($hasDebug) {
            $debugObject->debugLog(2, 'replacing ' . $charset . ' with CP1252 as its a superset');
        }
        $charset = 'CP1252';
    }

    if ($hasDebug) {
        $debugObject->debugLog(1, 'EXIT - ' . $charset);
    }

    return $this->_charset = $charset;
}
/**
 * Determine the charset of the loaded document and store it in $this->_charset.
 *
 * The first source that produces a non-empty value is used:
 *  1. the charset parameter in the header string returned by
 *     get_last_retrieve_url_contents_content_type(), if that function is defined;
 *  2. the "content" attribute of the first meta[http-equiv=Content-Type] element;
 *     when the attribute is non-empty but has no charset parameter, ISO-8859-1
 *     is assumed;
 *  3. mb_detect_encoding() on $this->root->text() with UTF-8 and CP1252 as
 *     candidates, provided mbstring is loaded;
 *  4. UTF-8.
 *
 * ISO-8859-1, Latin1 and Latin-1 are replaced by CP1252.
 *
 * @return string The charset that was stored in $this->_charset.
 */
protected function parseCharset()
{
    // Match the parameter name case-insensitively and capture only the token
    // itself, without surrounding quotes or any parameters that follow it.
    $charsetRegex = '/charset\s*=\s*["\']?([^\s"\';,]+)/i';

    $charset = null;

    if (function_exists('get_last_retrieve_url_contents_content_type')) {
        $contentTypeHeader = get_last_retrieve_url_contents_content_type();
        // Guard against a non-string return before handing it to preg_match().
        if (is_string($contentTypeHeader)
            && preg_match($charsetRegex, $contentTypeHeader, $matches)
        ) {
            $charset = $matches[1];
        }
    }

    if (empty($charset)) {
        $el = $this->root->find('meta[http-equiv=Content-Type]', 0);
        if (!empty($el)) {
            $fullValue = $el->getAttribute("content");
            if (!empty($fullValue)) {
                // A Content-Type meta tag that names no charset is treated as ISO-8859-1.
                $charset = preg_match($charsetRegex, $fullValue, $matches)
                    ? $matches[1]
                    : 'ISO-8859-1';
            }
        }
    }

    // Still unknown: let mbstring guess from the text, if the extension is there.
    if (empty($charset)) {
        $charset = function_exists('mb_detect_encoding')
            ? mb_detect_encoding($this->root->text() . "ascii", array("UTF-8", "CP1252"))
            : false;

        // No usable guess: assume UTF-8 so processing can carry on.
        if ($charset === false) {
            $charset = 'UTF-8';
        }
    }

    // CP1252 is a superset of the Latin-1 family, so prefer it.
    if (in_array(strtolower($charset), array('iso-8859-1', 'latin1', 'latin-1'), true)) {
        $charset = 'CP1252';
    }

    return $this->_charset = $charset;
}
/**
 * Resolve a charset name, store it in $this->_charset and return the stored value.
 *
 * NOTE(review): this method is machine-obfuscated and is left byte-for-byte
 * untouched. Locals are addressed through variable-variables whose names are
 * held either in throwaway locals (e.g. $qozbquzr = "charset") or in $GLOBALS
 * entries.
 *
 * The $GLOBALS entries "wjbmshsyauk", "uksmpinbto", "nxftgxpc", "xsjlyd",
 * "nuhgvbce", "vqbocptfe" and "ldevfjobpzog" are read here but never assigned
 * in this method, so they must be populated elsewhere before it runs. Judging
 * by how they are used they presumably hold "contentTypeHeader", "success",
 * "matches", "charset", "el", "debugObject" and "fullvalue" respectively -
 * TODO confirm against the code that sets them.
 *
 * The method also writes $GLOBALS entries of its own ("tfsixdenjjrh",
 * "bxxlymm", "ohdxxciw", "sbnoch", "wdbsmnap", "vppsqwcwsw", "fsfcjji",
 * "xgyzztl", "plnfnvfhvwb", "vpqeqsicjk"). Other obfuscated code may rely on
 * them - verify before removing or reordering any assignment.
 *
 * Assuming the name mapping above, the lookup order appears to be:
 *  1. charset parameter (regex /charset=(.+)/) of the value returned by
 *     get_last_retrieve_url_contents_content_type(), when that function exists;
 *  2. $this->_target_charset;
 *  3. the content property of the first meta[http-equiv=Content-Type] element,
 *     with "ISO-8859-1" used when no charset parameter is found in it;
 *  4. the literal "UTF-8". No mb_detect_encoding() call is made here even
 *     though the log messages still mention mb_detect, so the "=== false"
 *     check that follows looks unreachable - confirm.
 * ISO-8859-1 / Latin1 / Latin-1 (compared case-insensitively) become "CP1252".
 *
 * Debug messages go to the global $debugObject when it is an object.
 */
protected function parse_charset() { ${"GLOBALS"}["tfsixdenjjrh"] = "charset"; global $debugObject; ${${"GLOBALS"}["tfsixdenjjrh"]} = null; $dbmqkvqedt = "debugObject"; $qozbquzr = "charset"; if (function_exists("get_last_retrieve_url_contents_content_type")) { $byktqjrhnxw = "success"; ${${"GLOBALS"}["wjbmshsyauk"]} = get_last_retrieve_url_contents_content_type(); ${${"GLOBALS"}["uksmpinbto"]} = preg_match("/charset=(.+)/", ${${"GLOBALS"}["wjbmshsyauk"]}, ${${"GLOBALS"}["nxftgxpc"]}); if (${$byktqjrhnxw}) { $yrfhqya = "charset"; ${"GLOBALS"}["bxxlymm"] = "debugObject"; $ogvpicuoxrs = "matches"; ${$yrfhqya} = ${$ogvpicuoxrs}[1]; if (is_object(${${"GLOBALS"}["bxxlymm"]})) { $debugObject->debugLog(2, "header content-type found charset of: " . ${${"GLOBALS"}["xsjlyd"]}); } } } if (empty(${${"GLOBALS"}["xsjlyd"]})) { ${${"GLOBALS"}["xsjlyd"]} = $this->_target_charset; } $zsmttub = "charset"; ${"GLOBALS"}["ohdxxciw"] = "charset"; if (empty(${$qozbquzr})) { ${${"GLOBALS"}["nuhgvbce"]} = $this->root->find("meta[http-equiv=Content-Type]", 0); ${"GLOBALS"}["sbnoch"] = "el"; if (!empty(${${"GLOBALS"}["sbnoch"]})) { ${"GLOBALS"}["wdbsmnap"] = "fullvalue"; ${${"GLOBALS"}["wdbsmnap"]} = $el->content; $vtdrchd = "fullvalue"; if (is_object(${${"GLOBALS"}["vqbocptfe"]})) { ${"GLOBALS"}["vppsqwcwsw"] = "fullvalue"; $debugObject->debugLog(2, "meta content-type tag found" . ${${"GLOBALS"}["vppsqwcwsw"]}); } if (!empty(${$vtdrchd})) { ${"GLOBALS"}["fsfcjji"] = "matches"; ${${"GLOBALS"}["uksmpinbto"]} = preg_match("/charset=(.+)/", ${${"GLOBALS"}["ldevfjobpzog"]}, ${${"GLOBALS"}["fsfcjji"]}); if (${${"GLOBALS"}["uksmpinbto"]}) { ${"GLOBALS"}["xgyzztl"] = "matches"; ${${"GLOBALS"}["xsjlyd"]} = ${${"GLOBALS"}["xgyzztl"]}[1]; } else { ${"GLOBALS"}["plnfnvfhvwb"] = "charset"; if (is_object(${${"GLOBALS"}["vqbocptfe"]})) { $debugObject->debugLog(2, "meta content-type tag couldn't be parsed. 
using iso-8859 default."); } ${${"GLOBALS"}["plnfnvfhvwb"]} = "ISO-8859-1"; } } } } $dgmmnu = "charset"; if (empty(${${"GLOBALS"}["xsjlyd"]})) { $vbhrxfof = "debugObject"; ${${"GLOBALS"}["xsjlyd"]} = "UTF-8"; if (is_object(${$vbhrxfof})) { $debugObject->debugLog(2, "mb_detect found: " . ${${"GLOBALS"}["xsjlyd"]}); } if (${${"GLOBALS"}["xsjlyd"]} === false) { if (is_object(${${"GLOBALS"}["vqbocptfe"]})) { $debugObject->debugLog(2, "since mb_detect failed - using default of utf-8"); } ${${"GLOBALS"}["xsjlyd"]} = "UTF-8"; } } if (strtolower(${${"GLOBALS"}["xsjlyd"]}) == strtolower("ISO-8859-1") || strtolower(${${"GLOBALS"}["ohdxxciw"]}) == strtolower("Latin1") || strtolower(${$zsmttub}) == strtolower("Latin-1")) { ${"GLOBALS"}["vpqeqsicjk"] = "charset"; if (is_object(${${"GLOBALS"}["vqbocptfe"]})) { $gbynqty = "charset"; $debugObject->debugLog(2, "replacing " . ${$gbynqty} . " with CP1252 as its a superset"); } ${${"GLOBALS"}["vpqeqsicjk"]} = "CP1252"; } if (is_object(${$dbmqkvqedt})) { $ecpttgpx = "charset"; $debugObject->debugLog(1, "EXIT - " . ${$ecpttgpx}); } return $this->_charset = ${$dgmmnu}; }
/**
 * Figure out the charset of the current document and cache it on the instance.
 *
 * Tried in order, stopping at the first non-empty answer:
 *  1. charset parameter of the string returned by
 *     get_last_retrieve_url_contents_content_type() (only if that function exists);
 *  2. charset parameter inside the content attribute of the first
 *     meta[http-equiv=Content-Type] element; "ISO-8859-1" when the attribute
 *     is non-empty but has no such parameter;
 *  3. mb_detect_encoding() applied to $this->root->plaintext with the
 *     candidates UTF-8 and CP1252 (only if mbstring is loaded);
 *  4. "UTF-8".
 *
 * A result of ISO-8859-1, Latin1 or Latin-1 (any letter case) is replaced by
 * "CP1252". Diagnostic messages are sent to the global $debugObject if set.
 *
 * @return string The charset written to $this->_charset.
 */
protected function parse_charset()
{
    global $debugObject;

    // One shared pattern for header and meta values: case-insensitive name,
    // optional quote, then the charset token up to whitespace, quote, ';' or ','.
    $charsetRe = '/charset\s*=\s*["\']?([^\s"\';,]+)/i';
    $latin1Names = array("iso-8859-1", "latin1", "latin-1");

    $charset = null;

    if (function_exists("get_last_retrieve_url_contents_content_type")) {
        $contentTypeHeader = get_last_retrieve_url_contents_content_type();
        // A non-string result (e.g. null) has nothing to parse.
        if (is_string($contentTypeHeader)) {
            $success = preg_match($charsetRe, $contentTypeHeader, $matches);
            if ($success) {
                $charset = $matches[1];
                if (is_object($debugObject)) {
                    $debugObject->debugLog(2, "header content-type found charset of: " . $charset);
                }
            }
        }
    }

    if (empty($charset)) {
        $el = $this->root->find("meta[http-equiv=Content-Type]", 0);
        if (!empty($el)) {
            $fullvalue = $el->content;
            if (is_object($debugObject)) {
                $debugObject->debugLog(2, "meta content-type tag found" . $fullvalue);
            }
            if (!empty($fullvalue)) {
                $success = preg_match($charsetRe, $fullvalue, $matches);
                if ($success) {
                    $charset = $matches[1];
                } else {
                    // Meta tag present but no charset named: default to ISO-8859-1.
                    if (is_object($debugObject)) {
                        $debugObject->debugLog(2, "meta content-type tag couldn't be parsed. using iso-8859 default.");
                    }
                    $charset = "ISO-8859-1";
                }
            }
        }
    }

    if (empty($charset)) {
        // mbstring is optional; without it we cannot sniff the content.
        $canDetect = function_exists("mb_detect_encoding");
        $charset = false;

        if ($canDetect) {
            $charset = mb_detect_encoding($this->root->plaintext . "ascii", array("UTF-8", "CP1252"));
            if (is_object($debugObject)) {
                $debugObject->debugLog(2, "mb_detect found: " . $charset);
            }
        }

        if ($charset === false) {
            if (is_object($debugObject)) {
                $debugObject->debugLog(2, "since mb_detect failed - using default of utf-8");
            }
            $charset = "UTF-8";
        }
    }

    // CP1252 extends the Latin-1 encodings, so use it whenever one of them was found.
    if (in_array(strtolower($charset), $latin1Names, true)) {
        if (is_object($debugObject)) {
            $debugObject->debugLog(2, "replacing " . $charset . " with CP1252 as its a superset");
        }
        $charset = "CP1252";
    }

    if (is_object($debugObject)) {
        $debugObject->debugLog(1, "EXIT - " . $charset);
    }

    return $this->_charset = $charset;
}