/**
 * Determine the character set of the parsed document and remember it.
 *
 * Sources are consulted in order, stopping at the first that yields a value:
 *  1. the charset parameter of the last retrieved Content-Type header, when
 *     get_last_retrieve_url_contents_content_type() is defined;
 *  2. the content attribute of a meta[http-equiv=Content-Type] element
 *     (ISO-8859-1 is assumed when that attribute names no charset);
 *  3. mb_detect_encoding() over the document's plain text, when mbstring
 *     is available;
 *  4. a hard default of UTF-8.
 *
 * ISO-8859-1 / Latin1 / Latin-1 are reported as CP1252.
 *
 * @return string The charset; the same value is stored in $this->_charset.
 */
protected function parse_charset()
{
    global $debugObject;

    // Case-insensitive, tolerates optional quotes/whitespace, and stops at the
    // first delimiter so a closing quote or trailing parameters
    // (e.g. "; boundary=x") are not captured as part of the charset name.
    $charsetPattern = '/charset\s*=\s*["\']?([^\s;"\']+)/i';
    $charset = null;

    if (function_exists('get_last_retrieve_url_contents_content_type')) {
        $contentTypeHeader = get_last_retrieve_url_contents_content_type();
        // Only a string can carry a charset parameter; anything else is skipped
        // instead of being handed to preg_match().
        if (is_string($contentTypeHeader) && preg_match($charsetPattern, $contentTypeHeader, $matches)) {
            $charset = $matches[1];
            if (is_object($debugObject)) {
                $debugObject->debugLog(2, 'header content-type found charset of: ' . $charset);
            }
        }
    }

    if (empty($charset)) {
        $el = $this->root->find('meta[http-equiv=Content-Type]', 0);
        if (!empty($el)) {
            $fullvalue = $el->content;
            if (is_object($debugObject)) {
                $debugObject->debugLog(2, 'meta content-type tag found' . $fullvalue);
            }
            if (!empty($fullvalue)) {
                if (preg_match($charsetPattern, $fullvalue, $matches)) {
                    $charset = $matches[1];
                } else {
                    // A meta tag without an explicit charset is treated as ISO-8859-1.
                    if (is_object($debugObject)) {
                        $debugObject->debugLog(2, 'meta content-type tag couldn\'t be parsed. using iso-8859 default.');
                    }
                    $charset = 'ISO-8859-1';
                }
            }
        }
    }

    // Nothing declared anywhere: try to guess from the text itself.
    if (empty($charset)) {
        $charset = false;
        // mbstring is an optional extension; without it, fall through to the
        // UTF-8 default rather than dying on an undefined function.
        if (function_exists('mb_detect_encoding')) {
            $charset = mb_detect_encoding($this->root->plaintext . 'ascii', array('UTF-8', 'CP1252'));
            if (is_object($debugObject)) {
                $debugObject->debugLog(2, 'mb_detect found: ' . $charset);
            }
        }
        // Detection unavailable or inconclusive: assume UTF-8 so parsing can go on.
        if ($charset === false) {
            if (is_object($debugObject)) {
                $debugObject->debugLog(2, 'since mb_detect failed - using default of utf-8');
            }
            $charset = 'UTF-8';
        }
    }

    // CP1252 is a superset of ISO-8859-1, so prefer it over any of its aliases.
    if (in_array(strtolower($charset), array('iso-8859-1', 'latin1', 'latin-1'), true)) {
        if (is_object($debugObject)) {
            $debugObject->debugLog(2, 'replacing ' . $charset . ' with CP1252 as its a superset');
        }
        $charset = 'CP1252';
    }

    if (is_object($debugObject)) {
        $debugObject->debugLog(1, 'EXIT - ' . $charset);
    }

    return $this->_charset = $charset;
}
Example #2
0
 /**
  * Resolve the document's character set, cache it in $this->_charset and
  * return it.
  *
  * Lookup order: Content-Type response header (only when
  * get_last_retrieve_url_contents_content_type() exists), then the
  * meta[http-equiv=Content-Type] element, then mb_detect_encoding() on the
  * plain text, and finally a fixed UTF-8 default. Any ISO-8859-1 alias is
  * upgraded to CP1252.
  *
  * @return string The resolved charset name.
  */
 protected function parse_charset()
 {
     global $debugObject;

     // Matches "charset=" in any letter case, skips an optional opening quote
     // and captures up to the next whitespace, semicolon or quote so that
     // values such as charset="utf-8" or charset=utf-8; x=y come out clean.
     $charsetPattern = '/charset\s*=\s*["\']?([^\s;"\']+)/i';
     $charset = null;

     if (function_exists('get_last_retrieve_url_contents_content_type')) {
         $contentTypeHeader = get_last_retrieve_url_contents_content_type();
         // Guard against a missing (non-string) header before matching.
         if (is_string($contentTypeHeader) && preg_match($charsetPattern, $contentTypeHeader, $matches)) {
             $charset = $matches[1];
             if (is_object($debugObject)) {
                 $debugObject->debugLog(2, 'header content-type found charset of: ' . $charset);
             }
         }
     }

     if (empty($charset)) {
         $el = $this->root->find('meta[http-equiv=Content-Type]', 0);
         if (!empty($el)) {
             $fullvalue = $el->content;
             if (is_object($debugObject)) {
                 $debugObject->debugLog(2, 'meta content-type tag found' . $fullvalue);
             }
             if (!empty($fullvalue)) {
                 if (preg_match($charsetPattern, $fullvalue, $matches)) {
                     $charset = $matches[1];
                 } else {
                     // The tag exists but names no charset: fall back to ISO-8859-1.
                     if (is_object($debugObject)) {
                         $debugObject->debugLog(2, 'meta content-type tag couldn\'t be parsed. using iso-8859 default.');
                     }
                     $charset = 'ISO-8859-1';
                 }
             }
         }
     }

     if (empty($charset)) {
         $charset = false;
         // mb_detect_encoding() only exists when mbstring is loaded; skip the
         // guess instead of raising a fatal error when it is not.
         if (function_exists('mb_detect_encoding')) {
             $charset = mb_detect_encoding($this->root->plaintext . 'ascii', array('UTF-8', 'CP1252'));
             if (is_object($debugObject)) {
                 $debugObject->debugLog(2, 'mb_detect found: ' . $charset);
             }
         }
         if ($charset === false) {
             if (is_object($debugObject)) {
                 $debugObject->debugLog(2, 'since mb_detect failed - using default of utf-8');
             }
             $charset = 'UTF-8';
         }
     }

     // ISO-8859-1 and its Latin1 spellings are replaced by their superset CP1252.
     if (in_array(strtolower($charset), array('iso-8859-1', 'latin1', 'latin-1'), true)) {
         if (is_object($debugObject)) {
             $debugObject->debugLog(2, 'replacing ' . $charset . ' with CP1252 as its a superset');
         }
         $charset = 'CP1252';
     }

     if (is_object($debugObject)) {
         $debugObject->debugLog(1, 'EXIT - ' . $charset);
     }

     return $this->_charset = $charset;
 }
 /**
  * Work out which character set the document uses and store it in
  * $this->_charset.
  *
  * The charset is taken from, in order of preference:
  *  - the Content-Type header reported by
  *    get_last_retrieve_url_contents_content_type(), if that function exists;
  *  - the "content" attribute of the meta[http-equiv=Content-Type] element
  *    (ISO-8859-1 when the attribute carries no charset);
  *  - mb_detect_encoding() applied to the root node's text, if mbstring is
  *    loaded;
  *  - UTF-8 as the final default.
  *
  * ISO-8859-1, Latin1 and Latin-1 are all returned as CP1252.
  *
  * @return string The detected charset.
  */
 protected function parseCharset()
 {
     // Case-insensitive match that drops optional quotes and stops before any
     // following parameter, so the captured name is usable as an encoding.
     $charsetPattern = '/charset\s*=\s*["\']?([^\s;"\']+)/i';
     $charset = null;

     if (function_exists('get_last_retrieve_url_contents_content_type')) {
         $contentTypeHeader = get_last_retrieve_url_contents_content_type();
         // A non-string result means there is no header text to inspect.
         if (is_string($contentTypeHeader) && preg_match($charsetPattern, $contentTypeHeader, $matches)) {
             $charset = $matches[1];
         }
     }

     if (empty($charset)) {
         $el = $this->root->find('meta[http-equiv=Content-Type]', 0);
         if (!empty($el)) {
             $fullValue = $el->getAttribute("content");
             if (!empty($fullValue)) {
                 // If there is a meta tag and it does not specify the character
                 // set, ISO-8859-1 is used as the typical value.
                 $charset = preg_match($charsetPattern, $fullValue, $matches)
                     ? $matches[1]
                     : 'ISO-8859-1';
             }
         }
     }

     // Nothing was declared, so try to detect the encoding from the text.
     if (empty($charset)) {
         $charset = false;
         // mbstring is optional; when it is missing go straight to the default.
         if (function_exists('mb_detect_encoding')) {
             $charset = mb_detect_encoding($this->root->text() . 'ascii', array('UTF-8', 'CP1252'));
         }
         // Detection failed or was unavailable: assume UTF-8 so work can continue.
         if ($charset === false) {
             $charset = 'UTF-8';
         }
     }

     // CP1252 is a superset of ISO-8859-1, so report it in place of any alias.
     if (in_array(strtolower($charset), array('iso-8859-1', 'latin1', 'latin-1'), true)) {
         $charset = 'CP1252';
     }

     return $this->_charset = $charset;
 }
 /**
  * Work out the document charset, store it in $this->_charset and return it.
  *
  * This is an obfuscated method: most locals are reached through variable
  * variables whose names are read from $GLOBALS entries or from throwaway
  * string locals. Only some of those $GLOBALS entries are assigned inside this
  * method; the keys "wjbmshsyauk", "uksmpinbto", "nxftgxpc", "xsjlyd",
  * "nuhgvbce", "vqbocptfe" and "ldevfjobpzog" are read here but never written.
  *
  * NOTE(review): presumably those keys are set elsewhere in the file to
  * "contentTypeHeader", "success", "matches", "charset", "el", "debugObject"
  * and "fullvalue" respectively - confirm before relying on the inline notes
  * below, which are written under that assumption.
  *
  * Besides $this->_charset, the method also writes several $GLOBALS entries
  * as it runs.
  */
 protected function parse_charset()
 {
     // Register the local name "charset" under a global key, then null that local through it.
     ${"GLOBALS"}["tfsixdenjjrh"] = "charset";
     global $debugObject;
     ${${"GLOBALS"}["tfsixdenjjrh"]} = null;
     // String aliases for the locals $debugObject and $charset, dereferenced further down.
     $dbmqkvqedt = "debugObject";
     $qozbquzr = "charset";
     // Step 1: the Content-Type header, only when the helper function is defined.
     if (function_exists("get_last_retrieve_url_contents_content_type")) {
         $byktqjrhnxw = "success";
         ${${"GLOBALS"}["wjbmshsyauk"]} = get_last_retrieve_url_contents_content_type();
         ${${"GLOBALS"}["uksmpinbto"]} = preg_match("/charset=(.+)/", ${${"GLOBALS"}["wjbmshsyauk"]}, ${${"GLOBALS"}["nxftgxpc"]});
         // ${$byktqjrhnxw} is $success; it only reflects the preg_match() above
         // if $GLOBALS["uksmpinbto"] is "success" - TODO confirm.
         if (${$byktqjrhnxw}) {
             $yrfhqya = "charset";
             ${"GLOBALS"}["bxxlymm"] = "debugObject";
             $ogvpicuoxrs = "matches";
             // $charset = $matches[1]
             ${$yrfhqya} = ${$ogvpicuoxrs}[1];
             if (is_object(${${"GLOBALS"}["bxxlymm"]})) {
                 $debugObject->debugLog(2, "header content-type found charset of: " . ${${"GLOBALS"}["xsjlyd"]});
             }
         }
     }
     // Step 2: if the variable named by $GLOBALS["xsjlyd"] (presumably $charset)
     // is still empty, take $this->_target_charset.
     if (empty(${${"GLOBALS"}["xsjlyd"]})) {
         ${${"GLOBALS"}["xsjlyd"]} = $this->_target_charset;
     }
     $zsmttub = "charset";
     ${"GLOBALS"}["ohdxxciw"] = "charset";
     // Step 3: ${$qozbquzr} is $charset - inspect the meta tag only while it is empty.
     if (empty(${$qozbquzr})) {
         ${${"GLOBALS"}["nuhgvbce"]} = $this->root->find("meta[http-equiv=Content-Type]", 0);
         ${"GLOBALS"}["sbnoch"] = "el";
         if (!empty(${${"GLOBALS"}["sbnoch"]})) {
             ${"GLOBALS"}["wdbsmnap"] = "fullvalue";
             // $fullvalue = content attribute of the meta element
             ${${"GLOBALS"}["wdbsmnap"]} = $el->content;
             $vtdrchd = "fullvalue";
             if (is_object(${${"GLOBALS"}["vqbocptfe"]})) {
                 ${"GLOBALS"}["vppsqwcwsw"] = "fullvalue";
                 $debugObject->debugLog(2, "meta content-type tag found" . ${${"GLOBALS"}["vppsqwcwsw"]});
             }
             if (!empty(${$vtdrchd})) {
                 ${"GLOBALS"}["fsfcjji"] = "matches";
                 ${${"GLOBALS"}["uksmpinbto"]} = preg_match("/charset=(.+)/", ${${"GLOBALS"}["ldevfjobpzog"]}, ${${"GLOBALS"}["fsfcjji"]});
                 if (${${"GLOBALS"}["uksmpinbto"]}) {
                     ${"GLOBALS"}["xgyzztl"] = "matches";
                     ${${"GLOBALS"}["xsjlyd"]} = ${${"GLOBALS"}["xgyzztl"]}[1];
                 } else {
                     // No charset in the meta content: $charset becomes "ISO-8859-1".
                     ${"GLOBALS"}["plnfnvfhvwb"] = "charset";
                     if (is_object(${${"GLOBALS"}["vqbocptfe"]})) {
                         $debugObject->debugLog(2, "meta content-type tag couldn't be parsed. using iso-8859 default.");
                     }
                     ${${"GLOBALS"}["plnfnvfhvwb"]} = "ISO-8859-1";
                 }
             }
         }
     }
     $dgmmnu = "charset";
     // Step 4: last resort - a literal "UTF-8" is assigned; no detection call is made
     // here even though the log messages mention mb_detect.
     if (empty(${${"GLOBALS"}["xsjlyd"]})) {
         $vbhrxfof = "debugObject";
         ${${"GLOBALS"}["xsjlyd"]} = "UTF-8";
         if (is_object(${$vbhrxfof})) {
             $debugObject->debugLog(2, "mb_detect found: " . ${${"GLOBALS"}["xsjlyd"]});
         }
         // NOTE(review): this branch looks unreachable - the same variable was just
         // set to "UTF-8" a few lines above, so it should never be === false.
         if (${${"GLOBALS"}["xsjlyd"]} === false) {
             if (is_object(${${"GLOBALS"}["vqbocptfe"]})) {
                 $debugObject->debugLog(2, "since mb_detect failed - using default of utf-8");
             }
             ${${"GLOBALS"}["xsjlyd"]} = "UTF-8";
         }
     }
     // Case-insensitive comparison against three ISO-8859-1 spellings; on a match
     // the charset is replaced with "CP1252".
     if (strtolower(${${"GLOBALS"}["xsjlyd"]}) == strtolower("ISO-8859-1") || strtolower(${${"GLOBALS"}["ohdxxciw"]}) == strtolower("Latin1") || strtolower(${$zsmttub}) == strtolower("Latin-1")) {
         ${"GLOBALS"}["vpqeqsicjk"] = "charset";
         if (is_object(${${"GLOBALS"}["vqbocptfe"]})) {
             $gbynqty = "charset";
             $debugObject->debugLog(2, "replacing " . ${$gbynqty} . " with CP1252 as its a superset");
         }
         ${${"GLOBALS"}["vpqeqsicjk"]} = "CP1252";
     }
     // ${$dbmqkvqedt} is $debugObject.
     if (is_object(${$dbmqkvqedt})) {
         $ecpttgpx = "charset";
         $debugObject->debugLog(1, "EXIT - " . ${$ecpttgpx});
     }
     // ${$dgmmnu} is $charset: cache it on the instance and return it.
     return $this->_charset = ${$dgmmnu};
 }
Example #5
0
 /**
  * Detect the charset of the loaded document.
  *
  * The first source that yields a value wins:
  *  - the Content-Type header exposed by
  *    get_last_retrieve_url_contents_content_type() (skipped when that
  *    function is not defined);
  *  - the meta[http-equiv=Content-Type] element, defaulting to ISO-8859-1
  *    when its content has no charset;
  *  - mb_detect_encoding() over the plain text (skipped when mbstring is not
  *    loaded);
  *  - UTF-8.
  *
  * ISO-8859-1 and its Latin1 / Latin-1 aliases are mapped to CP1252. The
  * result is written to $this->_charset before being returned.
  *
  * @return string The detected charset.
  */
 protected function parse_charset()
 {
     global $debugObject;

     // "charset" is matched in any letter case; optional quotes and padding
     // are skipped and the capture ends at whitespace, ";" or a quote, so
     // neither quoting nor extra parameters leak into the charset name.
     $charsetPattern = '/charset\s*=\s*["\']?([^\s;"\']+)/i';
     $charset = null;

     if (function_exists("get_last_retrieve_url_contents_content_type")) {
         $contentTypeHeader = get_last_retrieve_url_contents_content_type();
         // Skip matching entirely when no header string is available.
         if (is_string($contentTypeHeader) && preg_match($charsetPattern, $contentTypeHeader, $matches)) {
             $charset = $matches[1];
             if (is_object($debugObject)) {
                 $debugObject->debugLog(2, "header content-type found charset of: " . $charset);
             }
         }
     }

     if (empty($charset)) {
         $el = $this->root->find("meta[http-equiv=Content-Type]", 0);
         if (!empty($el)) {
             $fullvalue = $el->content;
             if (is_object($debugObject)) {
                 $debugObject->debugLog(2, "meta content-type tag found" . $fullvalue);
             }
             if (!empty($fullvalue)) {
                 if (preg_match($charsetPattern, $fullvalue, $matches)) {
                     $charset = $matches[1];
                 } else {
                     // Meta tag present but without a charset: use ISO-8859-1.
                     if (is_object($debugObject)) {
                         $debugObject->debugLog(2, "meta content-type tag couldn't be parsed. using iso-8859 default.");
                     }
                     $charset = "ISO-8859-1";
                 }
             }
         }
     }

     if (empty($charset)) {
         $charset = false;
         // Without the optional mbstring extension the call below would be a
         // fatal error, so detection is attempted only when it is available.
         if (function_exists("mb_detect_encoding")) {
             $charset = mb_detect_encoding($this->root->plaintext . "ascii", array("UTF-8", "CP1252"));
             if (is_object($debugObject)) {
                 $debugObject->debugLog(2, "mb_detect found: " . $charset);
             }
         }
         if ($charset === false) {
             if (is_object($debugObject)) {
                 $debugObject->debugLog(2, "since mb_detect failed - using default of utf-8");
             }
             $charset = "UTF-8";
         }
     }

     // Prefer CP1252 over ISO-8859-1 and its aliases, since it is a superset.
     if (in_array(strtolower($charset), array("iso-8859-1", "latin1", "latin-1"), true)) {
         if (is_object($debugObject)) {
             $debugObject->debugLog(2, "replacing " . $charset . " with CP1252 as its a superset");
         }
         $charset = "CP1252";
     }

     if (is_object($debugObject)) {
         $debugObject->debugLog(1, "EXIT - " . $charset);
     }

     return $this->_charset = $charset;
 }