/**
 * Parses the HTML body of an HTTP response and returns it as a SimpleXMLElement.
 *
 * When the 'auto_encoding' config flag is truthy, the body is first passed
 * through \Diggin\Http\Charset\Front\UrlRegex::convert(), together with the
 * response's content-type header value when one is available. When the
 * 'xml_manifesto' config flag is exactly true, the result is re-serialised
 * with an XML declaration and re-parsed.
 *
 * @param object $response Response object exposing getBody() and
 *                         getHeaders()->get() (presumably a
 *                         \Zend\Http\Response -- TODO confirm against callers)
 * @return SimpleXMLElement|false false only if the final simplexml_load_string()
 *                                re-parse fails
 * @throws \RuntimeException when the body is empty or cannot be loaded as HTML
 */
public function getSimplexml($response)
{
    if ($this->config['auto_encoding']) {
        // Only forward a content-type hint when the header lookup really
        // produced a header object; calling getFieldValue() on anything
        // else would be a fatal error.
        $metadata = array();
        $contentType = $response->getHeaders()->get('content-type');
        if (is_object($contentType) && method_exists($contentType, 'getFieldValue')) {
            $metadata['content-type'] = $contentType->getFieldValue();
        }

        $charsetFront = new \Diggin\Http\Charset\Front\UrlRegex();
        $responseBody = $metadata
            ? $charsetFront->convert($response->getBody(), $metadata)
            : $charsetFront->convert($response->getBody());
    } else {
        $responseBody = $response->getBody();
    }

    // NOTE(review): this replacement is a no-op as written (search and
    // replacement are identical). It presumably was meant to escape bare
    // ampersands before parsing -- confirm the intended replacement.
    $responseBody = str_replace('&', '&', $responseBody);

    $domDoc = new \DOMDocument('1.0', 'UTF-8');

    // Collect libxml parse warnings internally instead of silencing the call
    // with "@", then restore whatever setting the caller had.
    $previousSetting = libxml_use_internal_errors(true);
    $loaded = ((string) $responseBody !== '') ? $domDoc->loadHTML($responseBody) : false;
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    if ($loaded === false) {
        throw new \RuntimeException('Unable to parse the response body as HTML.');
    }

    // loadHTML() called on an instance returns a boolean, so the populated
    // document itself (not the return value) must be imported.
    $simplexml = simplexml_import_dom($domDoc);
    if (!$simplexml instanceof \SimpleXMLElement) {
        throw new \RuntimeException('Unable to parse the response body as HTML.');
    }

    /**
     * add xml manifest
     * @see http://goungoun.dip.jp/app/fswiki/wiki.cgi/devnotebook?
     * page=PHP5%A1%A2%CC%A4%C0%B0%B7%C1HTML%A4%F2SimpleXML%A4%D8%CA%D1%B4%B9
     */
    if ($this->config["xml_manifesto"] === true) {
        $str = $simplexml->asXML();

        // Prepend an XML declaration when the serialised markup lacks one.
        if (1 !== preg_match('/^<\\?xml version="1.0"/', $str)) {
            $str = '<?xml version="1.0" encoding="UTF-8"?>' . "\n" . $str;
        }

        // Line breaks in the HTML have been turned into numeric character
        // references, so convert them back to characters.
        $str = $this->_numentToChar($str);
        $simplexml = simplexml_load_string($str);
    }

    return $simplexml;
}
var_export($http_response_header);
?>
<?php
echo 'Content-type is ...';
var_dump($cType = detect_contenttype_from_http_response_header($http_response_header));
?>
<?php
include 'SplClassLoader.php';

// Register an autoloader for the Diggin namespace rooted four levels above this directory.
$loader = new SplClassLoader('Diggin', dirname(dirname(dirname(dirname(__DIR__)))) . '/src');
$loader->register();

$front = new \Diggin\Http\Charset\Front\UrlRegex();

// First pass: convert with no hints at all.
echo 'trying convert html without content-type..', PHP_EOL;
sleep(1);
$converted_html = $front->convert($html);
// Dump the first 800 characters of the tag-stripped, whitespace-stripped result.
var_dump(mb_substr(preg_replace('/\\s*/s', '', strip_tags($converted_html)), 0, 800));

echo PHP_EOL, PHP_EOL;

// Second pass: convert again, this time supplying the URL and the detected content-type.
echo 'trying convert html with content-type..', PHP_EOL;
sleep(1);
$converted_html = $front->convert($html, array('url' => $url, 'content-type' => $cType));
var_dump(mb_substr(preg_replace('/\\s*/s', '', strip_tags($converted_html)), 0, 800));

/**
 * Extracts the Content-Type value from a list of raw HTTP response header lines.
 *
 * The header name is matched case-insensitively and surrounding whitespace
 * is trimmed from the value. When several Content-Type lines are present
 * (the header list can contain the headers of every hop of a redirect
 * chain), the last one is returned, since it belongs to the final response.
 *
 * @param array|null $http_response_header Raw header lines, e.g. the
 *                                         $http_response_header variable
 * @return string|null The Content-Type value, or null when none is found
 */
function detect_contenttype_from_http_response_header($http_response_header)
{
    $contentType = null;

    // The cast lets a null header list (nothing was populated) fall through
    // to the null result instead of triggering a foreach warning.
    foreach ((array) $http_response_header as $headerLine) {
        if (preg_match('/^Content-Type:\s*(.*)$/i', $headerLine, $matches)) {
            $contentType = trim($matches[1]);
        }
    }

    return $contentType;
}