Пример #1
0
 /**
  * Builds a SimpleXMLElement from the HTML body of an HTTP response.
  *
  * Steps:
  *  1. When config 'auto_encoding' is truthy, the body is passed through
  *     \Diggin\Http\Charset\Front\UrlRegex::convert(), together with the
  *     response's Content-Type header value when one is available.
  *  2. The body is parsed with DOMDocument::loadHTML() and the resulting
  *     document is imported into SimpleXML.
  *  3. When config 'xml_manifesto' is strictly true, the tree is serialised,
  *     an XML declaration is prepended if it is missing, the string is passed
  *     through _numentToChar() and then parsed again.
  *
  * @param object $response HTTP response object providing getBody() and
  *                         getHeaders(); presumably a Zend\Http\Response
  *                         - TODO confirm
  * @return SimpleXMLElement|false false only when the re-parse in the
  *                                'xml_manifesto' branch fails
  * @throws \RuntimeException when the body is empty or cannot be turned
  *                           into a DOM/SimpleXML tree
  */
 public function getSimplexml($response)
 {
     if ($this->config['auto_encoding']) {
         $charsetFront = new \Diggin\Http\Charset\Front\UrlRegex();
         $contentType = $response->getHeaders()->get('content-type');
         if (is_object($contentType)) {
             $responseBody = $charsetFront->convert(
                 $response->getBody(),
                 array('content-type' => $contentType->getFieldValue())
             );
         } else {
             // The header lookup did not yield a header object (e.g. the
             // response has no Content-Type); convert from the body alone
             // instead of calling getFieldValue() on a non-object.
             $responseBody = $charsetFront->convert($response->getBody());
         }
     } else {
         $responseBody = $response->getBody();
     }

     // NOTE(review): this spot previously ran str_replace('&', '&', ...),
     // which changes nothing and was dropped. It looks like a mangled
     // "escape every ampersand" step - confirm whether that escaping is
     // wanted before reintroducing it.
     $responseBody = (string) $responseBody;
     if ($responseBody === '') {
         throw new \RuntimeException('Response body is empty; nothing to parse as HTML.');
     }

     $domDoc = new \DOMDocument('1.0', 'UTF-8');

     // Real-world HTML triggers many libxml warnings. Collect them internally
     // (and discard them) rather than silencing the call with "@", then
     // restore whatever error mode the caller had.
     $previousErrorMode = libxml_use_internal_errors(true);
     $loaded = $domDoc->loadHTML($responseBody);
     libxml_clear_errors();
     libxml_use_internal_errors($previousErrorMode);

     if ($loaded === false) {
         throw new \RuntimeException('Failed to parse the response body as HTML.');
     }

     // loadHTML() only reports success as a boolean; the parsed tree lives in
     // $domDoc, so that is what has to be imported.
     $simplexml = simplexml_import_dom($domDoc);
     if (!$simplexml instanceof \SimpleXMLElement) {
         throw new \RuntimeException('Failed to import the parsed HTML document into SimpleXML.');
     }

     /**
      * add xml manifest
      * @see http://goungoun.dip.jp/app/fswiki/wiki.cgi/devnotebook?
      * page=PHP5%A1%A2%CC%A4%C0%B0%B7%C1HTML%A4%F2SimpleXML%A4%D8%CA%D1%B4%B9
      */
     if ($this->config["xml_manifesto"] === true) {
         $str = $simplexml->asXML();
         // Add the XML declaration when the serialised output lacks one.
         if (1 !== preg_match('/^<\\?xml version="1.0"/', $str)) {
             $str = '<?xml version="1.0" encoding="UTF-8"?>' . "\n" . $str;
         }
         // Line breaks in the HTML ended up as numeric character references,
         // so turn them back into characters.
         $str = $this->_numentToChar($str);
         $simplexml = simplexml_load_string($str);
     }

     return $simplexml;
 }
// Fetch the document at $url. When $url is an HTTP(S) URL, PHP's HTTP wrapper
// also fills $http_response_header with the raw response header lines.
$html = file_get_contents($url);

// Print the header lines that came back with the response.
echo 'Headers...' . PHP_EOL;
echo var_export($http_response_header, true);
?>

<?php 
// Print the label first, then look up the Content-Type value among the
// response header lines and dump it. $cType is kept for the conversion
// call further down the script.
print 'Content-type is ...';
$cType = detect_contenttype_from_http_response_header($http_response_header);
var_dump($cType);
?>


<?php 
// Set up class loading for the "Diggin" namespace from the "src" directory
// located four levels above this script's directory.
include 'SplClassLoader.php';
$srcRoot = __DIR__;
for ($level = 0; $level < 4; $level++) {
    $srcRoot = dirname($srcRoot);
}
$loader = new SplClassLoader('Diggin', $srcRoot . '/src');
$loader->register();

$front = new \Diggin\Http\Charset\Front\UrlRegex();

// Reduces converted markup to a short preview: tags stripped, all whitespace
// removed, cut to the first 800 characters as counted by mb_substr().
$preview = function ($markup) {
    $textOnly = strip_tags($markup);
    $squeezed = preg_replace('/\\s*/s', '', $textOnly);

    return mb_substr($squeezed, 0, 800);
};

// First pass: convert using nothing but the HTML itself.
echo 'trying convert html without content-type..' . PHP_EOL;
sleep(1);
$converted_html = $front->convert($html);
var_dump($preview($converted_html));
echo PHP_EOL . PHP_EOL;

// Second pass: also hand over the URL and the detected Content-Type value.
echo 'trying convert html with content-type..' . PHP_EOL;
sleep(1);
$hints = array('url' => $url, 'content-type' => $cType);
$converted_html = $front->convert($html, $hints);
var_dump($preview($converted_html));
function detect_contenttype_from_http_response_header($http_response_header)
{
    foreach ($http_response_header as $v) {
        if (preg_match('/^Content-Type: (.*)/', $v, $m)) {
            return $m[1];
        }