コード例 #1
0
 /**
  * Look up DOM nodes by CSS selector.
  *
  * Both arguments are forwarded unchanged to the root node's find(). A null
  * result is never passed on to the caller: a fresh SimpleHtmlDomNodeBlank is
  * returned in its place.
  *
  * @param mixed $selector selector handed to the root node's find()
  * @param mixed $idx      optional second argument for the root node's find(); defaults to null
  *
  * @return mixed the root node's non-null result, otherwise a new SimpleHtmlDomNodeBlank
  */
 public function find($selector, $idx = null)
 {
     $result = $this->root->find($selector, $idx);

     // Anything other than null is handed back untouched.
     if ($result !== null) {
         return $result;
     }

     return new SimpleHtmlDomNodeBlank();
 }
コード例 #2
0
 /**
  * Work out the character set of the parsed document and store it in $this->_charset.
  *
  * Sources are consulted in order and the first one that yields a value wins:
  *  1. the value returned by get_last_retrieve_url_contents_content_type(), when that function exists;
  *  2. the "content" attribute of a <meta http-equiv=Content-Type> element
  *     (ISO-8859-1 is assumed when that attribute carries no charset parameter);
  *  3. mb_detect_encoding() over the document text, falling back to UTF-8.
  *
  * ISO-8859-1 and its Latin1 / Latin-1 aliases are reported as CP1252.
  *
  * @return string the charset name that was stored in $this->_charset
  */
 protected function parseCharset()
 {
     // Capture only the charset token itself: matched case-insensitively, tolerant of
     // whitespace around "=", with optional surrounding quotes, and stopping at the next
     // parameter separator so values such as `utf-8; boundary=x` or `"utf-8"` yield `utf-8`.
     $charsetPattern = '/charset\s*=\s*["\']?([^\s;,"\']+)/i';

     $charset = null;

     if (function_exists('get_last_retrieve_url_contents_content_type')) {
         // Cast so that a null/false return value is not passed to preg_match() as a subject.
         $contentTypeHeader = (string)get_last_retrieve_url_contents_content_type();
         if (preg_match($charsetPattern, $contentTypeHeader, $matches)) {
             $charset = $matches[1];
         }
     }

     if (empty($charset)) {
         $el = $this->root->find('meta[http-equiv=Content-Type]', 0);
         if (!empty($el)) {
             $fullValue = $el->getAttribute("content");
             if (!empty($fullValue)) {
                 if (preg_match($charsetPattern, $fullValue, $matches)) {
                     $charset = $matches[1];
                 } else {
                     // If there is a meta tag, and they don't specify the character set, research says that it's typically ISO-8859-1
                     $charset = 'ISO-8859-1';
                 }
             }
         }
     }

     // If we couldn't find a charset above, then let's try to detect one based on the text we got...
     if (empty($charset)) {
         // Have PHP try to detect the encoding from the text given to us.
         $charset = mb_detect_encoding($this->root->text() . "ascii", array("UTF-8", "CP1252"));
         // ...and if this doesn't work, assume UTF-8 so that we can move on; this usually gives us most of what we need.
         if ($charset === false) {
             $charset = 'UTF-8';
         }
     }

     // Since CP1252 is a superset, if we get one of its subsets, we want CP1252 instead.
     if (in_array(strtolower($charset), array('iso-8859-1', 'latin1', 'latin-1'), true)) {
         $charset = 'CP1252';
     }

     return $this->_charset = $charset;
 }
コード例 #3
0
 /**
  * Dump the HTML DOM tree of a node.
  *
  * Static convenience wrapper: it calls dump() on the given node and passes
  * that same node as the argument. Nothing is returned.
  *
  * @param SimpleHtmlDomNode $node node whose dump() method is invoked
  */
 public static function dump_html_tree($node)
 {
     // The node is both the receiver and the argument of dump().
     $node->dump($node);
 }
コード例 #4
0
 /**
  * Append a child node.
  *
  * Passes this object to $node->parent() — presumably registering it as the
  * node's parent — and then hands the very same node back to the caller.
  *
  * @param SimpleHtmlDomNode $node node to append
  *
  * @return mixed the $node argument that was passed in
  */
 public function appendChild($node)
 {
     $node->parent($this);

     return $node;
 }