示例#1
0
 /**
  * Collect the snippet text of every result found on a Yahoo SERP page.
  *
  * The inner markup of each element matching the ".aAbs" selector is passed
  * through cleanText() and then fixRepeatedSpace(); the collected strings are
  * handed to normalizeResult() and its return value is returned.
  *
  * @param  SimpleHtmlDom $SHDObject parsed SERP document to search
  * @return array whatever normalizeResult() produces for the collected snippets
  */
 protected function getPageSnippets($SHDObject)
 {
     $collected = array();
     $matches = $SHDObject->find('.aAbs');
     foreach ($matches as $node) {
         $collected[] = $this->fixRepeatedSpace($this->cleanText($node->innertext));
     }
     // normalizeResult() is relied upon to keep only the organic results
     return $this->normalizeResult($collected);
 }
示例#2
0
 /**
  * Build a SimpleHtmlDom object from an HTML string.
  *
  * All arguments except $str are forwarded to the SimpleHtmlDom constructor;
  * $lowercase and $stripRN are additionally forwarded to load().
  *
  * @return SimpleHtmlDom|false the loaded DOM, or false when $str is empty
  *                             (as judged by empty()) or longer than MAX_FILE_SIZE
  */
 public static function str_get_html($str, $lowercase = true, $forceTagsClosed = true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN = true, $defaultBRText = DEFAULT_BR_TEXT, $defaultSpanText = DEFAULT_SPAN_TEXT)
 {
     $document = new SimpleHtmlDom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText);

     // Only non-empty input within the size limit gets parsed.
     if (!empty($str) && strlen($str) <= MAX_FILE_SIZE) {
         $document->load($str, $lowercase, $stripRN);

         return $document;
     }

     // Rejected input: release the freshly created object before bailing out.
     $document->clear();

     return false;
 }
示例#3
0
 /**
  * Collect the snippet text of every result found on a Bing SERP.
  *
  * Snippets are read from the <p> elements nested inside ".b_caption"
  * containers. Each one is cleaned with cleanText(), has repeated spaces
  * fixed with fixRepeatedSpace(), and the resulting list is returned through
  * normalizeResult().
  *
  * @param  SimpleHtmlDom $SHDObject parsed SERP document to search
  * @return array whatever normalizeResult() produces for the collected snippets
  */
 protected function getPageSnippets($SHDObject)
 {
     $result = array();
     foreach ($SHDObject->find('.b_caption p') as $paragraph) {
         $cleaned = $this->cleanText($paragraph->innertext);
         $cleaned = $this->fixRepeatedSpace($cleaned);
         $result[] = $cleaned;
     }

     // normalizeResult() is relied upon to keep only the organic results
     return $this->normalizeResult($result);
 }
示例#4
0
 /**
  * Collect the snippet text of every result found on a Google SERP.
  *
  * Snippets are read from the elements carrying the "st" class. Each one is
  * cleaned with cleanText(), has repeated spaces fixed with
  * fixRepeatedSpace(), and the resulting list is returned through
  * normalizeResult().
  *
  * @param  SimpleHtmlDom $SHDObject parsed SERP document to search
  * @return array whatever normalizeResult() produces for the collected snippets
  */
 protected function getPageSnippets($SHDObject)
 {
     $texts = array();
     $snippetNodes = $SHDObject->find('.st');
     foreach ($snippetNodes as $snippetNode) {
         $raw = $snippetNode->innertext;
         $texts[] = $this->fixRepeatedSpace($this->cleanText($raw));
     }

     // normalizeResult() is relied upon to keep only the organic results
     return $this->normalizeResult($texts);
 }
 /**
  * Return the plain-text content of this DOM node.
  *
  * Resolution order:
  *  - a stored HDOM_INFO_INNER value is returned as-is;
  *  - text nodes return their stored text after restore_noise();
  *  - comment and unknown nodes, and <script>/<style> elements, return '';
  *  - any other node concatenates the text() of its child nodes, and a
  *    <span> appends the DOM's default_span_text.
  *
  * @return string
  */
 public function text()
 {
     if (isset($this->_[HDOM_INFO_INNER])) {
         return $this->_[HDOM_INFO_INNER];
     }

     switch ($this->nodetype) {
         case HDOM_TYPE_TEXT:
             return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
         case HDOM_TYPE_COMMENT:
         case HDOM_TYPE_UNKNOWN:
             // Neither kind contributes visible text.
             return '';
     }

     // Script and style bodies are not part of the readable text.
     if (strcasecmp($this->tag, 'script') === 0 || strcasecmp($this->tag, 'style') === 0) {
         return '';
     }

     // Element nodes (observed for some span and p tags) occasionally end up
     // with $this->nodes set to NULL; the cause is not known, so such a node
     // simply yields an empty string.
     if (null === $this->nodes) {
         return '';
     }

     $text = '';
     foreach ($this->nodes as $child) {
         $text .= $child->text();
     }

     // Append the configured span text so adjacent spans do not run into
     // each other in the plain-text output.
     if ($this->tag == 'span') {
         $text .= $this->dom->default_span_text;
     }

     return $text;
 }
 /**
  * Build the opening-tag markup ("<tag attr=value ...>") for this node.
  *
  * Nodes that carry a DomInfo::TEXT entry (text, comment, unknown) return
  * that stored text after restore_noise() instead. For other nodes every
  * attribute is emitted with the whitespace and quote style recorded under
  * DomInfo::SPACE and DomInfo::QUOTE, indexed by the attribute's position.
  *
  * @return string
  */
 private function makeup()
 {
     if (isset($this->_[DomInfo::TEXT])) {
         return $this->dom->restore_noise($this->_[DomInfo::TEXT]);
     }

     $markup = '<' . $this->tag;
     $next = 0;
     foreach ($this->attr as $name => $value) {
         // Removed attributes still consume a position, so the index is
         // advanced before any skipping happens.
         $idx = $next++;

         // skip removed attribute
         if ($value === null || $value === false) {
             continue;
         }

         $markup .= $this->_[DomInfo::SPACE][$idx][0];

         // value-less attribute: nowrap, checked, selected...
         if ($value === true) {
             $markup .= $name;
             continue;
         }

         $quoteType = $this->_[DomInfo::QUOTE][$idx];
         if ($quoteType == QuoteType::DOUBLE) {
             $quoteChar = '"';
         } elseif ($quoteType == QuoteType::SINGLE) {
             $quoteChar = '\'';
         } else {
             $quoteChar = '';
         }

         $markup .= $name . $this->_[DomInfo::SPACE][$idx][1] . '=' . $this->_[DomInfo::SPACE][$idx][2] . $quoteChar . $value . $quoteChar;
     }

     return $this->dom->restore_noise($markup) . $this->_[DomInfo::ENDSPACE] . '>';
 }
 /**
  * Release the object graph held by this instance so PHP can reclaim it.
  *
  * Exists because of the PHP 5 circular-reference memory leak: every entry
  * of $this->nodes (and of $this->children, $this->parent and $this->root
  * when set) has clear() called on it, after which the properties holding
  * the document data are unset.
  *
  * NOTE(review): $this->nodes is iterated without a null/array check —
  * confirm it is always initialised before clear() can be called.
  */
 public function clear()
 {
     foreach ($this->nodes as $n) {
         $n->clear();
         // Only drops the loop-local handle; the entry in $this->nodes is untouched.
         $n = null;
     }
     // The children pass below is documented in the sourceforge repository
     // (item 2977248) as a fix for memory leaks that persisted even when
     // clear() was used.
     if (isset($this->children)) {
         /** @noinspection PhpWrongForeachArgumentTypeInspection */
         foreach ($this->children as $n) {
             // Entries are not guaranteed to be objects, hence the check.
             if (is_object($n)) {
                 /** @noinspection PhpUndefinedMethodInspection */
                 $n->clear();
             }
             $n = null;
         }
     }
     if (isset($this->parent)) {
         $this->parent->clear();
         unset($this->parent);
     }
     if (isset($this->root)) {
         $this->root->clear();
         unset($this->root);
     }
     // parent and root are listed again here although they were already
     // unset above when set; unsetting a missing property is harmless.
     unset($this->doc, $this->docArray, $this->noise, $this->parent, $this->root);
 }
示例#8
0
 /**
  * Scrape the Kongregate contests page, store a snapshot of it and return it.
  *
  * For every "table.contests" element the trimmed plain text of each
  * "tr.js-game-hover" row is collected. The resulting list of tables is
  * JSON-encoded and inserted into the "contests" table together with the
  * current timestamp.
  *
  * @return array one entry per contests table; each entry is a list of row texts
  */
 public function set()
 {
     // NOTE(review): file_get_contents() returns false on failure and that
     // value is handed to load() unchanged — confirm an empty snapshot is
     // the desired outcome when the page cannot be fetched.
     $data = file_get_contents('http://www.kongregate.com/contests?haref=hp_devcontest');
     $dom = new SimpleHtmlDom();
     $dom->load($data);

     $tables = [];
     foreach ($dom->find('table.contests') as $element) {
         // A fresh array per table: reusing one accumulator and resetting it
         // to '' makes the next `[] =` append a fatal error on PHP >= 7.1 and
         // turns a row-less table into a string instead of an empty list.
         $rows = [];
         foreach ($element->find('tr.js-game-hover') as $tr) {
             $rows[] = trim($tr->plaintext);
         }
         $tables[] = $rows;
     }

     $this->_DB->query('INSERT INTO contests (data, snap_date) VALUES (:data, :snap_date)');
     $this->_DB->bind(['data' => json_encode($tables), 'snap_date' => date('Y-m-d H:i:s')]);
     $this->_DB->execute();

     return $tables;
 }
 /**
  * Create a SimpleHtmlDom object and load the given markup into it.
  *
  * @param        $str             markup handed to load()
  * @param bool   $forceTagsClosed forwarded as the second constructor argument
  *
  * @return bool|\voku\helper\SimpleHtmlDom the loaded DOM object (this
  *                                         implementation always returns the object)
  */
 public static function str_get_html($str, $forceTagsClosed = true)
 {
     $document = new SimpleHtmlDom(null, $forceTagsClosed);
     $document->load($str);

     return $document;
 }
示例#10
0
	/**
	 * Create navigation items for a track from pasted content.
	 *
	 * Reads the "html", "parentId" and "trackId" request parameters. Nothing
	 * happens without a non-zero trackId. Content containing a <ul>/<ol> tag
	 * is parsed as an HTML list and passed to makeFromUl(); anything else
	 * (single or multi line) goes to makeFromNewLines(). Afterwards the
	 * track's cache is cleared.
	 */
	public function pasteListAction() {
		$html = Redokes_Controller_Front::getInstance()->getParam('html', '');
		$parentId = intval(Redokes_Controller_Front::getInstance()->getParam('parentId', 0));
		$trackId = intval(Redokes_Controller_Front::getInstance()->getParam('trackId', 0));
		if (!$trackId) {
			return;
		}

		// check what kind of content was sent
		if (preg_match('/<ul|<ol/i', $html)) {
			// html list: convert any ol to ul. The detection above is
			// case-insensitive, so the conversion has to be as well,
			// otherwise an upper-case <OL> is detected but never rewritten.
			$html = str_ireplace(array('<ol', '</ol'), array('<ul', '</ul'), $html);

			// load up the html into a dom object
			$dom = new SimpleHtmlDom($html);
			$ul = $dom->find('>ul', 0);
			$this->makeFromUl($ul, $trackId, $parentId);
		}
		else {
			// single-line and multi-line text are handled by the same helper
			$this->makeFromNewLines($html, $trackId, $parentId);
		}

		// Clear the track's cache once the items have been created. This must
		// stay reachable: an early return before it leaves the cache stale.
		$track = new Navigation_Model_Track($trackId);
		$track->clearCache();
	}
示例#11
0
 /**
  * Look up the postal code (PSČ) for a city and street via psc.posta.sk.
  *
  * @param mixed $city   value sent as the "city" field
  * @param mixed $street value sent as the "street" field
  *
  * @return string plain text of the first ".zip" element in the response, or
  *                a Slovak "not found" message when the page could not be
  *                loaded or contains no such element
  */
 public static function getZipFromCity($city, $street)
 {
     $notFound = "Nepodarilo sa najsť PSČ.";

     // Options for the HTTP request.
     // NOTE(review): the fields are sent as a request body on a GET request;
     // confirm the service reads them there rather than from the query string.
     $options = array('http' => array('header' => "Content-Type: application/x-www-form-urlencoded", 'method' => "GET", 'content' => http_build_query(array('city' => $city, 'street' => $street))));
     $context = stream_context_create($options);

     // Fetch the response and parse it into a DOM.
     $html = SimpleHtmlDom::file_get_html('http://psc.posta.sk/', false, $context);

     // A failed fetch presumably yields a non-object (false) instead of a
     // DOM; calling find() on it would be a fatal error.
     if (!is_object($html)) {
         return $notFound;
     }

     $zipMessageObject = $html->find('.zip');
     if (isset($zipMessageObject[0]->plaintext)) {
         return $zipMessageObject[0]->plaintext;
     }

     return $notFound;
 }