<?php
/*
 * AJAX preview endpoint.
 *
 * When the request carries ajax=1 and a non-empty "url", the remote page is
 * fetched through html_no_comment(), every <script> element is blanked, and
 * the outer HTML of each direct child of <body> is echoed before the script
 * terminates. On any failure (bad URL, fetch error, unparsable document,
 * missing <body>) nothing is echoed and execution falls through to the rest
 * of the file, exactly as it already did when the fetch itself failed.
 */
$item = $_REQUEST;

if (
    isset($item['ajax'], $item['url'])
    and $item['ajax'] == 1
    and is_string($item['url'])
    and $url = $item['url']
) {
    require_once 'simple_html_dom.php';

    // The URL comes straight from the request, so only absolute http(s)
    // addresses are fetched; this keeps file://, php:// and bare local paths
    // away from the fetcher.
    // NOTE(review): callers that used to send scheme-less URLs must now
    // include the scheme — confirm against the front end.
    if (preg_match('#^https?://#i', $url) and $html = html_no_comment($url)) {
        // str_get_html() yields false for empty or oversized markup.
        $html = str_get_html($html);

        if ($html) {
            // Blank out scripts so the echoed fragment cannot run remote JS.
            foreach ($html->find("script") as $sc) {
                $sc->outertext = '';
            }

            // find(..., 0) yields null when the document has no <body>.
            $body = $html->find("body", 0);

            if ($body) {
                $content = "";
                foreach ($body->childNodes() as $child) {
                    $content .= $child->outertext();
                }

                // Release the DOM explicitly before sending the output.
                $html->clear();
                unset($html);

                echo $content;
                exit;
            }

            // No <body>: free the DOM and fall through without output.
            $html->clear();
        }

        unset($html);
    }
}

/**
 * Tells whether the cURL extension is available.
 *
 * @return bool true when the curl_version() function exists.
 */
function _isCurl()
{
    return function_exists('curl_version');
}

function file_get_contents_curl($url) { //$url=urlencode($url); //debug($url); $ch = curl_init();
$pattern_link = (isset($arr_pattern_link[$j]) and $arr_pattern_link[$j]) ? $arr_pattern_link[$j] : $arr_pattern_link[0]; $pattern_img = (isset($arr_pattern_img[$j]) and $arr_pattern_img[$j]) ? $arr_pattern_img[$j] : $arr_pattern_img[0]; //debug($pattern_bound); $max_item = $site['count']; $num = 0; foreach ($html->find($pattern_bound) as $bound) { if ($num == $max_item) { break; } $num++; foreach ($bound->find($pattern_link) as $link) { $link = check_link($link->getAttribute('href'), $host); } //echo $link.'<br>'; // parse row $html_detail = html_no_comment($link); if ($html_detail) { $html_detail = str_get_html($html_detail); $item = array(); if ($pattern) { foreach ($pattern as $key => $value) { //echo '<pre>'; //print_r($value); $element_delete = $value['element_delete']; //echo $element_delete.'<br>'; if ($detail_pattern = $value['extra']) { // Nếu mẫu cần lấy có dạng đối tượng con có thứ tự {nth} của một đối tượng, if (preg_match("/{([^*]+)}/", $detail_pattern, $child)) { $detail_pattern = substr($detail_pattern, 0, strpos($detail_pattern, '{')); // Nếu có chỉ định đối tượng con cụ thể dạng childelement-nth if (strpos($child[1], '-')) {