debug() public static method

public static debug ( $text )
Example #1
0
 /**
  * Runs a named script against a phpQuery object.
  *
  * The script is resolved in this order:
  *  1. a callback registered under $arg1 in
  *     phpQueryPlugin_Scripts::$scriptMethods, run through
  *     phpQuery::callbackRun() with ($self, $params, &$return, $config);
  *  2. the file Scripts/{$arg1}.php next to this file (the name '__config'
  *     is never loaded this way). The file is required inside this method,
  *     so it sees $self, $params, $config and may assign $return;
  *  3. otherwise only a debug message is emitted.
  *
  * Every argument passed after $arg1 is collected into $params.
  *
  * @param phpQueryObject $self Object the script operates on.
  * @param string $arg1 Name of the script to run.
  * @return mixed The truthy value left in $return by the script, otherwise $self.
  */
 public static function script($self, $arg1)
 {
     $params = func_get_args();
     $params = array_slice($params, 2);
     $return = null;
     $config = self::$config;
     // !empty() keeps the original truthiness test but no longer raises an
     // "undefined index" notice when no callback is registered for $arg1.
     if (!empty(phpQueryPlugin_Scripts::$scriptMethods[$arg1])) {
         phpQuery::callbackRun(phpQueryPlugin_Scripts::$scriptMethods[$arg1], array($self, $params, &$return, $config));
     } elseif ($arg1 != '__config' && file_exists(dirname(__FILE__) . "/Scripts/{$arg1}.php")) {
         // NOTE(review): $arg1 is interpolated into the require path without
         // validation; confirm callers never pass untrusted names (e.g. '../x').
         phpQuery::debug("Loading script '{$arg1}'");
         require dirname(__FILE__) . "/Scripts/{$arg1}.php";
     } else {
         phpQuery::debug("Requested script '{$arg1}' doesn't exist");
     }
     return $return ? $return : $self;
 }
Example #2
0
<?php

require_once '../phpQuery/phpQuery.php';
phpQuery::$debug = true;

// Case 1: replaceWith() swaps the second paragraph for new markup; the
// paragraph found at the same position afterwards must carry the new class.
$testName = 'ReplaceWith';
$replacement = "<p class='newTitle'>\n                        this is example title\n                    </p>";
phpQuery::newDocumentFile('test.html')
    ->find('p:eq(1)')
    ->replaceWith($replacement);
$result = pq('p:eq(1)');
$passed = $result->hasClass('newTitle');
print $passed
    ? "Test '{$testName}' PASSED :)"
    : "Test '{$testName}' <strong>FAILED</strong> !!! ";
$result->dump();
print "\n";

// Case 2: replaceAll() puts a new <div class="replacer"> in place of every
// match of 'li:first p'; the expected number of replacers is $testResult.
$testName = 'ReplaceAll';
$testResult = 3;
phpQuery::newDocumentFile('test.html');
pq('<div class="replacer">')->replaceAll('li:first p');
$result = pq('.replacer');
$passed = $result->size() == $testResult;
print $passed
    ? "Test '{$testName}' PASSED :)"
    : "Test '{$testName}' <strong>FAILED</strong> !!! ";
$result->dump();
print "\n";
Example #3
0
 /**
  * Registers an event handler of the given type on a node.
  *
  * The handler is appended to the eventHandlers[$type] list of the event
  * node that self::getNode() returns for ($documentID, $node); when no such
  * event node exists yet, one is created with self::setNode().
  *
  * Note: $data and $callback are stored exactly as given; a callable passed
  * in the $data position is NOT moved to $callback.
  *
  * @param DOMNode|phpQueryObject|string $document Passed to phpQuery::getDocumentID().
  * @param mixed $node Node the handler is attached to.
  * @param string $type Event type (like click); custom names are allowed.
  * @param mixed $data Data stored alongside the callback.
  * @param mixed $callback Handler stored for the event.
  *
  * @TODO support '!' (exclusive) events
  * @TODO support more than event in $type (space-separated)
  * @TODO support binding to global events
  */
 public static function add($document, $node, $type, $data, $callback = null)
 {
     phpQuery::debug("Binding '{$type}' event");
     $docId = phpQuery::getDocumentID($document);

     // Reuse the existing event node, or create it on first use.
     $target = self::getNode($docId, $node);
     if (!$target) {
         $target = self::setNode($docId, $node);
     }

     $handler = array('callback' => $callback, 'data' => $data);
     if (!isset($target->eventHandlers[$type])) {
         $target->eventHandlers[$type] = array();
     }
     $target->eventHandlers[$type][] = $handler;
 }
Example #4
0
 /**
  * Forwards a debug message to phpQuery::debug().
  *
  * @param mixed $text Value handed unchanged to phpQuery::debug().
  */
 public static function debug($text)
 {
     phpQuery::debug($text);
 }
Example #5
0
 /**
  * Prints a tree dump of every node on the current stack.
  *
  * phpQuery::$debug is switched off while the dump is built and restored
  * afterwards. When $title is truthy the output starts with a
  * "DUMP #<n>" header and phpQuery::$dumpCount is incremented.
  *
  * @param bool $html  When truthy, spaces become &nbsp; and newlines get <br> for browser output.
  * @param bool $title When truthy, prefix the dump with the numbered header.
  * @return $this
  */
 public function dumpTree($html = true, $title = true)
 {
     $buffer = '';
     if ($title) {
         $buffer = 'DUMP #' . phpQuery::$dumpCount++ . " \n";
     }

     $savedDebug = phpQuery::$debug;
     phpQuery::$debug = false;
     foreach ($this->stack() as $stackNode) {
         $buffer .= $this->__dumpTree($stackNode);
     }
     phpQuery::$debug = $savedDebug;

     if ($html) {
         print nl2br(str_replace(' ', '&nbsp;', $buffer));
     } else {
         print $buffer;
     }
     return $this;
 }
Example #6
0
 */
class phpQuery
{
    // Minimal stand-in exposing only the static state and the debug()
    // helper that the code under test reads.
    public static $defaultDocumentID;
    // Any truthy value enables debug output.
    public static $debug = 0;
    public static $documents = array();
    public static $defaultCharset = 'utf-8';

    /**
     * Dumps $text with var_dump() when self::$debug is truthy.
     *
     * var_dump() writes its own output and returns nothing, so it is called
     * directly instead of being wrapped in print.
     *
     * @param mixed $text Value to dump.
     */
    public static function debug($text)
    {
        if (self::$debug) {
            var_dump($text);
        }
    }
}
// Load the wrapper under test and turn on debug output of the phpQuery
// stub declared above.
require_once '../src/phpQuery/DOMDocumentWrapper.php';
phpQuery::$debug = 2;
/* ENCODINGS */
// Declare the charset of the test page itself; the ISO-8859-2 variant is
// kept commented out as an alternative.
//print '<meta http-equiv="Content-Type" content="text/html;charset=iso-8859-2">';
print '<meta http-equiv="Content-Type" content="text/html;charset=utf-8">';
/* HTML */
// Disabled cases: ISO-8859-2 documents with and without a charset declaration.
//$htmlIso = new DOMDocumentWrapper(
//	file_get_contents('document-types/document-iso88592.html')
//);
//$htmlIsoNoCharset = new DOMDocumentWrapper(
//	file_get_contents('document-types/document-iso88592-nocharset.html'),
//	'text/html;charset=iso-8859-2'
//);
// Active case: wrap the UTF-8 fixture and dump the markup it produces.
$htmlUtf = new phpQuery\DOMDocumentWrapper(file_get_contents('document-types/document-utf8.html'));
var_dump($htmlUtf->markup());
//$htmlUtfNoCharset = new DOMDocumentWrapper(
//	file_get_contents('document-types/document-utf8-nocharset.html'),
Example #7
0
 /**
  * Builds a phpQuery document from the last response of $xhr and follows
  * <meta http-equiv="refresh"> redirects.
  *
  * The response body is loaded with phpQuery::newDocument(); the request
  * object and its URI are stored on the document as ->xhr and ->location.
  * When the document contains a meta refresh with a target URL, that URL
  * is allowed via phpQuery::ajaxAllowURL(), fetched with phpQuery::ajax()
  * (re-using $xhr) and, if the response is successful, this method is run
  * again on the new response.
  *
  * @param Zend_Http_Client $xhr
  * @return phpQueryObject|null The resulting document; null when a meta
  *                             redirect was followed but its response was
  *                             not successful.
  */
 public static function browserReceive($xhr)
 {
     phpQuery::debug("[WebBrowser] Received from " . $xhr->getUri(true));
     $body = $xhr->getLastResponse()->getBody();
     // XXX error ???
     // NOTE(review): only the exact lowercase form '<!doctype html>' is
     // rewritten; confirm whether '<!DOCTYPE html>' needs the same handling.
     if (strpos($body, '<!doctype html>') !== false) {
         $body = '<html>' . str_replace('<!doctype html>', '', $body) . '</html>';
     }
     $pq = phpQuery::newDocument($body);
     $pq->document->xhr = $xhr;
     $pq->document->location = $xhr->getUri(true);
     $refresh = $pq->find('meta[http-equiv=refresh]')->add('meta[http-equiv=Refresh]');
     if (!$refresh->size()) {
         return $pq;
     }

     $content = (string) $refresh->attr('content');
     phpQuery::debug("Meta redirect... '{$content}'\n");

     // The target follows the first '=' ("5; url=http://..."). A refresh
     // without '=' (e.g. content="5") names no URL: there is nothing to
     // follow, so the current document is the result. Previously
     // strpos() === false made the code cut the string at offset 1 and
     // request a garbage URL.
     $separator = strpos($content, '=');
     if ($separator === false) {
         return $pq;
     }
     // Strip surrounding whitespace first, then optional quotes.
     $urlRefresh = trim(substr($content, $separator + 1));
     $urlRefresh = trim($urlRefresh, '\'"');
     // XXX not secure ?!
     phpQuery::ajaxAllowURL($urlRefresh);
     // make ajax call, passing last $xhr object to preserve important stuff
     $xhr = phpQuery::ajax(array('type' => 'GET', 'url' => $urlRefresh, 'dataType' => 'html'), $xhr);
     if ($xhr->getLastResponse()->isSuccessful()) {
         // if all is ok, repeat this method...
         return call_user_func_array(array('phpQueryPlugin_WebBrowser', 'browserReceive'), array($xhr));
     }
     // Redirect target could not be fetched: as before, no document is returned.
     return null;
 }
Example #8
0
 /**
  * Extracts content from $html according to a field rule.
  *
  * Two modes, selected by $data['dom']:
  *  - DOM mode: $data['rule'] is either "selector@attribute" (reads the
  *    attribute through attr()) or up to three newline-separated parts
  *    "selector\nmethod\nargument"; the method defaults to 'html'.
  *  - Regex mode: '<%content%>' returns $html unchanged; otherwise the rule
  *    is converted by spiderTools::pregTag(). If the converted rule contains
  *    no pattern-like token it is returned literally, else it is matched and
  *    the named group 'content' is taken.
  *
  * With $data['multi'] all matches are collected and joined with
  * '#--iCMS.PageBreak--#'; collection stops at the first match whose content
  * was already seen (compared by md5).
  *
  * @param string $html Page markup to search.
  * @param array  $data Field settings; keys read here: dom, rule, multi.
  * @param mixed  $rule Not used by this method.
  * @return mixed Extracted content; null when a single-match regex finds nothing.
  */
 public static function match($html, $data, $rule)
 {
     $match_hash = array();
     if (!empty($data['dom'])) {
         iPHP::import(iPHP_LIB . '/phpQuery.php');
         spider::$dataTest && !empty($_GET['pq_debug']) && (phpQuery::$debug = 1);
         $doc = phpQuery::newDocumentHTML($html, 'UTF-8');
         if (strpos($data['rule'], '@') !== false) {
             list($content_dom, $content_attr) = explode("@", $data['rule']);
             $content_fun = 'attr';
         } else {
             // Pad to three parts so a rule with fewer lines does not raise
             // undefined-offset notices.
             list($content_dom, $content_fun, $content_attr) = array_pad(explode("\n", $data['rule']), 3, '');
         }
         $content_dom = trim($content_dom);
         $content_fun = trim($content_fun);
         $content_attr = trim($content_attr);
         $content_fun or $content_fun = 'html';
         if (!empty($data['multi'])) {
             $conArray = array();
             $_content = null;
             foreach ($doc[$content_dom] as $doc_key => $doc_value) {
                 if ($content_attr) {
                     $_content = phpQuery::pq($doc_value)->{$content_fun}($content_attr);
                 } else {
                     $_content = phpQuery::pq($doc_value)->{$content_fun}();
                 }
                 $cmd5 = md5($_content);
                 // Stop at the first repeated content.
                 if (!empty($match_hash[$cmd5])) {
                     break;
                 }
                 $conArray[$doc_key] = $_content;
                 $match_hash[$cmd5] = true;
             }
             if (spider::$dataTest) {
                 echo "<b>多条匹配结果:</b><pre>";
                 print_r($match_hash);
                 echo "</pre><hr />";
             }
             $content = implode('#--iCMS.PageBreak--#', $conArray);
             unset($conArray, $_content, $match_hash);
         } else {
             if ($content_attr) {
                 $content = $doc[$content_dom]->{$content_fun}($content_attr);
             } else {
                 $content = $doc[$content_dom]->{$content_fun}();
             }
         }
         phpQuery::unloadDocuments($doc->getDocumentID());
         unset($doc);
     } else {
         if (trim($data['rule']) == '<%content%>') {
             $content = $html;
         } else {
             $data_rule = spiderTools::pregTag($data['rule']);
             if (preg_match('/(<\\w+>|\\.\\*|\\.\\+|\\\\d|\\\\w)/i', $data_rule)) {
                 if (!empty($data['multi'])) {
                     preg_match_all('|' . $data_rule . '|is', $html, $matches, PREG_SET_ORDER);
                     $conArray = array();
                     foreach ((array) $matches as $mkey => $mat) {
                         $cmd5 = md5($mat['content']);
                         // Stop at the first repeated content.
                         if (!empty($match_hash[$cmd5])) {
                             break;
                         }
                         $conArray[$mkey] = $mat['content'];
                         $match_hash[$cmd5] = true;
                     }
                     if (spider::$dataTest) {
                         echo "<b>多条匹配结果:</b><pre>";
                         print_r($match_hash);
                         echo "</pre><hr />";
                     }
                     $content = implode('#--iCMS.PageBreak--#', $conArray);
                     unset($conArray, $match_hash);
                 } else {
                     // The original passed the undefined variable
                     // $PREG_SET_ORDER as flags; preg_match() takes no such
                     // flag, so none is passed.
                     preg_match('|' . $data_rule . '|is', $html, $matches);
                     $content = isset($matches['content']) ? $matches['content'] : null;
                 }
             } else {
                 $content = $data_rule;
             }
         }
     }
     return $content;
 }
Example #9
0
 /**
  * Returns the raw body of the last response held by $xhr.
  *
  * Logs the request URI through phpQuery::debug() first. Meta redirects are
  * not followed here (TODO).
  *
  * @param Zend_Http_Client $xhr
  * @return mixed Whatever getBody() returns for the last response.
  */
 public static function browserDownload($xhr)
 {
     $uri = $xhr->getUri(true);
     phpQuery::debug("[WebBrowser] Received from " . $uri);
     return $xhr->getLastResponse()->getBody();
 }
Example #10
0
 /**
  * Dispatches a browser request to the WebBrowser plugin.
  *
  * All arguments of this call are forwarded as-is to the plugin's
  * 'browser' callback through self::callbackRun(). When the plugin cannot
  * be loaded, a debug message is emitted and nothing is returned.
  *
  * @param $ajaxSettings
  * @param $callback
  * @param $param1
  * @param $param2
  * @param $param3
  * @return phpQueryObject|null Result of the plugin callback; null when the plugin is unavailable.
  */
 public static function browser($ajaxSettings, $callback, $param1 = null, $param2 = null, $param3 = null)
 {
     // Guard clause: without the plugin there is nothing to dispatch to.
     if (!self::plugin('WebBrowser')) {
         phpQuery::debug('WebBrowser plugin not available...');
         return;
     }
     $forwarded = func_get_args();
     $target = array(self::$plugins, 'browser');
     return self::callbackRun($target, $forwarded);
 }
Example #11
0
 /**
  * Extracts and post-processes one content field from a fetched page.
  *
  * Steps, in order:
  *  1. Returns nothing when $data['rule'] is blank.
  *  2. With $data['page'], builds the list of follow-up page URLs (from a
  *     'DOM::' selector, from a regex page area, or from a numeric
  *     '<%step%>' pattern), downloads them with $this->remote(), appends
  *     their content to $html and caches the result in $this->allHtml.
  *  3. Extracts the field, through phpQuery when $data['dom'] is set,
  *     otherwise through the regex produced by $this->pregTag() (named group
  *     'content'). With $data['multi'] all matches are joined with
  *     '#--iCMS.PageBreak--#'.
  *  4. Applies the optional steps selected in $data: cleanbefor, cleanhtml,
  *     format, img_absolute, trim, capture, cleanafter, mergepage, empty,
  *     json_decode, array.
  *
  * Fixes over the previous version: preg_match() is no longer given the
  * undefined variable $PREG_SET_ORDER as flags; the current URL is removed
  * from the page list only when array_search() really found it (a false
  * result used to delete key 0); a non-positive page_no_step no longer
  * loops forever; $pageurl and $page_url_rule are always defined.
  *
  * @param string $html Markup of the page being processed.
  * @param array  $data Field settings (rule, name, page, dom, multi, ...).
  * @param array  $rule Rule settings (page_url, list_url, page_area_rule, __url__, ...).
  * @return mixed Extracted content; array when $data['array'] is set; false
  *               in work mode when an empty result is not allowed; nothing
  *               when the rule is blank.
  */
 function content($html, $data, $rule)
 {
     if (trim($data['rule']) === '') {
         return;
     }
     $name = $data['name'];
     if ($data['page']) {
         if (empty($rule['page_url'])) {
             $rule['page_url'] = $rule['list_url'];
         }
         if (empty($this->allHtml)) {
             $page_url_array = array();
             // Defined up front so the test output below never reads
             // undefined variables.
             $page_url_rule = null;
             $pageurl = array();
             $page_area_rule = trim($rule['page_area_rule']);
             if ($page_area_rule) {
                 if (strpos($page_area_rule, 'DOM::') !== false) {
                     iPHP::import(iPHP_LIB . '/phpQuery.php');
                     $doc = phpQuery::newDocumentHTML($html, 'UTF-8');
                     $pq_dom = str_replace('DOM::', '', $page_area_rule);
                     $pq_array = phpQuery::pq($pq_dom);
                     foreach ($pq_array as $pn => $pq_val) {
                         $href = phpQuery::pq($pq_val)->attr('href');
                         if ($href) {
                             if ($rule['page_url_rule']) {
                                 $page_url_rule = $this->pregTag($rule['page_url_rule']);
                                 if (!preg_match('|' . $page_url_rule . '|is', $href)) {
                                     continue;
                                 }
                             }
                             $href = str_replace('<%url%>', $href, $rule['page_url']);
                             $page_url_array[$pn] = $this->_url_complement($rule['__url__'], $href);
                         }
                     }
                     if ($page_url_array) {
                         $page_url_array = array_filter($page_url_array);
                         $page_url_array = array_unique($page_url_array);
                         // Drop the page we already have; only when it is
                         // really in the list (false would address key 0).
                         $puk = array_search($rule['__url__'], $page_url_array);
                         if ($puk !== false) {
                             unset($page_url_array[$puk]);
                         }
                     }
                 } else {
                     $page_area_rule = $this->pregTag($page_area_rule);
                     if ($page_area_rule) {
                         preg_match('|' . $page_area_rule . '|is', $html, $matches);
                         $page_area = isset($matches['content']) ? $matches['content'] : null;
                     } else {
                         $page_area = $html;
                     }
                     if ($rule['page_url_rule']) {
                         $page_url_rule = $this->pregTag($rule['page_url_rule']);
                         preg_match_all('|' . $page_url_rule . '|is', $page_area, $page_url_matches, PREG_SET_ORDER);
                         foreach ($page_url_matches as $pn => $row) {
                             $href = str_replace('<%url%>', $row['url'], $rule['page_url']);
                             $page_url_array[$pn] = $this->_url_complement($rule['__url__'], $href);
                             gc_collect_cycles();
                         }
                     }
                     if ($page_url_array) {
                         $page_url_array = array_filter($page_url_array);
                         $page_url_array = array_unique($page_url_array);
                         $puk = array_search($rule['__url__'], $page_url_array);
                         if ($puk !== false) {
                             unset($page_url_array[$puk]);
                         }
                     }
                     unset($page_area);
                 }
             } else {
                 // Pattern-based ("logical") paging
                 if ($rule['page_url_parse'] == '<%url%>') {
                     $page_url = str_replace('<%url%>', $rule['__url__'], $rule['page_url']);
                 } else {
                     $page_url_rule = $this->pregTag($rule['page_url_parse']);
                     preg_match('|' . $page_url_rule . '|is', $rule['__url__'], $matches);
                     $matched_url = isset($matches['url']) ? $matches['url'] : '';
                     $page_url = str_replace('<%url%>', $matched_url, $rule['page_url']);
                 }
                 if (stripos($page_url, '<%step%>') !== false) {
                     // A zero, empty or negative step would never reach
                     // page_no_end; fall back to 1.
                     $page_no_step = $rule['page_no_step'];
                     if ($page_no_step <= 0) {
                         $page_no_step = 1;
                     }
                     for ($pn = $rule['page_no_start']; $pn <= $rule['page_no_end']; $pn = $pn + $page_no_step) {
                         $page_url_array[$pn] = str_replace('<%step%>', $pn, $page_url);
                         gc_collect_cycles();
                     }
                 }
             }
             if ($this->contTest) {
                 echo $rule['__url__'] . "<br />";
                 echo $rule['page_url'] . "<br />";
                 echo iS::escapeStr($page_url_rule);
                 echo "<hr />";
             }
             if ($this->contTest) {
                 echo "<pre>";
                 print_r($page_url_array);
                 echo "</pre><hr />";
             }
             $this->content_right_code = trim($rule['page_url_right']);
             $this->content_error_code = trim($rule['page_url_error']);
             $this->curl_proxy = $rule['proxy'];
             $pcon = '';
             // Download the pages in order; stop at the first page that is
             // empty or that check_content_code() reports as not matching.
             foreach ($page_url_array as $pukey => $purl) {
                 $phtml = $this->remote($purl);
                 if (empty($phtml)) {
                     break;
                 }
                 $phttp = $this->check_content_code($phtml);
                 if ($phttp['match'] == false) {
                     break;
                 }
                 $pageurl[] = $purl;
                 $pcon .= $phttp['content'];
             }
             gc_collect_cycles();
             $html .= $pcon;
             unset($pcon);
             $this->allHtml = $html;
             if ($this->contTest) {
                 echo "<pre>";
                 print_r($pageurl);
                 echo "</pre><hr />";
             }
         } else {
             $html = $this->allHtml;
         }
     }
     if ($data['dom']) {
         iPHP::import(iPHP_LIB . '/phpQuery.php');
         $this->contTest && !empty($_GET['pq_debug']) && (phpQuery::$debug = 1);
         $doc = phpQuery::newDocumentHTML($html, 'UTF-8');
         // Rule layout: "selector\nmethod\nargument"; pad so missing lines
         // do not raise undefined-offset notices.
         list($content_dom, $content_fun, $content_attr) = array_pad(explode("\n", $data['rule']), 3, '');
         $content_dom = trim($content_dom);
         $content_fun = trim($content_fun);
         $content_attr = trim($content_attr);
         $content_fun or $content_fun = 'html';
         if ($data['multi']) {
             $conArray = array();
             foreach ($doc[$content_dom] as $doc_key => $doc_value) {
                 if ($content_attr) {
                     $conArray[] = phpQuery::pq($doc_value)->{$content_fun}($content_attr);
                 } else {
                     $conArray[] = phpQuery::pq($doc_value)->{$content_fun}();
                 }
             }
             $content = implode('#--iCMS.PageBreak--#', $conArray);
             unset($conArray);
         } else {
             if ($content_attr) {
                 $content = $doc[$content_dom]->{$content_fun}($content_attr);
             } else {
                 $content = $doc[$content_dom]->{$content_fun}();
             }
         }
         if ($this->contTest) {
             print_r(htmlspecialchars($content));
             echo "<hr />";
         }
         phpQuery::unloadDocuments($doc->getDocumentID());
         unset($doc);
     } else {
         if (trim($data['rule']) == '<%content%>') {
             $content = $html;
         } else {
             $data_rule = $this->pregTag($data['rule']);
             if ($this->contTest) {
                 print_r(iS::escapeStr($data_rule));
                 echo "<hr />";
             }
             // Only treat the rule as a pattern when it contains a
             // pattern-like token; otherwise it is a literal value.
             if (preg_match('/(<\\w+>|\\.\\*|\\.\\+|\\\\d|\\\\w)/i', $data_rule)) {
                 if ($data['multi']) {
                     preg_match_all('|' . $data_rule . '|is', $html, $matches, PREG_SET_ORDER);
                     $conArray = array();
                     foreach ((array) $matches as $mkey => $mat) {
                         $conArray[] = $mat['content'];
                     }
                     $content = implode('#--iCMS.PageBreak--#', $conArray);
                     if ($this->contTest) {
                         print_r(htmlspecialchars($content));
                         echo "<hr />";
                     }
                 } else {
                     preg_match('|' . $data_rule . '|is', $html, $matches);
                     $content = isset($matches['content']) ? $matches['content'] : null;
                 }
             } else {
                 $content = $data_rule;
             }
         }
     }
     $html = null;
     unset($html);
     if ($data['cleanbefor']) {
         $content = $this->dataClean($data['cleanbefor'], $content);
     }
     if ($data['cleanhtml']) {
         // Strip every tag-like sequence.
         $content = preg_replace('/<[\\/\\!]*?[^<>]*?>/is', '', $content);
     }
     if ($data['format'] && $content) {
         $content = autoformat($content);
         $content = stripslashes($content);
     }
     if ($data['img_absolute'] && $content) {
         // Rewrite every distinct <img src> through _url_complement().
         preg_match_all("/<img.*?src\\s*=[\"|'](.*?)[\"|']/is", $content, $img_match);
         if ($img_match[1]) {
             $_img_array = array_unique($img_match[1]);
             $_img_urls = array();
             foreach ((array) $_img_array as $_img_key => $_img_src) {
                 $_img_urls[$_img_key] = $this->_url_complement($rule['__url__'], $_img_src);
             }
             $content = str_replace($_img_array, $_img_urls, $content);
         }
     }
     $data['trim'] && ($content = trim($content));
     if ($data['capture']) {
         // The extracted value is itself a URL: fetch it and use the response.
         $capture = str_replace('\\', '', $content);
         $content = $this->remote($capture);
     }
     if ($data['cleanafter']) {
         $content = $this->dataClean($data['cleanafter'], $content);
     }
     if ($data['mergepage']) {
         $_content = $content;
         preg_match_all("/<img.*?src\\s*=[\"|'|\\s]*(http:\\/\\/.*?\\.(gif|jpg|jpeg|bmp|png)).*?>/is", $_content, $picArray);
         $pA = array_unique($picArray[1]);
         $pA = array_filter($pA);
         $_pcount = count($pA);
         if ($_pcount < 4) {
             // Fewer than four distinct images: merge everything into one page.
             $content = str_replace('#--iCMS.PageBreak--#', "", $content);
         } else {
             $contentA = explode("#--iCMS.PageBreak--#", $_content);
             $newcontent = array();
             // checkpage() fills $newcontent from the split pages.
             $this->checkpage($newcontent, $contentA, 2);
             if (is_array($newcontent)) {
                 $content = array_filter($newcontent);
                 $content = implode('#--iCMS.PageBreak--#', $content);
             } else {
                 $content = $newcontent;
             }
             unset($newcontent, $contentA);
         }
         unset($_content);
     }
     if ($data['empty'] && empty($content)) {
         if ($this->work) {
             echo "\n[" . $name . "内容为空!请检查,规则是否正确!]\n";
             return false;
         } else {
             $this->contTest && (iPHP::$dialog['alert'] = 'window');
             iPHP::alert($name . '内容为空!请检查,规则是否正确!!');
         }
     }
     if ($data['json_decode']) {
         $content = json_decode($content, true);
     }
     if ($data['array']) {
         return (array) $content;
     }
     return $content;
 }
Example #12
0
 /**
  * Crawls the list pages of a spider project/rule and processes the entries
  * found on them according to $work.
  *
  * List URLs come from, in increasing priority: the rule's list_urls, the
  * project's urls, spiderUrls::$urls, and $_urls. Each line may be:
  *  - "RULE@rid@url": the url is crawled with rule rid and the URLs it
  *    yields are used instead;
  *  - a URL containing "<format,begin,num,step,zeroize,reverse>", expanded
  *    by spiderTools::mkurls();
  *  - a plain URL.
  *
  * Behaviour per $work:
  *  - 'shell':      publishes every new entry, echoes progress and counters,
  *                  and updates the project's lastupdate; returns nothing
  *                  (false when the URL list is empty).
  *  - 'WEB@MANUAL': returns cid/rid/pid/sid/work/rule plus the raw lists per URL.
  *  - 'WEB@AUTO':   returns the rows to publish.
  *  - 'DATA@RULE':  returns the crawled content per list entry.
  * With $callback === 'CALLBACK@URL' the entry URLs of the first processed
  * list page are returned directly.
  *
  * Changes over the previous version: preg_match() is no longer given the
  * undefined variable $PREG_SET_ORDER; variables that could be read while
  * undefined ($project, $prule_list_url, $sid, $listsArray, $contentArray,
  * $suid, counters) are initialised, so 'WEB@MANUAL' / 'DATA@RULE' return
  * empty arrays instead of null when nothing was collected and the shell
  * counters always show all four keys; the list area is released only after
  * the rule-test output that prints it; the publish result no longer
  * overwrites the $callback parameter; "RULE@rid@url" keeps '@' characters
  * inside the url part.
  *
  * @param string|null $work     Working mode, see above.
  * @param mixed       $pid      Project id; defaults to spider::$pid.
  * @param mixed       $_rid     Rule id used when the project supplies none.
  * @param string|null $_urls    Newline-separated list URLs overriding all other sources.
  * @param string|null $callback 'CALLBACK@URL' to get the entry URLs back.
  * @return mixed See the per-mode description above.
  */
 public static function crawl($work = NULL, $pid = NULL, $_rid = NULL, $_urls = null, $callback = null)
 {
     $pid === NULL && ($pid = spider::$pid);
     $project = array();
     $prule_list_url = null;
     $lastupdate = null;
     // NOTE(review): $sid is never assigned in this method; it is returned
     // as null in WEB@MANUAL mode exactly as before. Confirm the intent.
     $sid = null;
     if ($pid) {
         $project = spider::project($pid);
         $cid = $project['cid'];
         $rid = $project['rid'];
         $prule_list_url = $project['list_url'];
         $lastupdate = $project['lastupdate'];
     } else {
         $cid = spider::$cid;
         $rid = spider::$rid;
     }
     if (empty($rid) && $_rid !== NULL) {
         $rid = $_rid;
     }
     if ($work == 'shell') {
         $lastupdate = isset($project['lastupdate']) ? $project['lastupdate'] : null;
         // Respect the project's minimum interval between two crawls.
         if (!empty($project['psleep'])) {
             if (time() - $lastupdate < $project['psleep']) {
                 echo '采集方案[' . $pid . "]:" . format_date($lastupdate) . "刚采集过了,请" . $project['psleep'] / 3600 . "小时后在继续采集\n";
                 return;
             }
         }
         echo "开始采集方案[" . $pid . "] 采集规则[" . $rid . "]\n";
     }
     $ruleA = spider::rule($rid);
     $rule = $ruleA['rule'];
     $urls = $rule['list_urls'];
     !empty($project['urls']) && ($urls = $project['urls']);
     spiderUrls::$urls && ($urls = spiderUrls::$urls);
     $_urls && ($urls = $_urls);
     $urlsArray = explode("\n", $urls);
     $urlsArray = array_filter($urlsArray);
     $_urlsArray = $urlsArray;
     $urlsList = array();
     if ($work == 'shell') {
         print_r($urlsArray);
     }
     foreach ($_urlsArray as $_key => $_url) {
         $_url = htmlspecialchars_decode($_url);
         $_urlsList = array();
         /**
          * RULE@rid@url
          * The url is crawled with rule [rid] and the resulting list is used.
          */
         if (strpos($_url, 'RULE@') !== false) {
             // Limit to three parts so '@' inside the url part is preserved.
             list($___s, $_rid, $_urls) = array_pad(explode('@', $_url, 3), 3, null);
             if (spider::$ruleTest) {
                 print_r('<b>使用[rid:' . $_rid . ']规则抓取列表</b>:' . $_urls);
                 echo "<hr />";
             }
             $_urlsList = spiderUrls::crawl($work, false, $_rid, $_urls, 'CALLBACK@URL');
             $urlsList = array_merge($urlsList, $_urlsList);
             unset($urlsArray[$_key]);
         } else {
             preg_match('|.*<(.*)>.*|is', $_url, $_matches);
             if ($_matches) {
                 // Pad so omitted trailing options do not raise notices.
                 list($format, $begin, $num, $step, $zeroize, $reverse) = array_pad(explode(',', $_matches[1]), 6, null);
                 $url = str_replace($_matches[1], '*', trim($_matches[0]));
                 $_urlsList = spiderTools::mkurls($url, $format, $begin, $num, $step, $zeroize, $reverse);
                 unset($urlsArray[$_key]);
                 $urlsList = array_merge($urlsList, $_urlsList);
             }
         }
     }
     $urlsList && ($urlsArray = array_merge($urlsArray, $urlsList));
     unset($_urlsArray, $_key, $_url, $_matches, $_urlsList, $urlsList);
     $urlsArray = array_unique($urlsArray);
     if (empty($urlsArray)) {
         if ($work == 'shell') {
             echo "采集列表为空!请填写!\n";
             return false;
         }
         iPHP::alert('采集列表为空!请填写!', 'js:parent.window.iCMS_MODAL.destroy();');
     }
     // Mode "2" extracts with phpQuery selectors instead of regular expressions.
     if ($rule['mode'] == "2") {
         iPHP::import(iPHP_LIB . '/phpQuery.php');
         spider::$ruleTest && !empty($_GET['pq_debug']) && (phpQuery::$debug = 1);
     }
     $pubArray = array();
     $pubCount = array();
     $pubAllCount = array('count' => 0, 'success' => 0, 'error' => 0, 'published' => 0);
     $listsArray = array();
     $contentArray = array();
     spider::$curl_proxy = $rule['proxy'];
     spider::$urlslast = null;
     foreach ($urlsArray as $key => $url) {
         $url = trim($url);
         spider::$urlslast = $url;
         if ($work == 'shell') {
             echo '开始采集列表:' . $url . "\n";
         }
         if (spider::$ruleTest) {
             echo '<b>抓取列表:</b>' . $url . "<br />";
         }
         $html = spiderTools::remote($url);
         if (empty($html)) {
             continue;
         }
         if ($rule['mode'] == "2") {
             $doc = phpQuery::newDocumentHTML($html, 'UTF-8');
             $list_area = $doc[trim($rule['list_area_rule'])];
             if ($rule['list_area_format']) {
                 $list_area_format = trim($rule['list_area_format']);
                 if (strpos($list_area_format, 'ARRAY::') !== false) {
                     // One sub-selection per node of the list area.
                     $list_area_format = str_replace('ARRAY::', '', $list_area_format);
                     $lists = array();
                     foreach ($list_area as $la_key => $la) {
                         $lists[] = phpQuery::pq($list_area_format, $la);
                     }
                 } else {
                     $lists = phpQuery::pq($list_area_format, $list_area);
                 }
             } else {
                 $lists = $list_area;
             }
         } else {
             $list_area_rule = spiderTools::pregTag($rule['list_area_rule']);
             if ($list_area_rule) {
                 // preg_match() takes no PREG_SET_ORDER flag; the original
                 // passed an undefined variable here.
                 preg_match('|' . $list_area_rule . '|is', $html, $matches);
                 $list_area = isset($matches['content']) ? $matches['content'] : null;
             } else {
                 $list_area = $html;
             }
             $html = null;
             unset($html);
             if (spider::$ruleTest) {
                 echo iS::escapeStr($rule['list_area_rule']);
                 echo "<hr />";
             }
             if ($rule['list_area_format']) {
                 $list_area = spiderTools::dataClean($rule['list_area_format'], $list_area);
             }
             preg_match_all('|' . spiderTools::pregTag($rule['list_url_rule']) . '|is', $list_area, $lists, PREG_SET_ORDER);
             // sort "1" is intentionally a no-op (kept from the original).
             if ($rule['sort'] == "2") {
                 asort($lists);
             } elseif ($rule['sort'] == "3") {
                 shuffle($lists);
             }
         }
         if (spider::$ruleTest) {
             echo '<b>列表区域规则:</b>' . iS::escapeStr($rule['list_area_rule']);
             echo "<hr />";
             echo '<b>列表区域抓取结果:</b>' . iS::escapeStr($list_area);
             echo "<hr />";
             echo '<b>列表链接规则:</b>' . iS::escapeStr($rule['list_url_rule']);
             echo "<hr />";
             echo '<b>网址合成规则:</b>' . iS::escapeStr($rule['list_url']);
             echo "<hr />";
         }
         // Release the list area only now: the rule-test output above prints it.
         $list_area = null;
         unset($list_area);
         if ($prule_list_url) {
             $rule['list_url'] = $prule_list_url;
         }
         // Callback mode: return the entry URLs of this list page.
         if ($callback == 'CALLBACK@URL') {
             $cbListUrl = array();
             foreach ($lists as $lkey => $row) {
                 list(spider::$title, spider::$url) = spiderTools::title_url($row, $rule, $url);
                 if (spider::$url === false) {
                     continue;
                 }
                 $cbListUrl[] = spider::$url;
             }
             return $cbListUrl;
         }
         if ($work == "shell") {
             if (!isset($pubCount[$url])) {
                 $pubCount[$url] = array('count' => 0, 'success' => 0, 'error' => 0, 'published' => 0);
             }
             $pubCount[$url]['count'] = count($lists);
             $pubAllCount['count'] += $pubCount[$url]['count'];
             echo "开始采集:" . $url . " 列表 " . $pubCount[$url]['count'] . "条记录\n";
             foreach ($lists as $lkey => $row) {
                 list(spider::$title, spider::$url) = spiderTools::title_url($row, $rule, $url);
                 if (spider::$url === false) {
                     continue;
                 }
                 $hash = md5(spider::$url);
                 echo "title:" . spider::$title . "\n";
                 echo "url:" . spider::$url . "\n";
                 spider::$rid = $rid;
                 $checker = spider::checker($work);
                 if ($checker === true) {
                     echo "开始采集....";
                     // Kept in its own variable so the $callback parameter
                     // is not overwritten.
                     $published = spider::publish("shell");
                     if ($published['code'] == "1001") {
                         $pubCount[$url]['success']++;
                         $pubAllCount['success']++;
                         echo "....√\n";
                         if (!empty($project['sleep'])) {
                             echo "sleep:" . $project['sleep'] . "s\n";
                             if ($rule['mode'] != "2") {
                                 unset($lists[$lkey]);
                             }
                             gc_collect_cycles();
                             sleep($project['sleep']);
                         }
                     } else {
                         $pubCount[$url]['error']++;
                         $pubAllCount['error']++;
                         echo "error\n\n";
                         continue;
                     }
                 }
                 $pubCount[$url]['published']++;
                 $pubAllCount['published']++;
             }
             if ($rule['mode'] == "2") {
                 phpQuery::unloadDocuments($doc->getDocumentID());
             } else {
                 unset($lists);
             }
         }
         if ($work == "WEB@MANUAL") {
             $listsArray[$url] = $lists;
         }
         if ($work == "WEB@AUTO" || $work == 'DATA@RULE') {
             foreach ($lists as $lkey => $row) {
                 list(spider::$title, spider::$url) = spiderTools::title_url($row, $rule, $url);
                 if (spider::$url === false) {
                     continue;
                 }
                 $hash = md5(spider::$url);
                 if (spider::$ruleTest) {
                     echo '<b>列表抓取结果:</b>' . $lkey . '<br />';
                     echo spider::$title . ' (<a href="' . APP_URI . '&do=testdata' . '&url=' . urlencode(spider::$url) . '&rid=' . $rid . '&pid=' . $pid . '&title=' . urlencode(spider::$title) . '" target="_blank">测试内容规则</a>) <br />';
                     echo spider::$url . "<br />";
                     echo $hash . "<br /><hr />";
                 } else {
                     if (spider::checker($work) === true || spider::$dataTest) {
                         $suData = array('sid' => 0, 'url' => spider::$url, 'title' => spider::$title, 'cid' => $cid, 'rid' => $rid, 'pid' => $pid, 'hash' => $hash);
                         switch ($work) {
                             case 'DATA@RULE':
                                 $contentArray[$lkey] = spiderData::crawl();
                                 unset($suData['sid']);
                                 $suData['title'] = addslashes($suData['title']);
                                 $suData += array('addtime' => time(), 'status' => '2', 'publish' => '2', 'indexid' => '0', 'pubdate' => '0');
                                 // In data-test mode nothing is inserted,
                                 // so there is no row id.
                                 $suid = null;
                                 spider::$dataTest or $suid = iDB::insert('spider_url', $suData);
                                 $contentArray[$lkey]['spider_url'] = $suid;
                                 break;
                             case 'WEB@AUTO':
                                 $pubArray[] = $suData;
                                 break;
                         }
                     }
                 }
             }
         }
     }
     $lists = null;
     unset($lists);
     gc_collect_cycles();
     switch ($work) {
         case 'WEB@AUTO':
             return $pubArray;
         case 'DATA@RULE':
             return $contentArray;
         case 'WEB@MANUAL':
             return array('cid' => $cid, 'rid' => $rid, 'pid' => $pid, 'sid' => $sid, 'work' => $work, 'rule' => $rule, 'listsArray' => $listsArray);
         case "shell":
             echo "采集数据统结果:\n";
             print_r($pubCount);
             print_r($pubAllCount);
             echo "全部采集完成....\n";
             iDB::update('spider_project', array('lastupdate' => time()), array('id' => $pid));
             break;
     }
 }
 /**
  * Prints a numbered header followed by the tree dump of every node on the
  * current stack.
  *
  * Each node's dump is printed as soon as it is produced. phpQuery::$debug
  * is switched off for the duration of the dump and restored afterwards;
  * phpQuery::$dumpCount is incremented once per call.
  *
  * @return $this
  */
 public function dumpTree()
 {
     $header = 'DUMP #' . phpQuery::$dumpCount++ . ' ';
     print $header;

     $savedDebug = phpQuery::$debug;
     phpQuery::$debug = false;
     foreach ($this->stack() as $stackNode) {
         $nodeDump = $this->__dumpTree($stackNode);
         print $nodeDump;
     }
     phpQuery::$debug = $savedDebug;

     return $this;
 }