/**
 * Extract the title and URL of a single list entry and collect any extra fields.
 *
 * The extraction mode is chosen from $rule['list_url_rule']:
 *  - If the rule contains the `<%url%>` placeholder, $row itself is used as the
 *    field map; its 'title' and 'url' entries are read. Non-array rows are ignored.
 *  - Otherwise, when $row is an object, it is wrapped with phpQuery::pq() and every
 *    line of the rule is evaluated against it. A line is either `key@@selector`
 *    or a bare selector; bare selectors on the first and second line map to
 *    'title' and 'url'. Bare selectors on later lines have no key and are skipped.
 *    A selector containing `DOM::` is delegated to spiderTools::domAttr(); the
 *    selector `text` reads the node text; anything else is read as an attribute.
 *    An empty selector defaults to `text` for 'title' and `href` for 'url'.
 *
 * The URL is then substituted into $rule['list_url'] (replacing `<%url%>`),
 * completed against $baseUrl when it carries the `AUTO::` marker, and passed
 * through spiderTools::dataClean() when $rule['list_url_clean'] is set.
 * Tags are stripped from the title.
 *
 * Side effect: spiderTools::$listArray is reset on every call and then filled
 * with the remaining extracted fields whose key is non-numeric and does not
 * contain `var_`.
 *
 * @param mixed       $row     Field map (placeholder mode) or DOM node object (DOM mode).
 * @param array       $rule    Rule set; reads 'list_url_rule', 'list_url' and 'list_url_clean'.
 * @param string|null $baseUrl Base URL used to complete `AUTO::` URLs; skipped when empty.
 *
 * @return array Two-element list: array($title, $url). Missing values come back as ''.
 */
public static function title_url($row, $rule, $baseUrl = null)
{
    spiderTools::$listArray = array();

    $urlRule   = isset($rule['list_url_rule']) ? (string) $rule['list_url_rule'] : '';
    $responses = array();

    if (strpos($urlRule, '<%url%>') !== false) {
        // Placeholder mode: the row already is the field map. Anything that is
        // not an array cannot be indexed by 'title'/'url' safely, so ignore it.
        if (is_array($row)) {
            $responses = $row;
        }
    } elseif (is_object($row)) {
        // DOM mode: evaluate each rule line against the node.
        $DOM    = phpQuery::pq($row);
        $keyMap = array('title', 'url');

        foreach (explode("\n", $urlRule) as $index => $line) {
            $dom_rule = trim($line);

            if (strpos($dom_rule, '@@') !== false) {
                list($dom_key, $dom_rule) = explode('@@', $dom_rule);
            } elseif (isset($keyMap[$index])) {
                $dom_key = $keyMap[$index];
            } else {
                // A bare selector past the second line has no field name;
                // storing it would create a bogus '' key, so skip it.
                continue;
            }

            if (strpos($dom_rule, 'DOM::') !== false) {
                $content = spiderTools::domAttr($DOM, $dom_rule);
            } else {
                if (!$dom_rule) {
                    // Empty selector: fall back to the natural source of the field.
                    if ($dom_key === 'url') {
                        $dom_rule = 'href';
                    } elseif ($dom_key === 'title') {
                        $dom_rule = 'text';
                    }
                }
                $content = ($dom_rule === 'text') ? $DOM->text() : $DOM->attr($dom_rule);
            }

            // A missing attribute may come back as null; normalise before trim().
            if ($content === null) {
                $content = '';
            }

            // Trim the value and drop embedded space characters.
            $responses[$dom_key] = str_replace(' ', '', trim($content));
        }
        unset($DOM);
    }

    // Default to '' so a row without title/url neither raises notices nor
    // passes null into trim().
    $title = isset($responses['title']) ? trim($responses['title']) : '';
    $url   = isset($responses['url']) ? trim($responses['url']) : '';

    // Inject the extracted URL into the configured URL template.
    $listUrl = isset($rule['list_url']) ? $rule['list_url'] : '';
    $url     = str_replace('<%url%>', $url, $listUrl);

    if ($baseUrl && strpos($url, 'AUTO::') !== false) {
        $url = str_replace('AUTO::', '', $url);
        $url = spiderTools::url_complement($baseUrl, $url);
    }

    if (!empty($rule['list_url_clean'])) {
        $url = spiderTools::dataClean($rule['list_url_clean'], $url);
    }

    // Strip any markup left in the title.
    $title = preg_replace('/<[\\/\\!]*?[^<>]*?>/is', '', $title);

    // Everything except title/url is exposed as an extra list field, unless
    // the key is numeric or marks a temporary variable (contains 'var_').
    unset($responses['title'], $responses['url']);
    foreach ($responses as $key => $value) {
        if (!is_numeric($key) && strpos($key, 'var_') === false) {
            spiderTools::$listArray[$key] = $value;
        }
    }

    return array($title, $url);
}