Beispiel #1
0
 /**
  * Resolve the title and URL of a single list entry according to a spider rule.
  *
  * Two extraction modes are supported, selected by $rule['list_url_rule']:
  *  - If the rule contains the '<%url%>' placeholder, $row is used directly as
  *    the map of extracted fields.
  *  - Otherwise, if $row is an object, it is wrapped with phpQuery::pq() and each
  *    line of the rule is evaluated against it. A line may be written as
  *    "key@@rule"; without the "key@@" prefix the first line maps to 'title' and
  *    the second to 'url'. An empty rule defaults to the node text for 'title'
  *    and to the 'href' attribute for 'url'. Rules containing 'DOM::' are
  *    delegated to spiderTools::domAttr().
  *
  * Side effect: spiderTools::$listArray is reset on every call and then filled
  * with every remaining extracted field whose key is non-numeric and does not
  * contain 'var_'.
  *
  * @param mixed       $row     Extracted entry. NOTE(review): presumably an array of
  *                             captured fields in '<%url%>' mode and a DOM node
  *                             otherwise - confirm against callers.
  * @param array       $rule    Spider rule; the keys read here are 'list_url_rule',
  *                             'list_url' and (optionally) 'list_url_clean'.
  * @param string|null $baseUrl Base URL used to complete the URL when the
  *                             resulting URL contains the 'AUTO::' marker.
  *
  * @return array Two-element list: array($title, $url).
  */
 public static function title_url($row, $rule, $baseUrl = null)
 {
     spiderTools::$listArray = array();
     $responses = array();

     // Read rule entries defensively so that a partially configured rule does
     // not raise undefined-index notices (warnings as of PHP 8).
     $listUrlRule = isset($rule['list_url_rule']) ? $rule['list_url_rule'] : '';
     $listUrl = isset($rule['list_url']) ? $rule['list_url'] : '';

     if (strpos($listUrlRule, '<%url%>') !== false) {
         $responses = $row;
     } elseif (is_object($row)) {
         $DOM = phpQuery::pq($row);
         // Positional fallback for rule lines that carry no explicit "key@@".
         $keyMap = array('title', 'url');
         foreach (explode("\n", $listUrlRule) as $key => $value) {
             $dom_rule = trim($value);
             if (strpos($dom_rule, '@@') !== false) {
                 list($dom_key, $dom_rule) = explode("@@", $dom_rule);
             } elseif (isset($keyMap[$key])) {
                 $dom_key = $keyMap[$key];
             } else {
                 // A third or later line without an explicit key has no field
                 // name to be stored under; skip it instead of writing the
                 // value under an empty key.
                 continue;
             }
             $content = '';
             if (strpos($dom_rule, 'DOM::') !== false) {
                 $content = spiderTools::domAttr($DOM, $dom_rule);
             } else {
                 // Empty rule: fall back to the conventional source per field.
                 if ($dom_key == 'url') {
                     $dom_rule or $dom_rule = 'href';
                 }
                 if ($dom_key == 'title') {
                     $dom_rule or $dom_rule = 'text';
                 }
                 if ($dom_rule == 'text') {
                     $content = $DOM->text();
                 } else {
                     $content = $DOM->attr($dom_rule);
                 }
             }
             $responses[$dom_key] = str_replace('&nbsp;', '', trim($content));
         }
         unset($DOM);
     }

     // Missing fields yield an empty string rather than a notice.
     $title = isset($responses['title']) ? trim($responses['title']) : '';
     $url = isset($responses['url']) ? trim($responses['url']) : '';

     // Insert the extracted URL into the rule's URL template.
     $url = str_replace('<%url%>', $url, $listUrl);
     if (strpos($url, 'AUTO::') !== false && $baseUrl) {
         $url = str_replace('AUTO::', '', $url);
         $url = spiderTools::url_complement($baseUrl, $url);
     }
     if (!empty($rule['list_url_clean'])) {
         $url = spiderTools::dataClean($rule['list_url_clean'], $url);
     }

     // Strip any markup tags from the title.
     $title = preg_replace('/<[\\/\\!]*?[^<>]*?>/is', '', $title);

     // Everything except title/url is exposed through spiderTools::$listArray,
     // ignoring numeric keys and keys containing 'var_'.
     unset($responses['title'], $responses['url']);
     if ($responses) {
         foreach ($responses as $key => $value) {
             if (!is_numeric($key) && strpos($key, 'var_') === false) {
                 spiderTools::$listArray[$key] = $value;
             }
         }
     }
     return array($title, $url);
 }