Пример #1
0
 /**
  * Extract the configured fields from an HTML page, then log and export them.
  *
  * The fields are obtained from get_fields() using self::$configs['fields'].
  * When an on_extract_page callback is set it is called with ($page, $fields);
  * if it returns an array, that array replaces the extracted fields, otherwise
  * a warning is logged and the originally extracted fields are kept.
  * The resulting fields are logged as JSON and, when self::$configs['export']
  * is non-empty, written out according to its 'type' ('csv', 'sql' or 'db').
  *
  * @param mixed $html page content, passed through to get_fields()
  * @param mixed $url  passed through to get_fields()
  * @param mixed $page passed to get_fields() and to the on_extract_page callback
  * @return void
  * @author seatle <*****@*****.**> 
  * @created time :2016-09-18 10:17
  */
 public function get_html_fields($html, $url, $page)
 {
     $fields = $this->get_fields(self::$configs['fields'], $html, $url, $page);
     if (empty($fields)) {
         // Nothing was extracted: nothing to log or export.
         return;
     }

     if ($this->on_extract_page) {
         $return_data = call_user_func($this->on_extract_page, $page, $fields);
         if (!isset($return_data)) {
             log::warn("on_extract_page function return value can't be empty\n");
         } elseif (!is_array($return_data)) {
             log::warn("on_extract_page function return value must be an array\n");
         } else {
             $fields = $return_data;
         }
     }

     if (!is_array($fields)) {
         return;
     }

     $fields_num = $this->incr_fields_num();
     $fields_str = json_encode($fields, JSON_UNESCAPED_UNICODE);
     if ($fields_str === false) {
         // json_encode() returns false on failure (e.g. malformed UTF-8 in the
         // data); report it instead of silently logging an empty result.
         log::warn("Result[{$fields_num}]: json_encode failed, error code " . json_last_error() . "\n");
     } else {
         if (util::is_win()) {
             // On Windows the log line is converted from UTF-8 to gb2312.
             $fields_str = mb_convert_encoding($fields_str, 'gb2312', 'utf-8');
         }
         log::info(date("H:i:s") . " Result[{$fields_num}]: " . $fields_str . "\n");
     }

     // Export only when an export option has been configured.
     if (empty(self::$configs['export'])) {
         return;
     }

     self::$export_type = isset(self::$configs['export']['type']) ? self::$configs['export']['type'] : '';
     switch (self::$export_type) {
         case 'csv':
             // Append one CSV row to the export file.
             util::put_file(self::$export_file, util::format_csv($fields) . "\n", FILE_APPEND);
             break;
         case 'sql':
             // Third argument true: use the returned SQL string and append it to the file.
             $sql = db::insert(self::$export_table, $fields, true);
             util::put_file(self::$export_file, $sql . ";\n", FILE_APPEND);
             break;
         case 'db':
             db::insert(self::$export_table, $fields);
             break;
     }
 }