/**
 * Extract the configured fields from an HTML page, then log and optionally export them.
 *
 * Steps performed:
 *  1. Run get_fields() with the 'fields' entry of self::$configs.
 *  2. If an on_extract_page callback is set, call it with ($page, $fields); an
 *     array return value replaces the extracted fields, anything else only
 *     triggers a warning and the extracted fields are kept.
 *  3. Bump the extracted-result counter, log the fields as JSON.
 *  4. If self::$configs['export'] is non-empty, write the fields out according
 *     to its 'type' ('csv', 'sql' or 'db'); any other type exports nothing.
 *
 * Nothing is logged or exported when get_fields() yields an empty result.
 *
 * @param mixed $html Page content, forwarded unchanged to get_fields().
 * @param mixed $url  Forwarded unchanged to get_fields().
 * @param mixed $page Forwarded to get_fields() and to the on_extract_page callback.
 * @return void
 * @author seatle <*****@*****.**>
 * @created time :2016-09-18 10:17
 */
public function get_html_fields($html, $url, $page)
{
    $fields = $this->get_fields(self::$configs['fields'], $html, $url, $page);
    if (empty($fields)) {
        return;
    }

    // Give the user callback a chance to post-process the extracted fields.
    if ($this->on_extract_page) {
        $return_data = call_user_func($this->on_extract_page, $page, $fields);
        if (!isset($return_data)) {
            // Callback returned null: keep the originally extracted fields.
            log::warn("on_extract_page function return value can't be empty\n");
        } elseif (!is_array($return_data)) {
            log::warn("on_extract_page function return value must be an array\n");
        } else {
            $fields = $return_data;
        }
    }

    if (!is_array($fields)) {
        return;
    }

    $fields_num = $this->incr_fields_num();

    // json_encode() returns false on failure (e.g. invalid UTF-8 in the
    // extracted content). Report it instead of silently logging a blank
    // result, and fall back to an empty string so the log line and the
    // export below still proceed.
    $fields_str = json_encode($fields, JSON_UNESCAPED_UNICODE);
    if ($fields_str === false) {
        log::warn("json_encode failed for extracted fields (json error code " . json_last_error() . ")\n");
        $fields_str = '';
    }

    // On Windows the logged text is converted from UTF-8 to GB2312.
    if (util::is_win()) {
        $fields_str = mb_convert_encoding($fields_str, 'gb2312', 'utf-8');
    }
    log::info(date("H:i:s") . " Result[{$fields_num}]: " . $fields_str . "\n");

    // No export configured: logging was all there was to do.
    if (empty(self::$configs['export'])) {
        return;
    }

    // The export type is stored on the class's static property; an absent
    // type is stored as '' and matches none of the branches below.
    self::$export_type = isset(self::$configs['export']['type']) ? self::$configs['export']['type'] : '';

    if (self::$export_type === 'csv') {
        // Append one CSV row to the export file.
        util::put_file(self::$export_file, util::format_csv($fields) . "\n", FILE_APPEND);
    } elseif (self::$export_type === 'sql') {
        // Third argument asks db::insert() for the statement, which is
        // appended to the export file.
        $sql = db::insert(self::$export_table, $fields, true);
        util::put_file(self::$export_file, $sql . ";\n", FILE_APPEND);
    } elseif (self::$export_type === 'db') {
        db::insert(self::$export_table, $fields);
    }
}