/**
 * Get all the lesson pages in a term.
 *
 * Calls get_college_page() for the term, then reads each college's saved page
 * from resource/college_<term_no>/<college>.txt, parses it with
 * get_attr_lists() and calls get_class_page() with the lesson-number cell of
 * every parsed row except the first and the last one.
 *
 * @param mixed $term_no Term number; used to build the resource path and
 *                       passed through to the page fetchers.
 * @return bool Always true.
 */
function get_term_class_pages($term_no)
{
    // $college_list is not assigned anywhere in this function, so it must be
    // imported from the global scope; as a plain local it is undefined and the
    // loop below never runs.
    // NOTE(review): assumes $college_list is defined at global scope
    // (presumably by inc.php) - confirm.
    global $college_list;

    set_time_limit(0); // remove the execution time limit

    // Fetch the college course-offering pages.
    // NOTE(review): presumably this produces the resource files read below - confirm.
    get_college_page($term_no);

    // Parsing mode is identical for every college, so build it once.
    $mode = array(FIRST => "tr", SECOND => "td", LAST => "");

    foreach ($college_list as $college) {
        $file_name = "resource/" . sprintf("college_%s/%s.txt", $term_no, $college);

        // Source of the college course-offering page.
        $output = file_get_contents($file_name);
        if ($output === false) {
            // The page is missing or unreadable; there is nothing to parse.
            continue;
        }

        // Extract the row/cell attributes from the page source.
        $list = get_attr_lists($output, $mode);
        if (!is_array($list)) {
            continue;
        }

        // Walk the rows, leaving out the first and the last one.
        $row_count = count($list);
        for ($row = 1; $row < $row_count - 1; $row++) {
            // Only rows that actually carry a lesson-number cell are fetched.
            if (isset($list[$row][lesson_no])) {
                // Fetch the lesson page for this term number and lesson number.
                get_class_page($term_no, $list[$row][lesson_no]);
            }
        }
    }

    return true;
}
/**
 * Insert the college => lesson page info of one term into the database
 * (the lesson => student page info is not included).
 *
 * For every college the saved page resource/college_<term_no>/<college>.txt is
 * parsed with get_attr_lists(); every parsed row except the first and the last
 * one is copied into a record and stored through
 * college_lesson_spider_db::insert_all().
 *
 * Notice: the local server must already hold the saved college/lesson page
 * files; use the functions in spider.func.php to fetch them first.
 *
 * @param mixed $term_no  Term number; selects the resource directory and is
 *                        stored in every inserted record.
 * @param mixed $log_mode Passed to myLog::setMode(); per the original note,
 *                        1 enables log output and 0 disables it.
 * @return void
 */
function insert_college_lesson_in_term($term_no, $log_mode = '1')
{
    // Bind the shared configuration arrays to the global scope. Without this
    // they only exist here when the require_once below really executes inside
    // this call; once inc.php has been loaded anywhere earlier they would be
    // undefined and nothing would be inserted.
    // NOTE(review): assumes inc.php is what defines both arrays - confirm.
    global $college_list, $college_lesson_list;

    require_once ROOT_PATH . "\\Spider\\include\\inc.php";
    require_once ROOT_PATH . "\\Spider\\include\\spider.func.php";
    require_once ROOT_PATH . "\\Spider\\class\\db\\lesson_spider_db.php";

    $db = new cDatabase(DATABASE_INFO_CONSTANTS::$db_info);
    // Original note: if this is not closed, errors will show up later on.
    // NOTE(review): nothing in this function closes it - confirm who does.
    $college_lesson_db = new college_lesson_spider_db($db);

    $log = new myLog();
    $log->setMode($log_mode);

    // Both values are the same for every college and row, so build them once.
    $mode = array(FIRST => "tr", SECOND => "td", LAST => "");
    $fields = array(
        id,
        college,
        teacher,
        lesson_time,
        major_class,
        lesson_no,
        lesson_name,
        credit,
        total_person,
        limit_person,
        lesson_type,
        weeks,
        room,
        credit_type,
        other,
    );

    foreach ($college_list as $college) {
        $file = sprintf("resource/college_%s/%s.txt", $term_no, $college);

        $log->add(__FILE__ . ":open " . $college . ".txt");

        $code = file_get_contents($file);
        if ($code === false) {
            // The page is missing or unreadable; there is nothing to insert.
            continue;
        }

        $list = get_attr_lists($code, $mode);
        if (!is_array($list)) {
            continue;
        }

        // Walk the rows, leaving out the first and the last one.
        $row_count = count($list);
        for ($row = 1; $row < $row_count - 1; $row++) {
            $cells = $list[$row];

            // Save the row to the database, starting from the shared template.
            $record = $college_lesson_list;
            foreach ($fields as $field) {
                // A cell missing from the row is stored as null.
                $record[$field] = isset($cells[$field]) ? $cells[$field] : null;
            }
            $record[term_no] = $term_no;

            $college_lesson_db->insert_all($record);
        }
    }
}