/**
 * Get all the lesson pages in a term.
 *
 * Fetches the college course-offering pages for the term, then reads each
 * college's saved page from "resource/college_<term>/<college>.txt", parses
 * it into rows and columns, and fetches the class page of every lesson number
 * found in the data rows (the first and the last row are skipped).
 *
 * @param mixed $term_no the term number (used in the resource path and passed
 *                       on to get_college_page() / get_class_page()).
 * @return bool always true.
 */
function get_term_class_pages($term_no)
{
    // The loop below needs the list of colleges, but it was never brought into
    // function scope, so it always iterated over an undefined variable.
    // NOTE(review): assumes $college_list is a global set up by the project's
    // include files - confirm against inc.php.
    global $college_list;

    set_time_limit(0); // Disable the execution time limit; crawling is slow.
    get_college_page($term_no); // Crawl the college course-offering pages.

    if (!is_array($college_list)) {
        // Nothing to iterate over; keep the documented "always true" contract.
        return true;
    }

    foreach ($college_list as $college) {
        $file_name = sprintf("resource/college_%s/%s.txt", $term_no, $college);

        // Skip colleges whose page was not saved instead of parsing `false`.
        if (!is_readable($file_name)) {
            continue;
        }
        $output = file_get_contents($file_name); // Source of the college page.
        if ($output === false) {
            continue;
        }

        // Extract the table cells (rows of "tr", cells of "td") from the page.
        $mode = array(FIRST => "tr", SECOND => "td", LAST => "");
        $list = get_attr_lists($output, $mode);
        if (!is_array($list)) {
            continue;
        }

        // Row 0 and the last row are not lesson rows, so only the rows in
        // between are visited. An empty or one-row list yields no iterations.
        $row_count = count($list);
        for ($row = 1; $row < $row_count - 1; $row++) {
            // Only the lesson-number column matters; look it up directly
            // rather than scanning every column for the matching index.
            if (isset($list[$row][lesson_no])) {
                // Fetch the class page for this term and lesson number.
                get_class_page($term_no, $list[$row][lesson_no]);
            }
        }
    }

    return true;
}
<?php
// Spider driver script: crawls the college pages, loads the spider/database
// helpers, creates the database and log objects, and then starts reading the
// entries of the crawled-resource directory.
if (!defined('ROOT_PATH')) {
    // ROOT_PATH is this file's directory with its last 7 characters removed.
    // NOTE(review): presumably strips the name of the sub-folder this script
    // lives in - confirm against the project layout.
    define("ROOT_PATH", substr(dirname(__FILE__), 0, -7)); // Get the root directory.
}

// Step 1: crawl the college web pages first.
require_once ROOT_PATH . "\\Spider\\include\\spider.func.php";
set_time_limit(0);
// NOTE(review): this crawls term 20131 while the directory read below is
// "all_info_20132" - confirm the two term numbers are meant to differ.
get_college_page(20131);

// Step 2: insert the college/lesson data into the database (currently disabled).
/*set_time_limit(0);
insert_college_lesson_in_term("20132");*/

// Step 3: insert all of the lesson/student data into the database.
require_once ROOT_PATH . "\\Spider\\include\\inc.php";
// Already required in step 1; require_once makes this second include a no-op.
require_once ROOT_PATH . "\\Spider\\include\\spider.func.php";
require_once ROOT_PATH . "\\Spider\\class\\db\\lesson_spider_db.php";
require_once ROOT_PATH . "\\Spider\\class\\db\\database.php"; // Including the files this way avoids errors.
set_time_limit(0);
$db = new cDatabase(DATABASE_INFO_CONSTANTS::$db_info);
$college_lesson_db = new college_lesson_spider_db($db); // Note: if this is not closed, errors will occur later on.
$lesson_db = new lesson_student_spider_db($db); // Open the lesson/student database operation class.
$log = new myLog();
$log->setMode(0); // Configure the log to produce no output.
$my_dir = "resource/all_info_20132/";
$handler = opendir($my_dir);
// Read the directory entries one by one until readdir() reports the end.
while (($filename = readdir($handler)) !== false) {