Ejemplo n.º 1
0
/**
 * Fetch every lesson (class) page offered in a term.
 *
 * Calls get_college_page() for the term, then for each college reads the saved
 * listing "resource/college_<term_no>/<college>.txt", parses it with
 * get_attr_lists() and calls get_class_page() with the value found in the
 * `lesson_no` column of every parsed row except the first and the last.
 *
 * @param int|string $term_no Term number used both for fetching and for
 *                            building the resource directory name.
 * @return bool Always true.
 */
function get_term_class_pages($term_no)
{
    // The original body iterated $college_list without importing it, so the
    // loop never ran inside function scope.
    // NOTE(review): assumes $college_list is a global array defined by the
    // project's include file -- confirm against inc.php.
    global $college_list;

    // Disable the execution time limit; the crawl can take a long time.
    set_time_limit(0);

    // Fetch the per-college course listing pages for this term.
    get_college_page($term_no);

    if (!is_array($college_list)) {
        return true;
    }

    // Parsing mode is the same for every college, so build it once.
    $mode = array(FIRST => "tr", SECOND => "td", LAST => "");

    // NOTE(review): lesson_no is a project constant, presumably the
    // zero-based index of the lesson-number column -- confirm.
    $lesson_col = lesson_no;

    foreach ($college_list as $college) {
        $file_name = "resource/" . sprintf("college_%s/%s.txt", $term_no, $college);

        // Skip colleges whose listing was not saved instead of parsing `false`.
        if (!is_readable($file_name)) {
            continue;
        }
        $output = file_get_contents($file_name);
        if ($output === false) {
            continue;
        }

        // Extract the table cells from the college listing source.
        $list = get_attr_lists($output, $mode);
        if (!is_array($list) || !isset($list[0]) || !is_array($list[0])) {
            continue;
        }

        $row_count = count($list);
        $col_count = count($list[0]);

        // The column must exist within the width of the first row, matching
        // the bounds of the original column scan.
        if ($lesson_col < 0 || $lesson_col >= $col_count) {
            continue;
        }

        // The first and last rows are skipped, as in the original loop.
        for ($row = 1; $row < $row_count - 1; $row++) {
            if (!isset($list[$row][$lesson_col])) {
                continue;
            }
            // Fetch the class page for this term / lesson number.
            get_class_page($term_no, $list[$row][$lesson_col]);
        }
    }

    return true;
}
Ejemplo n.º 2
0
<?php

if (!defined('ROOT_PATH')) {
    // Project root: this file's directory with its last 7 characters removed
    // (presumably the trailing "\Spider" path segment -- confirm).
    define("ROOT_PATH", substr(dirname(__FILE__), 0, -7));
}
// Step 1: crawl the college pages first.
require_once ROOT_PATH . "\\Spider\\include\\spider.func.php";
set_time_limit(0);
// NOTE(review): this fetches term 20131 while the later steps use 20132 --
// confirm the term numbers are intentionally different.
get_college_page(20131);
// Step 2: insert the college/lesson data into the database.
/*set_time_limit(0);
insert_college_lesson_in_term("20132");*/
// Step 3: insert all lesson/student data into the database.
// Including the files this way (absolute paths from ROOT_PATH) avoids include errors.
require_once ROOT_PATH . "\\Spider\\include\\inc.php";
require_once ROOT_PATH . "\\Spider\\class\\db\\lesson_spider_db.php";
require_once ROOT_PATH . "\\Spider\\class\\db\\database.php";
set_time_limit(0);
$db = new cDatabase(DATABASE_INFO_CONSTANTS::$db_info);
$college_lesson_db = new college_lesson_spider_db($db);
// Original author's note: if this is not closed, errors occur later on.
$lesson_db = new lesson_student_spider_db($db);
// Helper object for lesson/student database operations.
$log = new myLog();
$log->setMode(0);
// Mode 0: the original author's note says this disables log output.
$my_dir = "resource/all_info_20132/";
$handler = opendir($my_dir);
if ($handler === false) {
    // Without this check readdir() would be handed `false`: on PHP 5/7 it
    // returns NULL, which is `!== false`, so the loop below would never end
    // (the time limit is disabled); on PHP 8 it throws a TypeError.
    throw new RuntimeException("Unable to open directory: " . $my_dir);
}
while (($filename = readdir($handler)) !== false) {