makeDir("./data/index/{$key}");
foreach ($files as $file) {
    $fp = fopen($file, "r");
    $file = iconv("gb2312", "utf-8", $file);
    $subdir = basename($file, ".log");
    //$subdir = win_dir_format($subdir);
    $indexSavePath = "./data/index/{$key}/" . $subdir;
    makeDir($indexSavePath);
    $mapFile = $indexSavePath . "/paper_url_mapping.log";
    delFile($mapFile);
    $icount = 1;
    while ($line = readLine($fp)) {
        $arr = explode("\t", $line);
        $u = $arr[6];
        $paperName = $arr[0];
        $paperName = win_dir_format($paperName);
        //echo $paperName . "\n";
        $htmlFileName = $indexSavePath . "/" . $paperName . ".html";
        $tmpFile = iconv("utf-8", "gb2312//IGNORE", $htmlFileName);
        $dbCode = get_db_code($u);
        $fileName = get_file_name($u);
        $tableName = get_table_name($u);
        $realUrl = get_real_url($dbCode, $fileName, $tableName);
        if (file_exists($tmpFile)) {
            if (filesize($tmpFile) < 100) {
                delFile($htmlFileName);
            } else {
                echo "Cache hit! continue -> {$htmlFileName}\n";
                $mapContent = "{$paperName}\t{$realUrl}\n";
                save($mapFile, $mapContent, "a+");
                continue;
/**
 * Extracts the per-article fields from one page and appends them to $fileName,
 * one tab-separated record per line.
 *
 * Column order of each record: article name, author, degree-granting school,
 * source database, degree year, preview URL, abstract URL, $code.
 * (The download count named in the original field list is not extracted.)
 * The article name is passed through win_dir_format(); every other column is
 * written exactly as its parse* helper returned it.
 *
 * Progress is echoed to stdout. When no article name is found, nothing is
 * written to $fileName.
 *
 * @param string $content  Page content handed to every parse* helper.
 * @param string $fileName Path handed to save(), which is called with mode "a+".
 * @param mixed  $code     Value written as the last column of every record.
 * @return void
 */
function parseContent($content, $fileName, $code)
{
    echo "parseContent : {$fileName} >> ";

    $articleName = parseArticleName($content);
    $authors = parseAuthor($content);
    $schools = parseSchool($content);
    $origin = parseOrigin($content);
    $years = parseYear($content);
    $previewPage = parsePreviewURL($content);
    $abstractUrl = parseAbstractUrl($content);

    // The article-name list decides how many records are produced.
    $len = count($articleName);
    if ($len == 0) {
        echo "Done... but get nothing form {$fileName}\n";
        return;
    }

    // Each helper parses on its own, so the secondary lists may be shorter than
    // the article-name list. A missing cell becomes an empty column instead of
    // triggering an "undefined offset" diagnostic for every absent value.
    $columns = array($authors, $schools, $origin, $years, $previewPage, $abstractUrl);

    $saveContent = "";
    for ($i = 0; $i < $len; $i++) {
        $fields = array(win_dir_format($articleName[$i]));
        foreach ($columns as $column) {
            $fields[] = isset($column[$i]) ? $column[$i] : "";
        }
        $fields[] = $code;
        $saveContent .= implode("\t", $fields) . "\n";
    }

    save($fileName, $saveContent, "a+");
    echo "Done!\n";
}
// Key selecting the data set, taken from the first command-line argument
// (the usage text gives 'A', 'B', ... as examples). isset() keeps a run with
// no argument from raising an "undefined offset" diagnostic before the usage
// text is printed.
$key = isset($argv[1]) ? $argv[1] : "";
if (!$key) {
    echo "usage \$php abstract.php 'A', 'B' ...\n";
    exit;
}
// Log files to process are looked up under ./data/<key>/.
$files = get_all_log_file("./data/{$key}/");
makeDir("./data/abstract/");
// Directory that stores the paper abstracts for this key; per the original
// note it is not created again when it already exists.
makeDir("./data/abstract/{$key}");
// HTTP client for the epub.cnki.net host.
$httpClient = new HttpClient("epub.cnki.net");
foreach ($files as $file) {
    $fp = fopen($file, "r");
    $file = iconv("gb2312", "utf-8", $file);
    $subdir = basename($file, ".log");
    $subdir = win_dir_format($subdir);
    $dataSavePath = "./data/abstract/{$key}/" . $subdir;
    makeDir($dataSavePath);
    makeDir($dataSavePath . "/tmp");
    $mapFile = $dataSavePath . "/paper_abstract_url.log";
    $icount = 1;
    while ($line = readLine($fp)) {
        $sleep = true;
        $arr = explode("\t", $line);
        $u = $arr[6];
        $paperName = $arr[0];
        $code = $arr[7];
        /*获取Referer头*/
        $dbCode = get_db_code($u);
        //CDFD
        $refUrl = get_ref($dbCode);