/**
 * Fetch a URL through odie_get() and log how long the request took.
 *
 * The log line has the form "<url> [<first element of the result>]\t<elapsed> ms",
 * where the elapsed time is the wall-clock duration truncated to whole
 * milliseconds.
 *
 * @param string     $url  Address handed to odie_get() unchanged.
 * @param mixed|null $opts Options handed to odie_get() unchanged.
 * @return mixed Whatever odie_get() returned, untouched.
 */
function uget($url, $opts = null)
{
    $started_at = microtime(true);
    $response = odie_get($url, $opts);
    $elapsed_ms = intval((microtime(true) - $started_at) * 1000);

    slog("{$url} [{$response[0]}]\t{$elapsed_ms} ms");

    return $response;
}
/**
 * Download every answer page in the list and store the parsed question and answer.
 *
 * Each link must start with `/question/<digits>/answer/<digits>`; a link that does
 * not match ends the whole process with exit status 1. A response whose status is
 * not 200 is logged and skipped. An empty body stops the run and returns false.
 * Progress is echoed along the way. When the loop runs to completion and both the
 * success ratio and the average duration have been set, a summary line is printed.
 *
 * NOTE(review): get_average() is defined elsewhere; it presumably keeps running
 * averages keyed by its optional second argument - confirm against its definition.
 *
 * @param string $base_url         Prefix joined in front of every relative link.
 * @param string $username         Passed through to Answer::_saveAnswer().
 * @param array  $answer_link_list Relative answer links to process in order.
 * @return false|null False when a page came back with an empty body; otherwise no value.
 */
public static function saveAnswer($base_url, $username, $answer_link_list)
{
    foreach ($answer_link_list as $link) {
        $is_answer_link = preg_match('%^/question/(\\d+)/answer/(\\d+)%', $link, $parts);
        if (!$is_answer_link) {
            echo "{$link} not good\n";
            exit(1);
        }
        $qid = $parts[1];
        $aid = $parts[2];

        $page_url = $base_url . $link;
        echo "\t{$page_url}";

        $started_at = microtime(true);
        list($status, $body) = odie_get($page_url);
        echo "\t[{$status}]";

        if ($status != 200) {
            // Fail fast: record the miss and move on to the next link.
            echo "\tfail\n";
            slog("{$page_url} [{$status}] error");
            $success_ratio = get_average(0, 'success_ratio');
            continue;
        }
        $success_ratio = get_average(1, 'success_ratio');

        // The duration is taken here, after the status bookkeeping above.
        $elapsed_ms = intval((microtime(true) - $started_at) * 1000);
        $avg = intval(get_average($elapsed_ms));
        echo "\t{$elapsed_ms} ms\n";

        if (empty($body)) {
            echo "content is empty\n";
            slog("{$page_url} [{$status}] empty");
            return false;
        }

        list($question, $descript, $answer_text, $vote) = parse_answer_pure($body);
        slog("{$page_url} [{$status}] ^{$vote}\t{$question}");
        Question::saveQuestion($qid, $question, $descript);
        Answer::_saveAnswer($aid, $qid, $username, $answer_text, $vote);
    }

    // No summary unless both figures were set during the loop.
    if (!isset($success_ratio, $avg)) {
        return;
    }

    $ratio_text = intval($success_ratio * 100) . '%';
    echo "\tAvg: {$avg} ms\tsuccess_ratio: {$ratio_text}\n";
}
<?php
/**
 * For every question id stored in the `question` table, fetch the question page
 * and upsert each user found on it into the `user` table.
 *
 * NOTE(review): $pdo is not created in this script; it is presumably provided by
 * db_init.php - confirm.
 */
require dirname(__DIR__) . "/vendor/autoload.php";
require __DIR__ . "/odie.php";
require __DIR__ . "/logic.php";
require __DIR__ . "/db_init.php";

$base_url = 'http://www.zhihu.com';

$stmt = $pdo->prepare('select id from question');
if (!$stmt->execute()) {
    print_r($stmt->errorInfo());
}
$ids = $stmt->fetchAll(PDO::FETCH_COLUMN);

// Prepared once and reused for every user instead of being re-prepared per row.
$insert_user = $pdo->prepare('INSERT INTO user (name, nick_name) VALUES (?,?) ON DUPLICATE KEY UPDATE nick_name=?');

foreach ($ids as $qid) {
    $url = "{$base_url}/question/{$qid}";
    echo "fetch {$qid}\n";

    list($code, $content) = odie_get($url);
    if ($code != 200) {
        // Do not hand the body of a failed request to the parser; move on to the next question.
        echo "\t[{$code}] skip\n";
        continue;
    }

    // get_username_list() is iterated as a map of username => nickname.
    $username_list = get_username_list($content);
    foreach ($username_list as $username => $nickname) {
        echo "\t{$username} ==> {$nickname}\n";
        if (!$insert_user->execute(array($username, $nickname, $nickname))) {
            print_r($insert_user->errorInfo());
        }
    }
}
<?php
/**
 * Command-line script: fetch a user's answers listing and save every answer
 * linked from it, page by page.
 *
 * The user name is taken from the first command-line argument when present;
 * otherwise the placeholder default below is used. A 404 on the first listing
 * page ends the script with exit status 1.
 */
require dirname(__DIR__) . "/vendor/autoload.php";
require __DIR__ . "/odie.php";
require __DIR__ . "/logic.php";

$username = isset($argv[1]) ? $argv[1] : '******';
$base_url = 'http://www.zhihu.com';
$url = "{$base_url}/people/{$username}/answers";

echo "fetch {$username}\n";
list($code, $content) = odie_get($url);
if ($code == 404) {
    // The message below reads "no such user".
    echo "没有这个用户 {$username}\n";
    exit(1);
}

// First listing page.
$link_list = get_answer_link_list($content);
save_answer($base_url, $username, $link_list);

// Remaining listing pages, addressed through the ?page=N query parameter.
$num = get_page_num($content);
if ($num > 1) {
    for ($i = 2; $i <= $num; $i++) {
        echo "fetch page {$i}\n";
        $url_page = "{$url}?page={$i}";
        list(, $content) = odie_get($url_page);
        $link_list = get_answer_link_list($content);
        save_answer($base_url, $username, $link_list);
    }
}
/**
 * Fetch each answer page in the list and upsert its question and answer rows
 * through the global $pdo handle.
 *
 * Each link must start with `/question/<digits>/answer/<digits>`; a link that does
 * not match ends the whole process with exit status 1. A page that does not
 * answer with status 200 is re-fetched a bounded number of times. The function
 * gives up and returns false only when the status is still not 200 after the last
 * attempt, so a re-fetch that succeeds is always used.
 *
 * @param string $base_url         Prefix joined in front of every relative link.
 * @param string $username         Stored in the `user` column of the answer row.
 * @param array  $answer_link_list Relative answer links to process in order.
 * @return false|null False when a page could not be fetched or had an empty body;
 *                    otherwise no value.
 */
function save_answer_to_db($base_url, $username, $answer_link_list)
{
    global $pdo;

    // One initial request plus at most this many re-fetches per link.
    $max_retries = 7;

    // Prepared on first use and then reused for every remaining link.
    $question_stmt = null;
    $answer_stmt = null;

    foreach ($answer_link_list as $url) {
        echo "\t{$base_url}{$url}";
        if (preg_match('%^/question/(\\d+)/answer/(\\d+)%', $url, $matches)) {
            $qid = $matches[1];
            $aid = $matches[2];
        } else {
            echo "{$url} not good\n";
            exit(1);
        }

        $url = $base_url . $url;
        list($code, $content) = odie_get($url);
        echo "\t{$code}\n";

        // Automatic re-fetch: keep trying while the status is not 200 and attempts remain.
        for ($retry = 0; $code != 200 && $retry < $max_retries; $retry++) {
            list($code, $content) = odie_get($url);
            echo "\t{$code}\n";
        }
        // Decide on the final status, not on the attempt counter, so that a
        // successful last attempt is not thrown away.
        if ($code != 200) {
            echo 'can not fetch', "\n";
            return false;
        }

        if (empty($content)) {
            echo "content is empty\n";
            return false;
        }

        list($question, $descript, $content, $vote) = parse_answer_pure($content);
        echo "\t^{$vote}\t{$question}\n";

        if ($question_stmt === null) {
            $question_stmt = $pdo->prepare('INSERT INTO question (id, title, description) VALUES (?,?,?) ON DUPLICATE KEY UPDATE title=?,description=?');
            $answer_stmt = $pdo->prepare('INSERT INTO answer (id, q_id, user, text, vote) VALUES (?,?,?,?,?) ON DUPLICATE KEY UPDATE text=?, vote=?');
        }

        if (!$question_stmt->execute(array($qid, $question, $descript, $question, $descript))) {
            print_r($question_stmt->errorInfo());
        }
        if (!$answer_stmt->execute(array($aid, $qid, $username, $content, $vote, $content, $vote))) {
            print_r($answer_stmt->errorInfo());
        }
    }
}