Пример #1
0
/**
 * Fetch a URL through odie_get() and log how long the call took.
 *
 * The log line contains the URL, element 0 of odie_get()'s result
 * (presumably the response code - confirm against odie_get()) and the
 * elapsed wall-clock time in whole milliseconds.
 *
 * @param string $url  Address passed to odie_get().
 * @param mixed  $opts Optional second argument forwarded unchanged to odie_get().
 *
 * @return mixed The value returned by odie_get(), untouched.
 */
function uget($url, $opts = null)
{
    $started = microtime(true);
    $rs = odie_get($url, $opts);
    // Seconds -> milliseconds, truncated to an integer for the log line.
    $t = intval((microtime(true) - $started) * 1000);
    slog("{$url} [{$rs['0']}]\t{$t} ms");

    return $rs;
}
Пример #2
0
 /**
  * Download every answer in $answer_link_list and persist it with its question.
  *
  * Each link must start with /question/<qid>/answer/<aid>. For every link the
  * page is fetched with odie_get(), parsed with parse_answer_pure() and handed
  * to Question::saveQuestion() and Answer::_saveAnswer(). Progress is echoed
  * and logged through slog() along the way.
  *
  * @param string   $base_url         Prefix prepended to each relative link.
  * @param string   $username         Passed through to Answer::_saveAnswer().
  * @param iterable $answer_link_list Relative answer links to process.
  *
  * @return false|null false as soon as a page answers 200 with empty content;
  *                    null once the whole list has been walked. A link that
  *                    does not match the expected pattern terminates the
  *                    process with exit(1).
  */
 public static function saveAnswer($base_url, $username, $answer_link_list)
 {
     foreach ($answer_link_list as $url) {
         // A malformed link is treated as fatal for the whole run.
         if (!preg_match('%^/question/(\\d+)/answer/(\\d+)%', $url, $matches)) {
             echo "{$url} not good\n";
             exit(1);
         }
         list(, $qid, $aid) = $matches;

         $url = $base_url . $url;
         echo "\t{$url}";
         $started = microtime(true);
         list($code, $content) = odie_get($url);
         echo "\t[{$code}]";

         if ($code != 200) {
             // fail fast: record the miss and move on to the next link
             echo "\tfail\n";
             slog("{$url} [{$code}] error");
             // get_average() is opaque here; presumably it keeps a running
             // average per key - confirm against its definition.
             $success_ratio = get_average(0, 'success_ratio');
             continue;
         }
         $success_ratio = get_average(1, 'success_ratio');

         // Elapsed time of the successful fetch in whole milliseconds.
         $t = intval((microtime(true) - $started) * 1000);
         $avg = intval(get_average($t));
         echo "\t{$t} ms\n";

         if (empty($content)) {
             // An empty body stops processing of all remaining links.
             echo "content is empty\n";
             slog("{$url} [{$code}] empty");
             return false;
         }

         $parsed = parse_answer_pure($content);
         list($question, $descript, $content, $vote) = $parsed;
         slog("{$url} [{$code}] ^{$vote}\t{$question}");
         Question::saveQuestion($qid, $question, $descript);
         Answer::_saveAnswer($aid, $qid, $username, $content, $vote);
     }

     // Summary is printed only when at least one fetch succeeded ($avg set).
     if (isset($success_ratio, $avg)) {
         $success_ratio = intval($success_ratio * 100) . '%';
         echo "\tAvg: {$avg} ms\tsuccess_ratio: {$success_ratio}\n";
     }
 }
<?php

require dirname(__DIR__) . "/vendor/autoload.php";
require __DIR__ . "/odie.php";
require __DIR__ . "/logic.php";
require __DIR__ . "/db_init.php";
// Walk every question id stored in the `question` table, fetch its page and
// upsert each (username, nickname) pair found on it into the `user` table.
$base_url = 'http://www.zhihu.com';
$stmt = $pdo->prepare('select id from question');
if (!$stmt->execute()) {
    print_r($stmt->errorInfo());
}
$ids = $stmt->fetchAll(PDO::FETCH_COLUMN);

// The upsert is identical for every user, so prepare it once and re-execute
// it with new parameters instead of re-preparing it inside the inner loop.
$user_stmt = $pdo->prepare('INSERT INTO user (name, nick_name) VALUES (?,?) ON DUPLICATE KEY UPDATE nick_name=?');

foreach ($ids as $qid) {
    $url = "{$base_url}/question/{$qid}";
    echo "fetch {$qid}\n";
    list($code, $content) = odie_get($url);
    // get_username_list() yields username => nickname pairs, as consumed below.
    $username_list = get_username_list($content);
    foreach ($username_list as $username => $nickname) {
        echo "\t{$username} ==> {$nickname}\n";
        // $nickname is bound twice: once for the INSERT, once for the UPDATE.
        if (!$user_stmt->execute(array($username, $nickname, $nickname))) {
            print_r($user_stmt->errorInfo());
        }
    }
}
Пример #4
0
<?php

require dirname(__DIR__) . "/vendor/autoload.php";
require __DIR__ . "/odie.php";
require __DIR__ . "/logic.php";
// Crawl all answer pages of one user and hand every answer link to save_answer().
// The username is taken from the first CLI argument; the literal below is the fallback.
$username = isset($argv[1]) ? $argv[1] : '******';

$base_url = 'http://www.zhihu.com';
$url = "{$base_url}/people/{$username}/answers";
echo "fetch {$username}\n";
list($code, $content) = odie_get($url);
if ($code == 404) {
    // The message reads "no such user".
    echo "没有这个用户 {$username}\n";
    exit(1);
}

// The first page was fetched above; store its answers before reading the page count.
save_answer($base_url, $username, get_answer_link_list($content));

// Remaining pages (2..$num); the loop is a no-op when there is only one page.
$num = get_page_num($content);
for ($i = 2; $i <= $num; $i++) {
    echo "fetch page {$i}\n";
    // Only the body is needed here; the first element of the result is ignored.
    list(, $content) = odie_get("{$url}?page={$i}");
    save_answer($base_url, $username, get_answer_link_list($content));
}
Пример #5
0
/**
 * Fetch each answer page and upsert the answer and its question into the database.
 *
 * Every link must start with /question/<qid>/answer/<aid>. The page is fetched
 * with odie_get(); a non-200 response is re-fetched up to seven more times.
 * The parsed result of parse_answer_pure() is written with two upserts through
 * the global $pdo (presumably a PDO connection set up by the caller).
 *
 * @param string   $base_url         Prefix prepended to each relative link.
 * @param string   $username         Stored in the `user` column of `answer`.
 * @param iterable $answer_link_list Relative answer links to process.
 *
 * @return false|null false when a page still fails after all retries or comes
 *                    back with empty content; null after the whole list has
 *                    been processed. A link that does not match the expected
 *                    pattern terminates the process with exit(1).
 */
function save_answer_to_db($base_url, $username, $answer_link_list)
{
    global $pdo;

    // Re-fetches allowed after the initial attempt. Seven matches the number
    // of requests the previous loop issued before giving up.
    $max_retries = 7;

    // Both upserts are loop-invariant: prepare them lazily on first use and
    // re-execute them for every link instead of re-preparing each time.
    $question_stmt = null;
    $answer_stmt = null;

    foreach ($answer_link_list as $url) {
        echo "\t{$base_url}{$url}";
        if (!preg_match('%^/question/(\\d+)/answer/(\\d+)%', $url, $matches)) {
            echo "{$url} not good\n";
            exit(1);
        }
        $qid = $matches[1];
        $aid = $matches[2];

        $url = $base_url . $url;
        list($code, $content) = odie_get($url);
        echo "\t{$code}\n";

        // Automatic re-fetch. The budget is checked BEFORE each retry so that
        // a 200 returned by the final retry is used rather than thrown away.
        $retries = 0;
        while ($code != 200) {
            if ($retries >= $max_retries) {
                echo 'can not fetch', "\n";
                return false;
            }
            list($code, $content) = odie_get($url);
            echo "\t{$code}\n";
            $retries++;
        }

        if (empty($content)) {
            echo "content is empty\n";
            return false;
        }

        list($question, $descript, $content, $vote) = parse_answer_pure($content);
        echo "\t^{$vote}\t{$question}\n";

        if ($question_stmt === null) {
            $question_stmt = $pdo->prepare('INSERT INTO question (id, title, description) VALUES (?,?,?) ON DUPLICATE KEY UPDATE title=?,description=?');
            $answer_stmt = $pdo->prepare('INSERT INTO answer (id, q_id, user, text, vote) VALUES (?,?,?,?,?) ON DUPLICATE KEY UPDATE text=?, vote=?');
        }

        // Values are bound twice: once for the INSERT part, once for the UPDATE part.
        if (!$question_stmt->execute(array($qid, $question, $descript, $question, $descript))) {
            print_r($question_stmt->errorInfo());
        }
        if (!$answer_stmt->execute(array($aid, $qid, $username, $content, $vote, $content, $vote))) {
            print_r($answer_stmt->errorInfo());
        }
    }
}