public function login_call($serialized_request)
 {
     $client = new HttpClient($this->domain);
     $client->cookie_host = $this->domain;
     $client->post($this->location . "login/json/", $serialized_request);
     $this->cookies = $client->getCookies();
     return $client->getContent();
 }
 /**
  * @return HttpClient
  */
 public function getRemoteConnexion(&$remoteSecureToken, $refreshSessId = false, $repository = null)
 {
     require_once AJXP_BIN_FOLDER . "/class.HttpClient.php";
     if ($repository != null) {
         $crtRep = $repository;
     } else {
         $crtRep = ConfService::getRepository();
     }
     $httpClient = new HttpClient($crtRep->getOption("HOST"));
     $httpClient->cookie_host = $crtRep->getOption("HOST");
     $httpClient->timeout = 10;
     if (isset($_SESSION["AJXP_REMOTE_SESSION"]) && is_array($_SESSION["AJXP_REMOTE_SESSION"])) {
         $httpClient->setCookies($_SESSION["AJXP_REMOTE_SESSION"]);
     }
     //$httpClient->setDebug(true);
     if (!isset($_SESSION["AJXP_REMOTE_SECURE_TOKEN"])) {
         $httpClient->get($crtRep->getOption("URI") . "?get_action=get_secure_token");
         $remoteSecureToken = $httpClient->getContent();
         $_SESSION["AJXP_REMOTE_SECURE_TOKEN"] = $remoteSecureToken;
     } else {
         $remoteSecureToken = $_SESSION["AJXP_REMOTE_SECURE_TOKEN"];
     }
     if (!$crtRep->getOption("USE_AUTH")) {
         return $httpClient;
     }
     $uri = "";
     if ($crtRep->getOption("AUTH_URI") != "") {
         $httpClient->setAuthorization($crtRep->getOption("AUTH_USER"), $crtRep->getOption("AUTH_PASS"));
         $uri = $crtRep->getOption("AUTH_URI") . "?secure_token={$remoteSecureToken}";
     }
     if (!isset($_SESSION["AJXP_REMOTE_SESSION"]) || !is_array($_SESSION["AJXP_REMOTE_SESSION"]) || $refreshSessId) {
         if ($uri == "") {
             $this->logDebug("Remote_fs : relog necessary");
             // Retrieve a seed!
             $httpClient->get($crtRep->getOption("URI") . "?get_action=get_seed&secure_token={$remoteSecureToken}");
             $seed = $httpClient->getContent();
             $cookies = $httpClient->getCookies();
             if (isset($cookies["AjaXplorer"])) {
                 $_SESSION["AJXP_REMOTE_SESSION"] = $cookies;
             }
             $user = $crtRep->getOption("AUTH_USER");
             $pass = $crtRep->getOption("AUTH_PASS");
             $pass = md5(md5($pass) . $seed);
             $uri = $crtRep->getOption("URI") . "?get_action=login&userid=" . $user . "&password="******"&login_seed={$seed}&secure_token={$remoteSecureToken}";
             $httpClient->get($uri);
             $content = $httpClient->getContent();
             $matches = array();
             if (preg_match_all('#.*?secure_token="(.*?)".*?#s', $content, $matches)) {
                 $remoteSecureToken = $matches[1][0];
                 $_SESSION["AJXP_REMOTE_SECURE_TOKEN"] = $remoteSecureToken;
             }
             $httpClient->setHeadersOnly(false);
         } else {
             $httpClient->setHeadersOnly(true);
             $httpClient->get($uri);
             $httpClient->setHeadersOnly(false);
         }
         $cookies = $httpClient->getCookies();
         $_SESSION["AJXP_REMOTE_SESSION"] = $httpClient->getCookies();
     } else {
         $httpClient->setCookies($_SESSION["AJXP_REMOTE_SESSION"]);
     }
     return $httpClient;
 }
 /**
  * @return HttpClient
  */
 function getRemoteConnexion(&$remoteSessionId, $refreshSessId = false)
 {
     require_once INSTALL_PATH . "/server/classes/class.HttpClient.php";
     $crtRep = ConfService::getRepository();
     $httpClient = new HttpClient($crtRep->getOption("HOST"));
     $httpClient->cookie_host = $crtRep->getOption("HOST");
     $httpClient->timeout = 50;
     //$httpClient->setDebug(true);
     if ($crtRep->getOption("AUTH_URI") != "") {
         $httpClient->setAuthorization($crtRep->getOption("AUTH_NAME"), $crtRep->getOption("AUTH_PASS"));
     }
     if (!isset($_SESSION["AJXP_REMOTE_SESSION"]) || $refreshSessId) {
         $httpClient->setHeadersOnly(true);
         $httpClient->get($crtRep->getOption("AUTH_URI"));
         $httpClient->setHeadersOnly(false);
         $cookies = $httpClient->getCookies();
         if (isset($cookies["PHPSESSID"])) {
             $_SESSION["AJXP_REMOTE_SESSION"] = $cookies["PHPSESSID"];
             $remoteSessionId = $cookies["PHPSESSID"];
         }
     } else {
         $remoteSessionId = $_SESSION["AJXP_REMOTE_SESSION"];
         $httpClient->setCookies(array("PHPSESSID" => $remoteSessionId));
     }
     return $httpClient;
 }
Пример #4
0
function main($subDir, $class, $cookieURL, $indexURL, $totalClass, $curClass, $code)
{
    $isSleep = true;
    makeDir("./html/{$subDir}/{$class}/");
    $dataFileName = "data/{$subDir}/{$class}.log";
    $httpClient = new HttpClient("epub.cnki.net");
    $content = "";
    $indexFname = "./html/{$subDir}/{$class}/index.html";
    $tf = iconv("utf-8", "gb2312", $indexFname);
    $cookies = "";
    if (file_exists($tf)) {
        $isSleep = false;
        $content = file_get_contents($tf);
        echo "From cache get index.....\n";
    } else {
        /*获取并设置cookie*/
        $httpClient->get($cookieURL);
        $cookies = $httpClient->getCookies();
        $httpClient->setCookies($cookies);
        if (!$cookies) {
            die("cookie error");
        }
        $isSleep = true;
        $httpClient->get($indexURL);
        $content = $httpClient->getContent();
        save($indexFname, $content);
        //保存
        echo "save index file...\n";
    }
    /* 解析出一共有多少页面 */
    $pageCount = parsePageCount($content);
    echo "Page is {$pageCount} ****\n";
    $articleCount = ARTICLE_PRE_PAGE * $pageCount;
    //计算一共有多少篇文章,大于等于实际文章数目,不影响结果
    echo "total article is {$articleCount}\n";
    $pageCount = $articleCount / ARTICLE_PRE_PAGE;
    $pageCount = ceil($pageCount);
    //向上取整,不放过任何数据
    if ($pageCount == 0) {
        $pageCount = 1;
    }
    if ($pageCount > 50) {
        echo "page count is big than 50\n";
    }
    echo "total page of {$class} is : {$pageCount}...............{$curClass} of {$totalClass}\n";
    if ($isSleep) {
        fakeSleep();
    }
    /* 抓取每一个页面并且保存下来,保存的同时进行解析 */
    for ($i = 1; $i <= $pageCount; $i++) {
        $content = NULL;
        $pageI = getPageI($indexURL, $i);
        //第i页的地址
        $htmlI = "./html/{$subDir}/{$class}/{$i}.html";
        if (!file_exists(iconv("utf-8", "gb2312", $htmlI))) {
            $isSleep = true;
            $httpClient->setCookies($cookies);
            $httpClient->get($pageI);
            $content = $httpClient->getContent();
            save($htmlI, $content);
            echo "From newwork & save {$i}.html..........[{$i} of {$pageCount}]\n";
        } else {
            $tmpf2 = iconv("utf-8", "gb2312", $htmlI);
            $content = file_get_contents($tmpf2);
            $ok = validatePageContent($content);
            //是否出现了验证码
            if (!$ok) {
                $i = $i - 1;
                delFile($htmlI);
            } else {
                $isSleep = false;
                echo "Find local file {$htmlI} & skip\n";
            }
            //continue;
        }
        $logName = "./data/{$subDir}/{$class}.log";
        if (!validatePageContent($content)) {
            $i = $i - 1;
            delFile($htmlI);
            dosleep(60);
            $httpClient = new HttpClient("epub.cnki.net");
            $httpClient->get($cookieURL);
            $cookies = $httpClient->getCookies();
            $httpClient->setCookies($cookies);
            continue;
        }
        parseContent($content, $logName, $code);
        if ($i != $pageCount && $isSleep) {
            fakeSleep();
        } else {
            echo "+\n";
            echo "+\n";
            echo "+ {$class} done\n";
            echo "+\n";
            echo "+\n";
        }
    }
}
 /**
  * Initialize and return the HttpClient
  *
  * @return HttpClient
  */
 protected function createHttpClient()
 {
     require_once INSTALL_PATH . "/server/classes/class.HttpClient.php";
     $httpClient = new HttpClient($this->host);
     $httpClient->cookie_host = $this->host;
     $httpClient->timeout = 50;
     AJXP_Logger::debug("Creating Http client", array());
     //$httpClient->setDebug(true);
     if (!$this->use_auth) {
         return $httpClient;
     }
     $uri = "";
     if ($this->auth_path != "") {
         $httpClient->setAuthorization($this->user, $this->password);
         $uri = $this->auth_path;
     }
     if (!isset($_SESSION["AJXP_REMOTE_SESSION"])) {
         if ($uri == "") {
             // Retrieve a seed!
             $httpClient->get($this->path . "?get_action=get_seed");
             $seed = $httpClient->getContent();
             $user = $this->user;
             $pass = $this->password;
             $pass = md5(md5($pass) . $seed);
             $uri = $this->path . "?get_action=login&userid=" . $user . "&password="******"&login_seed={$seed}";
         }
         $httpClient->setHeadersOnly(true);
         $httpClient->get($uri);
         $httpClient->setHeadersOnly(false);
         $cookies = $httpClient->getCookies();
         if (isset($cookies["AjaXplorer"])) {
             $_SESSION["AJXP_REMOTE_SESSION"] = $cookies["AjaXplorer"];
             $remoteSessionId = $cookies["AjaXplorer"];
         }
     } else {
         $remoteSessionId = $_SESSION["AJXP_REMOTE_SESSION"];
         $httpClient->setCookies(array("AjaXplorer" => $remoteSessionId));
     }
     AJXP_Logger::debug("Http Client created", array());
     return $httpClient;
 }
 /**
  * @return HttpClient
  */
 function getRemoteConnexion(&$remoteSessionId, $refreshSessId = false)
 {
     require_once INSTALL_PATH . "/server/classes/class.HttpClient.php";
     $crtRep = ConfService::getRepository();
     $httpClient = new HttpClient($crtRep->getOption("HOST"));
     $httpClient->cookie_host = $crtRep->getOption("HOST");
     $httpClient->timeout = 10;
     //$httpClient->setDebug(true);
     if (!$crtRep->getOption("USE_AUTH")) {
         return $httpClient;
     }
     $uri = "";
     if ($crtRep->getOption("AUTH_URI") != "") {
         $httpClient->setAuthorization($crtRep->getOption("AUTH_USER"), $crtRep->getOption("AUTH_PASS"));
         $uri = $crtRep->getOption("AUTH_URI");
     }
     if (!isset($_SESSION["AJXP_REMOTE_SESSION"]) || $refreshSessId) {
         if ($uri == "") {
             // Retrieve a seed!
             $httpClient->get($crtRep->getOption("URI") . "?get_action=get_seed");
             $seed = $httpClient->getContent();
             $user = $crtRep->getOption("AUTH_USER");
             $pass = $crtRep->getOption("AUTH_PASS");
             $pass = md5(md5($pass) . $seed);
             $uri = $crtRep->getOption("URI") . "?get_action=login&userid=" . $user . "&password="******"&login_seed={$seed}";
         }
         $httpClient->setHeadersOnly(true);
         $httpClient->get($uri);
         $httpClient->setHeadersOnly(false);
         $cookies = $httpClient->getCookies();
         if (isset($cookies["AjaXplorer"])) {
             $_SESSION["AJXP_REMOTE_SESSION"] = $cookies["AjaXplorer"];
             $remoteSessionId = $cookies["AjaXplorer"];
         }
     } else {
         $remoteSessionId = $_SESSION["AJXP_REMOTE_SESSION"];
         $httpClient->setCookies(array("AjaXplorer" => $remoteSessionId));
     }
     return $httpClient;
 }
Пример #7
0
 /*获取Referer头*/
 $dbCode = get_db_code($u);
 //CDFD
 $refUrl = get_ref($dbCode);
 $cachedHtml = $dataSavePath . "/tmp/{$paperName}.html";
 $absPath = $dataSavePath . "/" . $paperName . ".log";
 //echo "Cache check $cachedHtml...";
 echo iconv("utf-8", "gb2312", $cachedHtml) . " .... ";
 $content = "";
 $localedCachedHtml = iconv("utf-8", "gb2312//IGNORE", $cachedHtml);
 if (!file_exists($localedCachedHtml)) {
     $sleep = true;
     echo "Miss!\n";
     $cookieURL = getCookieURL($code);
     /*获取并设置cookie*/
     $cookies = $httpClient->getCookies();
     if (!$cookies) {
         do {
             $httpClient->get($cookieURL);
             $cookies = $httpClient->getCookies();
             if (!$cookies) {
                 $sc = 30;
                 echo "Cookie是空的,睡眠{$sc} S\n";
                 sleep($sc);
             } else {
                 $httpClient->setCookies($cookies);
             }
             // $httpClient->get($cookieURL);
             // $cookies = $httpClient->getCookies();
         } while (!$cookies);
     }
Пример #8
0
 private static function send($url, $cookies, RequestSet $set, $cookieTable)
 {
     $bits = parse_url($url);
     $host = $bits['host'];
     $port = isset($bits['port']) ? $bits['port'] : 80;
     $path = isset($bits['path']) ? $bits['path'] : '/';
     $conn = new HttpClient($host, $port);
     $conn->setCookies($cookieTable);
     $conn->setContentType("text/xml;charset=UTF-8");
     // Output ...
     $xml = $set->toXMLString();
     if (!$conn->post($path, $xml)) {
         throw new Exception("PLLClient send exception");
     }
     // Input ...
     $in_string = $conn->getContent();
     $cookieTable = $conn->getCookies();
     $resset = ResponseSet::parseXML($in_string);
     return $resset->getResponses();
 }