/**
 * Send a login request to the remote "login/json/" endpoint.
 *
 * A fresh HttpClient is opened on $this->domain, the payload is POSTed to
 * $this->location . "login/json/", and the cookies held by the client after
 * the exchange are stored on $this->cookies.
 *
 * @param mixed $serialized_request Payload handed unchanged to HttpClient::post().
 * @return mixed Whatever HttpClient::getContent() reports after the POST.
 */
public function login_call($serialized_request)
{
    $http = new HttpClient($this->domain);
    $http->cookie_host = $this->domain;

    $endpoint = $this->location . "login/json/";
    $http->post($endpoint, $serialized_request);

    // Remember the cookies from this exchange on the instance.
    $this->cookies = $http->getCookies();

    return $http->getContent();
}
/**
 * Build an HttpClient for a remote repository, obtaining a secure token and,
 * when the repository requires it, a remote session.
 *
 * Steps performed:
 *  - Use $repository when given, otherwise ConfService::getRepository().
 *  - Re-attach the cookies cached in $_SESSION["AJXP_REMOTE_SESSION"] (if an array).
 *  - Read the secure token from $_SESSION["AJXP_REMOTE_SECURE_TOKEN"], or fetch
 *    it with "?get_action=get_secure_token" and cache it there.
 *  - Return immediately when the "USE_AUTH" option is falsy.
 *  - When no session cookies are cached (or $refreshSessId is truthy), log in:
 *    either by requesting AUTH_URI with HTTP authorization, or, when AUTH_URI
 *    is empty, through the get_seed / login actions with a seeded MD5 hash.
 *    The resulting cookies are cached in $_SESSION["AJXP_REMOTE_SESSION"].
 *
 * @param mixed $remoteSecureToken By-reference output: receives the secure token in use.
 * @param bool  $refreshSessId     Force a new login even if session cookies are cached.
 * @param mixed $repository        Repository object to use; null selects the current one.
 * @return HttpClient
 */
public function getRemoteConnexion(&$remoteSecureToken, $refreshSessId = false, $repository = null)
{
    require_once AJXP_BIN_FOLDER . "/class.HttpClient.php";

    if ($repository != null) {
        $crtRep = $repository;
    } else {
        $crtRep = ConfService::getRepository();
    }

    $httpClient = new HttpClient($crtRep->getOption("HOST"));
    $httpClient->cookie_host = $crtRep->getOption("HOST");
    $httpClient->timeout = 10;
    if (isset($_SESSION["AJXP_REMOTE_SESSION"]) && is_array($_SESSION["AJXP_REMOTE_SESSION"])) {
        $httpClient->setCookies($_SESSION["AJXP_REMOTE_SESSION"]);
    }

    // Secure token: fetched once, then served from the local session.
    if (!isset($_SESSION["AJXP_REMOTE_SECURE_TOKEN"])) {
        $httpClient->get($crtRep->getOption("URI") . "?get_action=get_secure_token");
        $remoteSecureToken = $httpClient->getContent();
        $_SESSION["AJXP_REMOTE_SECURE_TOKEN"] = $remoteSecureToken;
    } else {
        $remoteSecureToken = $_SESSION["AJXP_REMOTE_SECURE_TOKEN"];
    }

    if (!$crtRep->getOption("USE_AUTH")) {
        return $httpClient;
    }

    $uri = "";
    if ($crtRep->getOption("AUTH_URI") != "") {
        $httpClient->setAuthorization($crtRep->getOption("AUTH_USER"), $crtRep->getOption("AUTH_PASS"));
        $uri = $crtRep->getOption("AUTH_URI") . "?secure_token={$remoteSecureToken}";
    }

    if (!isset($_SESSION["AJXP_REMOTE_SESSION"]) || !is_array($_SESSION["AJXP_REMOTE_SESSION"]) || $refreshSessId) {
        if ($uri == "") {
            $this->logDebug("Remote_fs : relog necessary");

            // Retrieve a seed; the password is sent as md5(md5(password) . seed).
            $httpClient->get($crtRep->getOption("URI") . "?get_action=get_seed&secure_token={$remoteSecureToken}");
            $seed = $httpClient->getContent();
            $cookies = $httpClient->getCookies();
            if (isset($cookies["AjaXplorer"])) {
                $_SESSION["AJXP_REMOTE_SESSION"] = $cookies;
            }

            $user = $crtRep->getOption("AUTH_USER");
            $pass = $crtRep->getOption("AUTH_PASS");
            $pass = md5(md5($pass) . $seed);

            // The seeded hash must be concatenated into the query string.
            $uri = $crtRep->getOption("URI") . "?get_action=login&userid=" . $user
                . "&password=" . $pass
                . "&login_seed={$seed}&secure_token={$remoteSecureToken}";
            $httpClient->get($uri);
            $content = $httpClient->getContent();

            // If the login response carries a secure_token="..." value, adopt it.
            $matches = array();
            if (preg_match_all('#.*?secure_token="(.*?)".*?#s', $content, $matches)) {
                $remoteSecureToken = $matches[1][0];
                $_SESSION["AJXP_REMOTE_SECURE_TOKEN"] = $remoteSecureToken;
            }
            $httpClient->setHeadersOnly(false);
        } else {
            // AUTH_URI login: only the response headers are requested.
            $httpClient->setHeadersOnly(true);
            $httpClient->get($uri);
            $httpClient->setHeadersOnly(false);
        }

        $_SESSION["AJXP_REMOTE_SESSION"] = $httpClient->getCookies();
    } else {
        $httpClient->setCookies($_SESSION["AJXP_REMOTE_SESSION"]);
    }

    return $httpClient;
}
/**
 * Create an HttpClient for the current repository and attach a remote
 * PHPSESSID to it.
 *
 * When $_SESSION["AJXP_REMOTE_SESSION"] is set and no refresh is requested,
 * the cached id is reused. Otherwise the AUTH_URI option is requested
 * (headers only) and, if the response sets a "PHPSESSID" cookie, its value is
 * cached in the session and returned through $remoteSessionId.
 *
 * @param mixed $remoteSessionId By-reference output: the remote session id, when one is known.
 * @param bool  $refreshSessId   Ignore the cached id and request a new one.
 * @return HttpClient
 */
function getRemoteConnexion(&$remoteSessionId, $refreshSessId = false)
{
    require_once INSTALL_PATH . "/server/classes/class.HttpClient.php";

    $repo = ConfService::getRepository();
    $remoteHost = $repo->getOption("HOST");

    $client = new HttpClient($remoteHost);
    $client->cookie_host = $remoteHost;
    $client->timeout = 50;

    $authUri = $repo->getOption("AUTH_URI");
    if ($authUri != "") {
        $client->setAuthorization($repo->getOption("AUTH_NAME"), $repo->getOption("AUTH_PASS"));
    }

    // Fast path: a cached remote session id exists and no refresh was asked for.
    if (isset($_SESSION["AJXP_REMOTE_SESSION"]) && !$refreshSessId) {
        $remoteSessionId = $_SESSION["AJXP_REMOTE_SESSION"];
        $client->setCookies(array("PHPSESSID" => $remoteSessionId));

        return $client;
    }

    // Otherwise hit the auth URI for its headers and pick up the session cookie.
    $client->setHeadersOnly(true);
    $client->get($authUri);
    $client->setHeadersOnly(false);

    $received = $client->getCookies();
    if (isset($received["PHPSESSID"])) {
        $_SESSION["AJXP_REMOTE_SESSION"] = $received["PHPSESSID"];
        $remoteSessionId = $received["PHPSESSID"];
    }

    return $client;
}
/**
 * Download (or load from the local cache) the index page and every result
 * page of one class, saving each page under ./html/{$subDir}/{$class}/ and
 * passing its content to parseContent().
 *
 * A page whose content fails validatePageContent() is deleted and retried
 * after a 60 second pause with a new HttpClient and new cookies.
 *
 * @param string $subDir     Sub-directory used in the html/ and data/ paths.
 * @param string $class      Class name used in paths and progress messages.
 * @param string $cookieURL  URL requested to obtain cookies.
 * @param string $indexURL   URL of the index page; also passed to getPageI().
 * @param mixed  $totalClass Only used in the progress message.
 * @param mixed  $curClass   Only used in the progress message.
 * @param mixed  $code       Passed through to parseContent().
 * @return void
 */
function main($subDir, $class, $cookieURL, $indexURL, $totalClass, $curClass, $code)
{
    $isSleep = true;
    makeDir("./html/{$subDir}/{$class}/");
    $httpClient = new HttpClient("epub.cnki.net");
    $content = "";
    $indexFname = "./html/{$subDir}/{$class}/index.html";
    $tf = iconv("utf-8", "gb2312", $indexFname);
    $cookies = "";

    if (file_exists($tf)) {
        $isSleep = false;
        $content = file_get_contents($tf);
        echo "From cache get index.....\n";
    } else {
        // Obtain the cookies and set them on the client.
        $httpClient->get($cookieURL);
        $cookies = $httpClient->getCookies();
        $httpClient->setCookies($cookies);
        if (!$cookies) {
            die("cookie error");
        }
        $isSleep = true;
        $httpClient->get($indexURL);
        $content = $httpClient->getContent();
        save($indexFname, $content); // save
        echo "save index file...\n";
    }

    // Work out how many pages there are in total.
    $pageCount = parsePageCount($content);
    echo "Page is {$pageCount} ****\n";
    // Total number of articles; it is >= the real number, which does not affect the result.
    $articleCount = ARTICLE_PRE_PAGE * $pageCount;
    echo "total article is {$articleCount}\n";
    $pageCount = $articleCount / ARTICLE_PRE_PAGE;
    $pageCount = ceil($pageCount); // round up so that no data is missed
    if ($pageCount == 0) {
        $pageCount = 1;
    }
    if ($pageCount > 50) {
        echo "page count is big than 50\n";
    }
    echo "total page of {$class} is : {$pageCount}...............{$curClass} of {$totalClass}\n";
    if ($isSleep) {
        fakeSleep();
    }

    // Loop-invariant: every page of this class is parsed into the same log.
    $logName = "./data/{$subDir}/{$class}.log";

    // Fetch every page, save it, and parse it as it is saved.
    for ($i = 1; $i <= $pageCount; $i++) {
        $content = null;
        $pageI = getPageI($indexURL, $i); // address of page i
        $htmlI = "./html/{$subDir}/{$class}/{$i}.html";
        $localHtmlI = iconv("utf-8", "gb2312", $htmlI);

        $fromCache = file_exists($localHtmlI);
        if (!$fromCache) {
            $isSleep = true;
            $httpClient->setCookies($cookies);
            $httpClient->get($pageI);
            $content = $httpClient->getContent();
            save($htmlI, $content);
            echo "From newwork & save {$i}.html..........[{$i} of {$pageCount}]\n";
        } else {
            $content = file_get_contents($localHtmlI);
        }

        // Validation detects whether a captcha page was served instead of results.
        if (!validatePageContent($content)) {
            // Rewind exactly once so that the loop's $i++ retries this same
            // page. Previously a bad cached page was rewound (and deleted)
            // twice, which stepped back to the previous page instead.
            $i = $i - 1;
            delFile($htmlI);
            dosleep(60);
            $httpClient = new HttpClient("epub.cnki.net");
            $httpClient->get($cookieURL);
            $cookies = $httpClient->getCookies();
            $httpClient->setCookies($cookies);
            continue;
        }

        if ($fromCache) {
            $isSleep = false;
            echo "Find local file \n{$htmlI} & skip\n";
        }

        parseContent($content, $logName, $code);

        // NOTE(review): the "done" banner is also printed for every page read
        // from the cache ($isSleep is false), not only for the last page —
        // behavior kept as-is; confirm whether that is intended.
        if ($i != $pageCount && $isSleep) {
            fakeSleep();
        } else {
            echo "+\n";
            echo "+\n";
            echo "+ {$class} done\n";
            echo "+\n";
            echo "+\n";
        }
    }
}
/**
 * Initialize and return the HttpClient.
 *
 * The client targets $this->host. When $this->use_auth is falsy it is
 * returned as-is. Otherwise, if no "AJXP_REMOTE_SESSION" is cached in
 * $_SESSION, a login is performed: against $this->auth_path with HTTP
 * authorization when that path is non-empty, or through the get_seed / login
 * actions on $this->path with a seeded MD5 hash of the password. The value of
 * the "AjaXplorer" cookie, if the response sets one, is cached in
 * $_SESSION["AJXP_REMOTE_SESSION"]. When a session id is already cached it is
 * attached to the client as the "AjaXplorer" cookie.
 *
 * @return HttpClient
 */
protected function createHttpClient()
{
    require_once INSTALL_PATH . "/server/classes/class.HttpClient.php";

    $httpClient = new HttpClient($this->host);
    $httpClient->cookie_host = $this->host;
    $httpClient->timeout = 50;
    AJXP_Logger::debug("Creating Http client", array());

    if (!$this->use_auth) {
        return $httpClient;
    }

    $uri = "";
    if ($this->auth_path != "") {
        $httpClient->setAuthorization($this->user, $this->password);
        $uri = $this->auth_path;
    }

    if (!isset($_SESSION["AJXP_REMOTE_SESSION"])) {
        if ($uri == "") {
            // Retrieve a seed; the password is sent as md5(md5(password) . seed).
            $httpClient->get($this->path . "?get_action=get_seed");
            $seed = $httpClient->getContent();
            $user = $this->user;
            $pass = $this->password;
            $pass = md5(md5($pass) . $seed);

            // The seeded hash must be concatenated into the query string.
            $uri = $this->path . "?get_action=login&userid=" . $user
                . "&password=" . $pass
                . "&login_seed={$seed}";
        }

        // Only the headers are needed: the session id arrives as a cookie.
        $httpClient->setHeadersOnly(true);
        $httpClient->get($uri);
        $httpClient->setHeadersOnly(false);

        $cookies = $httpClient->getCookies();
        if (isset($cookies["AjaXplorer"])) {
            $_SESSION["AJXP_REMOTE_SESSION"] = $cookies["AjaXplorer"];
        }
    } else {
        $remoteSessionId = $_SESSION["AJXP_REMOTE_SESSION"];
        $httpClient->setCookies(array("AjaXplorer" => $remoteSessionId));
    }

    AJXP_Logger::debug("Http Client created", array());

    return $httpClient;
}
/**
 * Create an HttpClient for the current repository and attach a remote
 * "AjaXplorer" session to it.
 *
 * When the "USE_AUTH" option is falsy the bare client is returned. Otherwise,
 * if no session id is cached in $_SESSION["AJXP_REMOTE_SESSION"] (or
 * $refreshSessId is truthy), a login is performed: against AUTH_URI with HTTP
 * authorization when that option is non-empty, or through the get_seed /
 * login actions on URI with a seeded MD5 hash of AUTH_PASS. The value of the
 * "AjaXplorer" cookie, if the response sets one, is cached in the session and
 * returned through $remoteSessionId.
 *
 * @param mixed $remoteSessionId By-reference output: the remote session id, when one is known.
 * @param bool  $refreshSessId   Ignore the cached id and log in again.
 * @return HttpClient
 */
function getRemoteConnexion(&$remoteSessionId, $refreshSessId = false)
{
    require_once INSTALL_PATH . "/server/classes/class.HttpClient.php";

    $crtRep = ConfService::getRepository();
    $httpClient = new HttpClient($crtRep->getOption("HOST"));
    $httpClient->cookie_host = $crtRep->getOption("HOST");
    $httpClient->timeout = 10;

    if (!$crtRep->getOption("USE_AUTH")) {
        return $httpClient;
    }

    $uri = "";
    if ($crtRep->getOption("AUTH_URI") != "") {
        $httpClient->setAuthorization($crtRep->getOption("AUTH_USER"), $crtRep->getOption("AUTH_PASS"));
        $uri = $crtRep->getOption("AUTH_URI");
    }

    if (!isset($_SESSION["AJXP_REMOTE_SESSION"]) || $refreshSessId) {
        if ($uri == "") {
            // Retrieve a seed; the password is sent as md5(md5(password) . seed).
            $httpClient->get($crtRep->getOption("URI") . "?get_action=get_seed");
            $seed = $httpClient->getContent();
            $user = $crtRep->getOption("AUTH_USER");
            $pass = $crtRep->getOption("AUTH_PASS");
            $pass = md5(md5($pass) . $seed);

            // The seeded hash must be concatenated into the query string.
            $uri = $crtRep->getOption("URI") . "?get_action=login&userid=" . $user
                . "&password=" . $pass
                . "&login_seed={$seed}";
        }

        // Only the headers are needed: the session id arrives as a cookie.
        $httpClient->setHeadersOnly(true);
        $httpClient->get($uri);
        $httpClient->setHeadersOnly(false);

        $cookies = $httpClient->getCookies();
        if (isset($cookies["AjaXplorer"])) {
            $_SESSION["AJXP_REMOTE_SESSION"] = $cookies["AjaXplorer"];
            $remoteSessionId = $cookies["AjaXplorer"];
        }
    } else {
        $remoteSessionId = $_SESSION["AJXP_REMOTE_SESSION"];
        $httpClient->setCookies(array("AjaXplorer" => $remoteSessionId));
    }

    return $httpClient;
}
/*获取Referer头*/ $dbCode = get_db_code($u); //CDFD $refUrl = get_ref($dbCode); $cachedHtml = $dataSavePath . "/tmp/{$paperName}.html"; $absPath = $dataSavePath . "/" . $paperName . ".log"; //echo "Cache check $cachedHtml..."; echo iconv("utf-8", "gb2312", $cachedHtml) . " .... "; $content = ""; $localedCachedHtml = iconv("utf-8", "gb2312//IGNORE", $cachedHtml); if (!file_exists($localedCachedHtml)) { $sleep = true; echo "Miss!\n"; $cookieURL = getCookieURL($code); /*获取并设置cookie*/ $cookies = $httpClient->getCookies(); if (!$cookies) { do { $httpClient->get($cookieURL); $cookies = $httpClient->getCookies(); if (!$cookies) { $sc = 30; echo "Cookie是空的,睡眠{$sc} S\n"; sleep($sc); } else { $httpClient->setCookies($cookies); } // $httpClient->get($cookieURL); // $cookies = $httpClient->getCookies(); } while (!$cookies); }
/**
 * POST a RequestSet as XML to $url and return the parsed responses.
 *
 * The host, port (default 80) and path (default "/") are taken from $url;
 * any query string in $url is not forwarded. The cookies in $cookieTable are
 * sent with the request.
 *
 * NOTE(review): $cookieTable is received by value. The original code
 * re-assigned it from the response cookies after the POST, which had no
 * effect outside this method; that dead store has been removed. If callers
 * are meant to see the updated cookies, the parameter would need to become a
 * reference — confirm against the call sites.
 *
 * @param string     $url         Absolute URL of the service endpoint.
 * @param mixed      $cookies     Not used by this method.
 * @param RequestSet $set         Requests to serialize and send.
 * @param mixed      $cookieTable Cookies passed to HttpClient::setCookies().
 * @return mixed Result of ResponseSet::getResponses().
 * @throws Exception When $url has no usable host or the POST fails.
 */
private static function send($url, $cookies, RequestSet $set, $cookieTable)
{
    $bits = parse_url($url);
    // parse_url() returns false on seriously malformed URLs and omits 'host'
    // for relative ones; fail explicitly instead of connecting to a null host.
    if (!is_array($bits) || empty($bits['host'])) {
        throw new Exception("PLLClient send exception");
    }

    $host = $bits['host'];
    $port = isset($bits['port']) ? $bits['port'] : 80;
    $path = isset($bits['path']) ? $bits['path'] : '/';

    $conn = new HttpClient($host, $port);
    $conn->setCookies($cookieTable);
    $conn->setContentType("text/xml;charset=UTF-8");

    // Output ...
    $xml = $set->toXMLString();
    if (!$conn->post($path, $xml)) {
        throw new Exception("PLLClient send exception");
    }

    // Input ...
    $in_string = $conn->getContent();
    $resset = ResponseSet::parseXML($in_string);

    return $resset->getResponses();
}