/**
 * Crawl a single URL and record the outcome in $this->links.
 *
 * The entry is keyed by the value of getPathFromUrl($url). The HTTP status
 * code is always stored. When the response is a 200 HTML document, its title
 * info is extracted, the entry is marked as visited and, for entries flagged
 * as non-external, its child links are collected and traversed with
 * $depth - 1.
 *
 * Any exception raised while crawling is recorded on the same entry as a
 * '404' status together with the exception code and message; nothing is
 * rethrown.
 *
 * @param string $url   URL to request
 * @param int    $depth current depth; children are traversed with $depth - 1
 */
protected function traverseSingle($url, $depth)
{
    // Fallback key, only used if resolving the path key itself fails.
    $hash = $url;

    try {
        // Resolve the key before the request, so a failed request is recorded
        // on the same entry that a successful one would have updated.
        $hash = $this->getPathFromUrl($url);

        $client = new Client();
        $client->followRedirects();

        $crawler = $client->request('GET', $url);
        $response = $client->getResponse();
        $statusCode = $response->getStatus();

        $this->links[$hash]['status_code'] = $statusCode;

        if ($statusCode !== 200) {
            return;
        }

        // The header may be missing (or not a plain string); treat that as
        // "not an HTML document" instead of handing it to strpos().
        $contentType = $response->getHeader('Content-Type');
        if (!is_string($contentType) || strpos($contentType, 'text/html') === false) {
            return;
        }

        // Children are only traversed when the response is an HTML document.
        $this->extractTitleInfo($crawler, $hash);

        $childLinks = array();
        if (isset($this->links[$hash]['external_link'])
            && $this->links[$hash]['external_link'] === false
        ) {
            $childLinks = $this->extractLinksInfo($crawler, $hash);
        }

        $this->links[$hash]['visited'] = true;
        $this->traverseChildren($childLinks, $depth - 1);
    } catch (\Exception $e) {
        // One handler for every failure (transport errors such as
        // CurlException included): all of them are recorded the same way.
        $this->links[$hash]['status_code'] = '404';
        $this->links[$hash]['error_code'] = $e->getCode();
        $this->links[$hash]['error_message'] = $e->getMessage();
    }
}
/**
 * Fetch the account's public Twitter page and update the status flags.
 *
 * - A non-200 response marks the account as not found.
 * - A page containing "suspended" (case-insensitive) marks it as suspended.
 * - Otherwise the newest post timestamp found on the page is compared with
 *   the current time, and the account is flagged when it has no post within
 *   the last 24 hours (this includes pages with no timestamp at all).
 *
 * @return false|null false when the account is not found or suspended;
 *                    no value is returned otherwise
 */
private function fetchDetails()
{
    $url = 'https://www.twitter.com/' . $this->getAccountName();

    $client = new Client();
    $client->followRedirects();
    $crawler = $client->request('GET', $url);

    /**
     * @var Response $response
     */
    $response = $client->getResponse();

    if ((int) $response->getStatus() !== 200) {
        $this->setIsNotFound(true);

        return false;
    }

    // stripos() returns 0 for a match at the very start of the content, so
    // the result has to be compared against false explicitly.
    if (stripos($response->getContent(), 'suspended') !== false) {
        $this->setIsSuspended(true);

        return false;
    }

    $post_times = $crawler->filter('#stream-items-id li ._timestamp')->each(function (Crawler $node) {
        return $node->attr('data-time');
    });

    // Drop missing or malformed timestamps so they never reach the arithmetic below.
    $post_times = array_filter($post_times, 'is_numeric');

    if (count($post_times) === 0) {
        // No usable timestamp on the page: there is no recent post by definition.
        $this->setDoesntHaveRecentPosts(true);

        return;
    }

    $last_post_time = max($post_times);
    $hour_difference = round((time() - $last_post_time) / 60 / 60, 2);

    if ($hour_difference > 24) {
        // The newest post is more than 24 hours old.
        $this->setDoesntHaveRecentPosts(true);
    }
}
/**
 * Build the Goutte client used for scraping.
 *
 * The client follows redirects and is backed by a Guzzle HTTP client whose
 * cURL options switch off SSL host and peer verification.
 *
 * @return GoutteClient
 */
protected function getScrapClient()
{
    // NOTE(review): certificate checks are disabled here - confirm this is
    // intended for every site being scraped.
    $curlOptions = array(
        CURLOPT_SSL_VERIFYHOST => false,
        CURLOPT_SSL_VERIFYPEER => false,
    );
    $httpClient = new \GuzzleHttp\Client(array('curl' => $curlOptions));

    $scraper = new GoutteClient();
    $scraper->followRedirects();
    $scraper->setClient($httpClient);

    return $scraper;
}
/**
 * Build the Goutte client used for scraping.
 *
 * The client follows redirects and is backed by a Guzzle HTTP client whose
 * cURL options:
 *  - switch off SSL host and peer verification,
 *  - send traffic through the proxy kuzh.polytechnique.fr:8080,
 *  - set CURLOPT_TIMEOUT to 7 and CURLOPT_CONNECTTIMEOUT to 5.
 *
 * @return GoutteClient
 */
protected function getScrapClient()
{
    // NOTE(review): certificate checks are disabled and the proxy is
    // hard-coded - confirm both are intended outside the original network.
    $curlOptions = array(
        CURLOPT_SSL_VERIFYHOST => false,
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_PROXY => 'kuzh.polytechnique.fr:8080',
        CURLOPT_TIMEOUT => 7,
        CURLOPT_CONNECTTIMEOUT => 5,
    );
    $httpClient = new \GuzzleHttp\Client(array('curl' => $curlOptions));

    $scraper = new GoutteClient();
    $scraper->followRedirects();
    $scraper->setClient($httpClient);

    return $scraper;
}
/**
 * Get MoneyForward CSV data.
 *
 * Logs in to moneyforward.com with the given credentials and prints the CSV
 * for the requested month (the current month by default) to stdout.
 *
 * usage: php <script> your_id your_password [yyyy-mm]
 *
 * Every failure is reported on stderr and ends the script with exit status 1,
 * so callers can tell an error apart from CSV output.
 *
 * @author egmc
 */
require __DIR__ . "/vendor/autoload.php";

use Goutte\Client;

$login_url = 'https://moneyforward.com/users/sign_in';
$csv_url = 'https://moneyforward.com/cf/csv';

if ($argc < 3) {
    fwrite(STDERR, "usage: php {$argv[0]} your_id your_password (optional)date[yyyy-mm]" . PHP_EOL);
    exit(1);
}

$login_id = $argv[1];
$password = $argv[2];

// DateTime's constructor throws on an unparsable string; turn that into a
// readable error instead of an uncaught exception.
try {
    $datetime = isset($argv[3]) ? new DateTime($argv[3]) : new DateTime();
} catch (Exception $e) {
    fwrite(STDERR, "invalid date: {$argv[3]}" . PHP_EOL);
    exit(1);
}

$params = [
    'from' => $datetime->format('Y/m') . "/01",
    'month' => $datetime->format('m'),
    'year' => $datetime->format('Y'),
];

// Sign in through the login form.
$client = new Client();
$crawler = $client->request('GET', $login_url);
$form = $crawler->selectButton('commit')->form();
$client->submit($form, ['user[email]' => $login_id, 'user[password]' => $password]);

// Redirects are not followed for the CSV request, so a redirect response
// shows up as a non-200 status and is reported as a failure below.
$query = http_build_query($params);
$client->followRedirects(false);
$client->request('GET', "{$csv_url}?{$query}");

$response = $client->getResponse();
if ((int) $response->getStatus() !== 200) {
    fwrite(STDERR, "failed to get csv data" . PHP_EOL);
    exit(1);
}

echo $response->getContent();
/**
 * Try username/password combinations against a target protected by a CSRF token.
 *
 * For every pair taken from the easy-credentials lists, a fresh client is
 * created, the target URL is fetched to read the current CSRF token, and the
 * request parameters plus that token are sent with the target's HTTP method.
 * A success is reported when the response text matches the target's success
 * pattern.
 *
 * @param FuzzingUri $uri_target
 */
public function attackFuzzedWithCsrf(FuzzingUri $uri_target)
{
    $target = $uri_target->getFuzzTarget();
    $endpoint = $this->_guzzle->getBaseUrl() . $uri_target->getUri();
    $formParams = $target->getParameters();

    // NOTE(review): presumably these slots are references into $formParams, so
    // that assigning to them changes what gets submitted - confirm against
    // getUserAndPassRefs().
    $credentialSlots = $this->getUserAndPassRefs($formParams);

    $candidateUsers = $this->_easycredentials->getUsernames($this->NB_CREDENTIALS);
    $candidatePasswords = $this->_easycredentials->getPasswords($this->NB_CREDENTIALS);

    foreach ($candidateUsers as $username) {
        $credentialSlots['username'] = $username;

        foreach ($candidatePasswords as $password) {
            /**
             * todo add to DB with array of values and entity->persist() ?
             * todo clariss: check the connection + check the file
             * todo choice of API: guzzle && goutte == wtf ?
             */
            $credentialSlots['password'] = $password;

            // A new client per attempt.
            $browser = new Client();
            $browser->setHeader(
                'User-Agent',
                "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36"
            );
            $browser->followRedirects(true);

            // Load the page first to pick up the CSRF token of this session.
            $tokenPage = $browser->request('get', $endpoint);
            $tokenInput = $tokenPage->filter('input[name="' . $uri_target->getCsrf() . '"]')->first();
            $tokenValues = $tokenInput->extract('value');
            $token = $tokenValues[0];

            $formParams = array_merge($formParams, array($uri_target->getCsrf() => $token));

            $browser->followRedirects(true);
            $resultPage = $browser->request($uri_target->getHttpMethod(), $endpoint, $formParams);

            $matched = preg_match($uri_target->getMatchSuccess(), $resultPage->text());
            if ($matched) {
                $this->report(true, "Bruteforced {$endpoint} with user {$username} and password {$password}");
            }
        }
    }
}
/**
 * Request the referer's URL, store the HTTP status on it and register every
 * valid link found in the returned page.
 *
 * Each href is logged under the "is_valid" channel, followed by "+" when it
 * passes is_valid_url() (and is added to $this->resources) or "-" when it
 * does not.
 *
 * @param mixed $referer object exposing getUrl() and setStatus(); presumably
 *                       a Resource - confirm against callers
 */
private function crawle($referer)
{
    $browser = new Client();
    $browser->followRedirects();
    $page = $browser->request('GET', $referer->getUrl());

    $httpStatus = $browser->getResponse()->getStatus();
    $referer->setStatus($httpStatus);
    $referer->analysed = true;
    LogDebug::add("{$httpStatus} Request {$referer->getUrl()}");
    $referer->isDocument = true;

    $hrefs = $page->filter('a')->extract(array('href'));
    foreach ($hrefs as $href) {
        LogDebug::add("Href: {$href}", "is_valid");

        if ($this->is_valid_url($href)) {
            LogDebug::add("+", "is_valid");
            $this->resources->add(new Resource($href, $referer), $referer);
        } else {
            LogDebug::add("-", "is_valid");
        }
    }
}
/**
 * Walks through the hosted giropay payment flow against the live test bank:
 * login, validation, TAN entry, success page and the redirect back to the
 * merchant. Every step asserts a 200 status and a marker text on the page.
 *
 * @group liveTest
 */
public function testRequestHostedEndpointLive()
{
    $paymentRequest = $this->generatePaymentRequestDTO();
    $response = $this->giropayHostedService->requestHostedEndpoint($paymentRequest);
    $this->assertInstanceOf("PHPCommerce\\Payment\\Dto\\PaymentResponseDTO", $response);

    $responseMap = $response->getResponseMap();
    $redirectUrl = $responseMap[GiropayConstants::HOSTED_REDIRECT_URL];

    $browser = new Client();
    $page = $browser->request('GET', $redirectUrl);

    // Step 1 - bank login screen: sign in with the test account.
    $this->assertEquals(200, $browser->getResponse()->getStatus());
    $this->assertContains("Online-Banking: Anmelden", $browser->getResponse()->__toString());
    $loginForm = $page->selectButton('Sicher anmelden')->form();
    $page = $browser->submit($loginForm, [
        'account/addition[@name=benutzerkennung]' => 'sepatest1',
        'ticket/pin' => '12345',
    ]);

    // Step 2 - validation screen: nothing to fill in, just continue.
    $this->assertEquals(200, $browser->getResponse()->getStatus());
    $this->assertContains(
        "Bitte wählen Sie eine Mobilfunknummer für den smsTAN-Versand",
        $browser->getResponse()->__toString()
    );
    $continueForm = $page->selectButton('weiterButton')->form();
    $page = $browser->submit($continueForm, []);

    // Step 3 - TAN screen: enter the TAN.
    $this->assertEquals(200, $browser->getResponse()->getStatus());
    $this->assertContains(
        "Bitte kontrollieren Sie vor der Eingabe der TAN die per SMS versandten Auftragsdaten",
        $browser->getResponse()->__toString()
    );
    $tanForm = $page->selectButton('absendenButton')->form();
    $page = $browser->submit($tanForm, ['ticket/tan' => '123456']);

    // Step 4 - success screen: go back to the merchant.
    $this->assertEquals(200, $browser->getResponse()->getStatus());
    $this->assertContains(
        "Der Auftrag wurde entgegengenommen",
        $browser->getResponse()->getContent()->__toString()
    );
    $backForm = $page->selectButton('back2MerchantButton')->form();
    $page = $browser->submit($backForm, []);

    // Step 5 - redirect screen.
    $this->assertEquals(200, $browser->getResponse()->getStatus());
    $this->assertContains(
        "Die Rücksprungadresse zum Händler wird ermittelt",
        $browser->getResponse()->getContent()->__toString()
    );

    // Wait until the redirect address is populated, then submit without
    // following redirects so the Location header can be read.
    sleep(3);
    $browser->followRedirects(false);
    $goForm = $page->selectButton('go')->form();
    $page = $browser->submit($goForm, []);
    $redirectUrl = $browser->getResponse()->getHeader('Location');
}