/**
 * Exercise the crawl-delay handler against a PDO-backed database, both through
 * the fluent `handle()` shortcut and through an explicitly built Delay handler.
 *
 * @dataProvider generateDataForTest
 * @param string $uri
 * @param string $userAgent
 */
public function testDelaySQL($uri, $userAgent)
{
    $connection = new PDO($GLOBALS['DB_DSN'], $GLOBALS['DB_USER'], $GLOBALS['DB_PASSWD']);
    // Start out in silent mode; it is asserted further down that this no longer holds.
    $connection->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_SILENT);
    $robots = new RobotsTxtParser\UriClient($uri);

    // Builds a brand new delay client on every call, walking the full fluent chain each time.
    $freshClient = function () use ($robots, $userAgent, $connection) {
        return $robots->userAgent($userAgent)->crawlDelay()->handle($connection);
    };

    $queueLength = $freshClient()->checkQueue();
    $this->assertTrue(is_numeric($queueLength));
    $sleepUntil = $freshClient()->getTimeSleepUntil();
    $this->assertTrue(is_numeric($sleepUntil));
    $slept = $freshClient()->sleep();
    $this->assertTrue(is_numeric($slept));

    $delay = new RobotsTxtParser\Delay($connection);
    $this->assertInstanceOf('vipnytt\\RobotsTxtParser\\Delay', $delay);
    $errorMode = $connection->getAttribute(PDO::ATTR_ERRMODE);
    $this->assertFalse($errorMode === PDO::ERRMODE_SILENT);

    $delayClient = $delay->client($robots->userAgent($userAgent)->crawlDelay());
    $this->assertInstanceOf('vipnytt\\RobotsTxtParser\\Client\\Delay\\ClientInterface', $delayClient);
    $this->assertTrue(is_numeric($delayClient->getTimeSleepUntil()));
    $this->assertTrue(is_numeric($delayClient->checkQueue()));

    // The reported sleep time has to be within one second of the measured wall-clock time.
    $start = microtime(true);
    $sleepTime = $delayClient->sleep();
    $stop = microtime(true);
    $elapsed = $stop - $start;
    $this->assertTrue($sleepTime >= $elapsed - 1 && $sleepTime <= $elapsed + 1);

    $topWaitTimes = $delay->getTopWaitTimes();
    $this->assertTrue(is_array($topWaitTimes));

    $delayClient->reset();
    $this->assertTrue($delayClient->getTimeSleepUntil() === 0);

    $crawlDelay = $robots->userAgent($userAgent)->crawlDelay()->getValue();
    if ($crawlDelay > 0) {
        // After a reset(60) the queue is expected to be in the (59, 60] range.
        $delayClient->reset(60);
        $queue = $delayClient->checkQueue();
        $this->assertLessThanOrEqual(60, $queue);
        $this->assertGreaterThan(59, $queue);

        $debug = $delay->debug($uri);
        $agentDebug = $debug[strtolower($userAgent)];
        $this->assertTrue(count($agentDebug, COUNT_NORMAL) >= 3);
    }

    // Leave no state behind for the next data set.
    $delayClient->reset();
    $delay->clean();
}
/**
 * Insert or update a robots.txt record in the database.
 *
 * For an `http*` base URI whose status code is either unknown (null) or in the
 * 5xx range, the write is first offered to displacePush(); when that call
 * succeeds, this method returns true without touching the row itself.
 * Otherwise the rendered content and its metadata are written with an
 * `INSERT ... ON DUPLICATE KEY UPDATE` statement.
 *
 * @param UriClient $client Client providing the base URI, status code, timestamps and rendered content
 * @param int|null $worker Worker value stored in the `worker` column when an existing row is updated
 * @return bool True if the push was displaced, otherwise the result of executing the statement
 */
private function push(UriClient $client, $worker = 0)
{
    $base = $client->getBaseUri();
    $statusCode = $client->getStatusCode();
    $nextUpdate = $client->nextUpdate();

    // Store the effective URI only when it differs from the base URI.
    $effective = $client->getEffectiveUri();
    if ($effective === $base) {
        $effective = null;
    }

    $isServerFailure = $statusCode === null || ($statusCode >= 500 && $statusCode < 600);
    if (strpos($base, 'http') === 0 && $isServerFailure && $this->displacePush($base, $nextUpdate, $worker)) {
        return true;
    }

    $validUntil = $client->validUntil();
    $content = $client->render()->compressed(PHP_EOL);

    // Every placeholder name is used exactly once. PDO rejects a named marker
    // that is repeated within one statement unless prepares are emulated, so
    // the UPDATE clause gets its own `:upd*` markers bound to the same values.
    $query = $this->pdo->prepare(<<<SQL
INSERT INTO robotstxt__cache1 (base, content, statusCode, validUntil, nextUpdate, effective)
VALUES (:base, :content, :statusCode, :validUntil, :nextUpdate, :effective)
ON DUPLICATE KEY UPDATE content = :updContent, statusCode = :updStatusCode, validUntil = :updValidUntil, nextUpdate = :updNextUpdate, effective = :updEffective, worker = :worker;
SQL
    );

    // Values for the INSERT part.
    $query->bindParam(':base', $base, PDO::PARAM_STR);
    $query->bindParam(':content', $content, PDO::PARAM_STR);
    $query->bindParam(':statusCode', $statusCode, PDO::PARAM_INT | PDO::PARAM_NULL);
    $query->bindParam(':validUntil', $validUntil, PDO::PARAM_INT);
    $query->bindParam(':nextUpdate', $nextUpdate, PDO::PARAM_INT);
    $query->bindParam(':effective', $effective, PDO::PARAM_STR | PDO::PARAM_NULL);

    // Same values again for the ON DUPLICATE KEY UPDATE part, plus the worker.
    $query->bindParam(':updContent', $content, PDO::PARAM_STR);
    $query->bindParam(':updStatusCode', $statusCode, PDO::PARAM_INT | PDO::PARAM_NULL);
    $query->bindParam(':updValidUntil', $validUntil, PDO::PARAM_INT);
    $query->bindParam(':updNextUpdate', $nextUpdate, PDO::PARAM_INT);
    $query->bindParam(':updEffective', $effective, PDO::PARAM_STR | PDO::PARAM_NULL);
    $query->bindParam(':worker', $worker, PDO::PARAM_INT | PDO::PARAM_NULL);

    return $query->execute();
}