Ejemplo n.º 1
0
 /**
  * Exercises the PDO-backed crawl-delay handling for one URI / user-agent pair.
  *
  * Two routes to the delay client are checked: the handler reached straight
  * from the parser's crawlDelay() directive, and the client produced by a
  * standalone Delay manager. Every value written during the test is reset
  * and cleaned up at the end.
  *
  * @dataProvider generateDataForTest
  * @param string $uri
  * @param string $userAgent
  */
 public function testDelaySQL($uri, $userAgent)
 {
     $pdo = new PDO($GLOBALS['DB_DSN'], $GLOBALS['DB_USER'], $GLOBALS['DB_PASSWD']);
     // Begin in silent error mode so the check further down can tell
     // whether constructing the Delay manager changed it.
     $pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_SILENT);
     $parser = new RobotsTxtParser\UriClient($uri);

     // Looks the directive up afresh on each call, one lookup per use.
     $crawlDelay = function () use ($parser, $userAgent) {
         return $parser->userAgent($userAgent)->crawlDelay();
     };

     // Route 1: handler obtained directly from the directive.
     $queued = $crawlDelay()->handle($pdo)->checkQueue();
     $this->assertTrue(is_numeric($queued));
     $sleepUntil = $crawlDelay()->handle($pdo)->getTimeSleepUntil();
     $this->assertTrue(is_numeric($sleepUntil));
     $slept = $crawlDelay()->handle($pdo)->sleep();
     $this->assertTrue(is_numeric($slept));

     // Route 2: standalone manager sharing the same connection.
     $manager = new RobotsTxtParser\Delay($pdo);
     $this->assertInstanceOf('vipnytt\\RobotsTxtParser\\Delay', $manager);
     $this->assertNotSame(PDO::ERRMODE_SILENT, $pdo->getAttribute(PDO::ATTR_ERRMODE));

     $delayClient = $manager->client($crawlDelay());
     $this->assertInstanceOf('vipnytt\\RobotsTxtParser\\Client\\Delay\\ClientInterface', $delayClient);
     $this->assertTrue(is_numeric($delayClient->getTimeSleepUntil()));
     $this->assertTrue(is_numeric($delayClient->checkQueue()));

     // The reported sleep time must be within one second of the measured wall time.
     $before = microtime(true);
     $reported = $delayClient->sleep();
     $after = microtime(true);
     $elapsed = $after - $before;
     $this->assertTrue($elapsed - 1 <= $reported && $reported <= $elapsed + 1);

     $this->assertTrue(is_array($manager->getTopWaitTimes()));

     // A plain reset must leave no pending sleep-until timestamp.
     $delayClient->reset();
     $this->assertSame(0, $delayClient->getTimeSleepUntil());

     // Only meaningful when the robots.txt actually declares a delay.
     if ($crawlDelay()->getValue() > 0) {
         $delayClient->reset(60);
         $queue = $delayClient->checkQueue();
         $this->assertLessThanOrEqual(60, $queue);
         $this->assertGreaterThan(59, $queue);
         $debug = $manager->debug($uri);
         $entries = count($debug[strtolower($userAgent)], COUNT_NORMAL);
         $this->assertGreaterThanOrEqual(3, $entries);
     }

     // Leave the database as it was found.
     $delayClient->reset();
     $manager->clean();
 }
Ejemplo n.º 2
0
    /**
     * Insert or update the cached robots.txt record for the client's base URI.
     *
     * When the base URI starts with "http" and the fetch produced either no
     * status code or a 5xx status code, the update is first handed to
     * displacePush(); if that returns true, nothing is written here and true
     * is returned.
     *
     * Otherwise the rendered robots.txt is upserted. Each named placeholder is
     * used exactly once in the statement: PDO only allows a named placeholder
     * to be repeated while prepare emulation is enabled, so the update clause
     * reads the insert values back through VALUES(col) instead of binding the
     * same name a second time.
     *
     * @param UriClient $client
     * @param int|null $worker Written to the `worker` column only when an existing row is updated
     * @return bool Result of executing the statement, or true if displacePush() handled the update
     */
    private function push(UriClient $client, $worker = 0)
    {
        $base = $client->getBaseUri();
        $statusCode = $client->getStatusCode();
        $nextUpdate = $client->nextUpdate();
        // Store the effective URI only when it differs from the base URI.
        $effective = $client->getEffectiveUri();
        if ($effective === $base) {
            $effective = null;
        }
        $isServerFailure = $statusCode === null || ($statusCode >= 500 && $statusCode < 600);
        if (strpos($base, 'http') === 0 && $isServerFailure && $this->displacePush($base, $nextUpdate, $worker)) {
            return true;
        }
        $validUntil = $client->validUntil();
        $content = $client->render()->compressed(PHP_EOL);
        $query = $this->pdo->prepare(<<<SQL
INSERT INTO robotstxt__cache1 (base, content, statusCode, validUntil, nextUpdate, effective)
VALUES (:base, :content, :statusCode, :validUntil, :nextUpdate, :effective)
ON DUPLICATE KEY UPDATE content = VALUES(content), statusCode = VALUES(statusCode), validUntil = VALUES(validUntil),
  nextUpdate = VALUES(nextUpdate), effective = VALUES(effective), worker = :worker;
SQL
);
        $query->bindParam(':base', $base, PDO::PARAM_STR);
        $query->bindParam(':content', $content, PDO::PARAM_STR);
        $query->bindParam(':statusCode', $statusCode, PDO::PARAM_INT | PDO::PARAM_NULL);
        $query->bindParam(':validUntil', $validUntil, PDO::PARAM_INT);
        $query->bindParam(':nextUpdate', $nextUpdate, PDO::PARAM_INT);
        $query->bindParam(':effective', $effective, PDO::PARAM_STR | PDO::PARAM_NULL);
        $query->bindParam(':worker', $worker, PDO::PARAM_INT | PDO::PARAM_NULL);
        return $query->execute();
    }