/**
 * Round-trips the fixture resource through serialize()/unserialize() and
 * checks that the restored object still exposes the same URI, response body
 * and crawler HTML as the original.
 *
 * @covers VDB\Spider\Resource
 */
public function testSerialization()
{
    $restored = unserialize(serialize($this->resource));

    // The restored object and its collaborators must come back with the expected types.
    $this->assertInstanceOf('VDB\\Spider\\Resource', $restored);
    $this->assertInstanceOf('Psr\\Http\\Message\\ResponseInterface', $restored->getResponse());

    $restoredUri = $restored->getUri();
    $this->assertInstanceOf('VDB\\Spider\\Uri\\DiscoveredUri', $restoredUri);

    // The content must be unchanged by the round trip.
    $this->assertEquals(
        $this->resource->getUri()->__toString(),
        $restoredUri->__toString()
    );
    $this->assertEquals(
        $this->html,
        $restored->getResponse()->getBody()->__toString()
    );
    $this->assertEquals(
        $this->resource->getCrawler()->html(),
        $restored->getCrawler()->html()
    );
}
/**
 * Writes the string form of the resource's response to a file.
 *
 * The file lives under the result path and is named after the URL-encoded
 * URI of the resource. It is opened in 'w' mode. The value returned by
 * fwrite() is added to the running total of persisted bytes.
 *
 * @param Resource $resource
 */
public function persist(Resource $resource)
{
    $encodedName = urlencode($resource->getUri()->toString());
    $targetPath = $this->getResultPath() . $encodedName;

    $target = new \SplFileObject($targetPath, 'w');
    $this->totalSizePersisted += $target->fwrite($resource->getResponse()->__toString());
}
/**
 * Runs every registered discoverer against a resource and returns the URIs
 * that remain after normalization, de-duplication and filtering.
 *
 * The URI of the resource itself is always marked as seen. When that URI is
 * already at the maximum depth, no discovery takes place and an empty array
 * is returned. Every returned URI gets a depth one greater than the depth of
 * the resource's URI and is marked as seen.
 *
 * @param Resource $resource
 * @return UriInterface[]
 */
public function discover(Resource $resource)
{
    $sourceUri = $resource->getUri();
    $this->markSeen($sourceUri);

    if ($this->isAtMaxDepth($sourceUri)) {
        return [];
    }

    // Gather the raw results of all discoverers into a single list.
    $candidates = [];
    foreach ($this->discoverers as $discoverer) {
        $candidates = array_merge($candidates, $discoverer->discover($resource));
    }

    // NOTE(review): the return values of these helpers are ignored, so they
    // presumably modify the array in place (by reference) - confirm against
    // their definitions. The order of the steps is kept as-is.
    $this->normalize($candidates);
    $this->removeDuplicates($candidates);
    $this->filterAlreadySeen($candidates);
    $this->filter($candidates);

    foreach ($candidates as $candidate) {
        $candidate->setDepthFound($sourceUri->getDepthFound() + 1);
        $this->markSeen($candidate);
    }

    return $candidates;
}
/**
 * Builds a Uri for every node of the document that matches the configured
 * CSS selector, using the node's "href" attribute resolved against the URI
 * of the document.
 *
 * An href that raises a UriSyntaxException is not returned; it is reported
 * to the stats handler of the spider as failed instead.
 *
 * @param Spider $spider
 * @param Resource $document
 * @return UriInterface[]
 */
public function discover(Spider $spider, Resource $document)
{
    $found = [];

    foreach ($document->getCrawler()->filter($this->cssSelector) as $element) {
        $href = $element->getAttribute('href');

        try {
            $found[] = new Uri($href, $document->getUri()->toString());
        } catch (UriSyntaxException $syntaxError) {
            $spider->getStatsHandler()->addToFailed(
                $href,
                'Invalid URI: ' . $syntaxError->getMessage()
            );
        }
    }

    return $found;
}
/**
 * The fixture resource must expose its URI as a DiscoveredUri whose string
 * form is the URL the fixture was built with.
 *
 * @covers VDB\Spider\Resource::getUri
 */
public function testGetUri()
{
    $uri = $this->resource->getUri();

    $this->assertInstanceOf('VDB\\Spider\\Uri\\DiscoveredUri', $uri);
    $this->assertEquals('http://example.org/domains/special', $uri->toString());
}