/**
 * Build a fresh Sitemap carrying its own SitemapConfiguration.
 *
 * The configuration is given a map from sitemap type identifier to the
 * class name of the URL extractor registered for that type. Both feed
 * content types (atom and rss) map to the same NewsFeedUrlExtractor class.
 *
 * @return \webignition\WebResource\Sitemap\Sitemap
 */
protected function createSitemap()
{
    // Keys are the type identifiers; values are extractor class names.
    $extractorClassesByType = array(
        'sitemaps.org.xml' => 'webignition\\WebResource\\Sitemap\\UrlExtractor\\SitemapsOrgXmlUrlExtractor',
        'sitemaps.org.txt' => 'webignition\\WebResource\\Sitemap\\UrlExtractor\\SitemapsOrgTxtUrlExtractor',
        'application/atom+xml' => 'webignition\\WebResource\\Sitemap\\UrlExtractor\\NewsFeedUrlExtractor',
        'application/rss+xml' => 'webignition\\WebResource\\Sitemap\\UrlExtractor\\NewsFeedUrlExtractor',
        'sitemaps.org.xml.index' => 'webignition\\WebResource\\Sitemap\\UrlExtractor\\SitemapsOrgXmlIndexUrlExtractor',
    );

    $sitemapConfiguration = new SitemapConfiguration();
    $sitemapConfiguration->setTypeToUrlExtractorClassMap($extractorClassesByType);

    $newSitemap = new Sitemap();
    $newSitemap->setConfiguration($sitemapConfiguration);

    return $newSitemap;
}
/**
 * Fetch the given sitemap over HTTP and attach the response to it.
 *
 * A clone of the configured base request is pointed at the sitemap URL,
 * given cookies and a timeout, and sent. TRANSFER_PRE is dispatched before
 * the send and TRANSFER_POST after it, including when the send raised one
 * of the caught Guzzle exceptions.
 *
 * When the retrieved sitemap is an index, a child Sitemap sharing the
 * parent's configuration is added for every URL it lists; each child is
 * itself retrieved only if the configuration asks for child retrieval.
 * The outcome of a child retrieval does not affect the return value.
 *
 * @param \webignition\WebResource\Sitemap\Sitemap $sitemap
 * @return boolean false if the configuration says to halt, if the request
 *                 raised a caught Guzzle exception, or if the response
 *                 status code is not 200; true otherwise
 */
public function retrieve(Sitemap $sitemap)
{
    if ($this->getConfiguration()->getShouldHalt()) {
        return false;
    }

    // Work on a copy so the shared base request is never modified.
    $sitemapRequest = clone $this->getConfiguration()->getBaseRequest();
    $sitemapRequest->setUrl($sitemap->getUrl());
    $this->setRequestCookies($sitemapRequest);
    $this->setRequestTimeout($sitemapRequest);

    $transferEvents = $this->getPreAndPostTransferEvents();
    $this->dispatcher->dispatch(Events::TRANSFER_PRE, $transferEvents['pre']);

    $transferFailure = null;

    try {
        $response = $sitemapRequest->send();
    } catch (\Guzzle\Http\Exception\CurlException $curlFailure) {
        $transferFailure = $curlFailure;
    } catch (\Guzzle\Http\Exception\RequestException $requestFailure) {
        $transferFailure = $requestFailure;
    }

    $this->dispatcher->dispatch(Events::TRANSFER_POST, $transferEvents['post']);

    // $response only exists when the send completed without a caught exception.
    if ($transferFailure !== null) {
        return false;
    }

    if ($response->getStatusCode() !== 200) {
        return false;
    }

    $sitemap->setHttpResponse($response);

    if (!$sitemap->isIndex()) {
        return true;
    }

    foreach ($sitemap->getUrls() as $indexedUrl) {
        $child = new Sitemap();
        $child->setConfiguration($sitemap->getConfiguration());
        $child->setUrl($indexedUrl);
        $sitemap->addChild($child);

        if ($this->getConfiguration()->getRetrieveChildSitemaps()) {
            // Best effort: a failed child retrieval is not reported to the caller.
            $this->retrieve($child);
        }
    }

    return true;
}