/**
 * Create a Sitemap whose configuration maps each known sitemap type
 * to the URL extractor class used for that type.
 *
 * @return \webignition\WebResource\Sitemap\Sitemap
 */
protected function createSitemap()
{
    // Sitemap type identifier => URL extractor class name.
    // Both feed content types (atom and rss) share the same extractor class.
    $extractorClassesByType = array(
        'sitemaps.org.xml' => 'webignition\\WebResource\\Sitemap\\UrlExtractor\\SitemapsOrgXmlUrlExtractor',
        'sitemaps.org.txt' => 'webignition\\WebResource\\Sitemap\\UrlExtractor\\SitemapsOrgTxtUrlExtractor',
        'application/atom+xml' => 'webignition\\WebResource\\Sitemap\\UrlExtractor\\NewsFeedUrlExtractor',
        'application/rss+xml' => 'webignition\\WebResource\\Sitemap\\UrlExtractor\\NewsFeedUrlExtractor',
        'sitemaps.org.xml.index' => 'webignition\\WebResource\\Sitemap\\UrlExtractor\\SitemapsOrgXmlIndexUrlExtractor',
    );

    $sitemapConfiguration = new SitemapConfiguration();
    $sitemapConfiguration->setTypeToUrlExtractorClassMap($extractorClassesByType);

    $configuredSitemap = new Sitemap();
    $configuredSitemap->setConfiguration($sitemapConfiguration);

    return $configuredSitemap;
}
/**
 * Get the distinct normalised URLs extracted from this resource's content.
 *
 * The URLs are extracted on the first call and cached in $this->urls (as
 * array keys); later calls return the cached set. If the configuration has
 * no extractor class for this resource's type, an empty array is returned.
 *
 * @return array
 */
public function getUrls()
{
    // Already computed: serve the cached set.
    if ($this->urls !== null) {
        return array_keys($this->urls);
    }

    // Mark as computed up front, so the "no extractor" case is cached too.
    $this->urls = array();

    $extractorClassName = $this->configuration->getExtractorClassForType($this->getType());
    if ($extractorClassName === null) {
        return array();
    }

    $urlExtractor = new $extractorClassName();

    foreach ($urlExtractor->extract($this->getContent()) as $extractedUrl) {
        $urlKey = (string) new NormalisedUrl($extractedUrl);

        // URLs are stored as keys so duplicates collapse; re-assigning an
        // existing key leaves both its value and its position unchanged.
        $this->urls[$urlKey] = true;
    }

    return array_keys($this->urls);
}