/**
 * Initialises the crawler process via the parent implementation, then feeds
 * it the extra crawl URLs and the exclude patterns supplied by the URL list.
 *
 * @throws InvalidArgumentException If addURLFilterRule() rejects an exclude pattern.
 */
protected function initCrawlerProcess()
{
    parent::initCrawlerProcess();

    // Add additional URLs to crawl to the crawler's LinkCache
    // NOTE: This is using an undocumented API
    $extraURLs = $this->urlList->getExtraCrawlURLs();
    if ($extraURLs) {
        foreach ($extraURLs as $extraURL) {
            $this->LinkCache->addUrl(new PHPCrawlerURLDescriptor($extraURL));
        }
    }

    // Prevent URLs that match the exclude patterns from being fetched
    $excludePatterns = $this->urlList->getExcludePatterns();
    if ($excludePatterns) {
        foreach ($excludePatterns as $pattern) {
            $validRegExp = $this->addURLFilterRule($this->delimitExcludePattern($pattern));
            if (!$validRegExp) {
                throw new InvalidArgumentException('Exclude url pattern "' . $pattern . '" is not a valid regular expression.');
            }
        }
    }
}

/**
 * Wraps a bare exclude pattern in PCRE delimiters.
 *
 * A delimiter that does not occur in the pattern is preferred, so the pattern
 * reaches the regex engine unmodified. Escaping an embedded delimiter is not
 * neutral for '|': an escaped '|' matches a literal pipe, which silently
 * disables alternation such as "foo|bar".
 *
 * @param string $pattern Regular expression body without delimiters.
 * @return string The delimited regular expression.
 */
private function delimitExcludePattern($pattern)
{
    $pattern = (string) $pattern;

    foreach (array('~', '#', '@', '%', '!') as $delimiter) {
        if (strpos($pattern, $delimiter) === false) {
            return $delimiter . $pattern . $delimiter;
        }
    }

    // Every candidate delimiter occurs in the pattern: fall back to the
    // previous behaviour of '|' delimiters with embedded pipes escaped.
    return '|' . str_replace('|', '\\|', $pattern) . '|';
}