/**
 * Exercises UrlCheck::isAllowedToCrawl() against a fixed whitelist/blacklist pair.
 *
 * Three URLs are checked with the same base URL:
 *  - a URL that is the blacklisted path without its trailing slash (expected: allowed),
 *  - a URL below the blacklisted path (expected: rejected),
 *  - a URL equal to the second whitelist pattern's target (expected: allowed).
 */
public function testIsAllowedToCrawl()
{
    $allowPatterns = ["^http://example.com", "^https://facebook.com/user1"];
    $denyPatterns = [
        '^(file|ftp|mailto):',
        '&type\\=rubriek',
        'http://example.com/Some%20Thing/',
    ];
    $baseUrl = "http://example.com/item/article1";

    // The blacklist entry ends with a slash; this URL does not.
    $withoutTrailingSlash = UrlCheck::isAllowedToCrawl(
        "http://example.com/Some%20Thing",
        $baseUrl,
        $denyPatterns,
        $allowPatterns
    );
    $this->assertTrue($withoutTrailingSlash);

    // This URL contains the blacklisted path including its trailing slash.
    $belowBlacklistedPath = UrlCheck::isAllowedToCrawl(
        "http://example.com/Some%20Thing/article1",
        $baseUrl,
        $denyPatterns,
        $allowPatterns
    );
    $this->assertFalse($belowBlacklistedPath);

    // A URL on a different host than the base URL that is listed in the whitelist.
    $whitelistedExternal = UrlCheck::isAllowedToCrawl(
        "https://facebook.com/user1",
        $baseUrl,
        $denyPatterns,
        $allowPatterns
    );
    $this->assertTrue($whitelistedExternal);
}
/**
 * Check whether the URL from this job is allowed to be crawled.
 *
 * Delegates to UrlCheck::isAllowedToCrawl(), passing this object's url,
 * baseUrl, blacklist and whitelist properties.
 *
 * @return bool
 */
public function isAllowedToCrawl()
{
    $isAllowed = UrlCheck::isAllowedToCrawl(
        $this->url,
        $this->baseUrl,
        $this->blacklist,
        $this->whitelist
    );

    return $isAllowed;
}