/**
 * Classifies a visitor that followed the trap link and records the hit.
 *
 * Does nothing unless the trap query parameter ($this->_trapName) is present
 * in the request and Validate::isGoogleBot() reports false. Otherwise the
 * configured bad/good crawler XML lists are loaded and every <crawler> entry
 * is compared against the visitor:
 *  - an entry matches when its <ip> equals $ip, or when $userAgent occurs
 *    (case-insensitively) inside its <userAgent> text;
 *  - a bad-list match is recorded through _catch(), flagged in the session
 *    and answered with Router::show403(true);
 *  - a good-list match, or no match at all, is recorded through _catch().
 *
 * @param string $ip        Address of the visitor.
 * @param string $userAgent User-agent string of the visitor.
 *
 * @return void
 *
 * @throws \Exception When a configured crawler list cannot be loaded as XML.
 */
protected function _check($ip, $userAgent)
{
    if (!Http::getQuery($this->_trapName) || Validate::isGoogleBot()) {
        return;
    }

    // Always defined, so a list that is not configured is simply skipped
    // instead of triggering an undefined-variable warning further down.
    $badCrawlerXml = null;
    $goodCrawlerXml = null;

    // Both lists are loaded before anything is recorded, so a broken file
    // aborts the check without side effects.
    if ($this->_badCrawlerFile) {
        $badCrawlerXml = simplexml_load_file($this->_badCrawlerFile);
        // Compare against false explicitly: a valid document whose root has
        // no children is "falsy" but is still a successfully parsed list.
        if ($badCrawlerXml === false) {
            throw new \Exception('Invalid xml file : "' . $this->_badCrawlerFile . '"');
        }
    }

    if ($this->_goodCrawlerFile) {
        $goodCrawlerXml = simplexml_load_file($this->_goodCrawlerFile);
        if ($goodCrawlerXml === false) {
            throw new \Exception('Invalid xml file : "' . $this->_goodCrawlerFile . '"');
        }
    }

    // String copies used for comparison only; _catch() still receives the
    // caller's original values.
    $ipString = (string) $ip;
    $agentString = (string) $userAgent;

    // Tells whether one <crawler> entry describes the current visitor.
    $matchesVisitor = function ($crawler) use ($ipString, $agentString) {
        if (isset($crawler->ip) && (string) $crawler->ip === $ipString) {
            return true;
        }

        // An empty needle matches everything on PHP 8 (and warns on PHP 7),
        // so a visitor without a user agent is never matched by user agent.
        // NOTE(review): the listed user agent is the haystack and the
        // visitor's user agent the needle - confirm this order is intended.
        return $agentString !== ''
            && isset($crawler->userAgent)
            && strripos((string) $crawler->userAgent, $agentString) !== false;
    };

    $isBadCrawler = false;
    $isGoodCrawler = false;

    if ($badCrawlerXml !== null) {
        foreach ($badCrawlerXml->crawler as $crawler) {
            if ($matchesVisitor($crawler)) {
                $isBadCrawler = true;
                $this->_catch($ip, $userAgent, self::CRAWLER_BAD);
                Session::getInstance()->add(md5($ip . 'badcrawler'), true, true, true);
                Router::getInstance()->show403(true);
                break;
            }
        }
    }

    // The good list is consulted even after a bad match, as before.
    if ($goodCrawlerXml !== null) {
        foreach ($goodCrawlerXml->crawler as $crawler) {
            if ($matchesVisitor($crawler)) {
                $isGoodCrawler = true;
                // NOTE(review): a good-list match is recorded with
                // CRAWLER_BAD - confirm whether another type was intended.
                $this->_catch($ip, $userAgent, self::CRAWLER_BAD);
                break;
            }
        }
    }

    // unknown
    if (!$isBadCrawler && !$isGoodCrawler) {
        // NOTE(review): unknown visitors are also recorded with CRAWLER_BAD -
        // confirm whether a dedicated type was intended.
        $this->_catch($ip, $userAgent, self::CRAWLER_BAD);
    }
}