Esempio n. 1
0
 /**
  * Handles a hit on the trap URL by classifying the visitor against the
  * configured bad/good crawler lists and reporting it through _catch().
  *
  * Nothing is done unless the trap query parameter ($this->_trapName) is
  * present in the request and Validate::isGoogleBot() is false.
  *
  * A visitor matches a list when any <crawler> entry has an <ip> equal to $ip,
  * or a <userAgent> that contains $userAgent (case-insensitive). A visitor on
  * the bad list is additionally flagged in the session under
  * md5($ip . 'badcrawler') and the router is asked to show a 403.
  *
  * @param string $ip        Address of the visitor.
  * @param string $userAgent User-Agent string of the visitor.
  *
  * @return void
  *
  * @throws \Exception When a configured crawler list file cannot be loaded as XML.
  */
 protected function _check($ip, $userAgent)
 {
     if (!Http::getQuery($this->_trapName) || Validate::isGoogleBot()) {
         return;
     }

     // Both lists are optional; initialise them so the checks below never read
     // an undefined variable when a file is not configured.
     $badCrawlerXml = null;
     $goodCrawlerXml = null;

     if ($this->_badCrawlerFile) {
         $badCrawlerXml = simplexml_load_file($this->_badCrawlerFile);
         // Compare with false explicitly: a valid document whose root element is
         // empty is a falsy SimpleXMLElement and must not be treated as an error.
         if ($badCrawlerXml === false) {
             throw new \Exception('Invalid xml file : "' . $this->_badCrawlerFile . '"');
         }
     }
     if ($this->_goodCrawlerFile) {
         $goodCrawlerXml = simplexml_load_file($this->_goodCrawlerFile);
         if ($goodCrawlerXml === false) {
             throw new \Exception('Invalid xml file : "' . $this->_goodCrawlerFile . '"');
         }
     }

     $visitorIp = (string) $ip;
     $visitorAgent = (string) $userAgent;

     // Returns true as soon as one <crawler> entry of the given list matches
     // the visitor by IP or by user agent.
     $isListed = function ($crawlerXml) use ($visitorIp, $visitorAgent) {
         if (!$crawlerXml instanceof \SimpleXMLElement) {
             return false;
         }
         foreach ($crawlerXml->crawler as $crawler) {
             if (isset($crawler->ip) && (string) $crawler->ip === $visitorIp) {
                 return true;
             }
             // An empty needle makes strripos() report a match on PHP 8, which
             // would flag every visitor that sends no User-Agent; skip it.
             // NOTE(review): the list entry is the haystack and the visitor's
             // user agent is the needle, so a short token in the list will not
             // match a full browser UA string - confirm this order is intended.
             if ($visitorAgent !== ''
                 && isset($crawler->userAgent)
                 && strripos((string) $crawler->userAgent, $visitorAgent) !== false
             ) {
                 return true;
             }
         }

         return false;
     };

     $isBadCrawler = $isListed($badCrawlerXml);
     if ($isBadCrawler) {
         $this->_catch($ip, $userAgent, self::CRAWLER_BAD);
         Session::getInstance()->add(md5($ip . 'badcrawler'), true, true, true);
         Router::getInstance()->show403(true);
     }

     // The good list is still consulted after a bad match, as before; this only
     // matters if show403() returns instead of ending the request.
     $isGoodCrawler = $isListed($goodCrawlerXml);
     if ($isGoodCrawler) {
         // NOTE(review): a good crawler is reported with CRAWLER_BAD - looks like
         // a copy/paste slip; switch to the dedicated constant if the class has one.
         $this->_catch($ip, $userAgent, self::CRAWLER_BAD);
     }

     // Unknown visitor: on neither list.
     if (!$isBadCrawler && !$isGoodCrawler) {
         // NOTE(review): unknown visitors are also reported as CRAWLER_BAD - confirm.
         $this->_catch($ip, $userAgent, self::CRAWLER_BAD);
     }
 }