Пример #1
0
 /**
  * Decide whether this driver's URL is acceptable as a scan source.
  *
  * The URL is refused when it ends with any entry of $this->ignoredExtensions.
  * Both the URL and each extension are lower-cased before comparing, so the
  * check is case-insensitive on both sides.
  *
  * @return bool false when the URL ends with an ignored extension, true otherwise
  */
 public function validate()
 {
     // Lower-case once up front; the value is the same for every iteration.
     $url = strtolower($this->url);
     foreach ($this->ignoredExtensions as $ext) {
         // The extension is normalised too: compared against an already
         // lower-cased URL, an entry such as ".PDF" could otherwise never match.
         if (Strings::endsWith($url, strtolower($ext))) {
             return false;
         }
     }
     return true;
 }
Пример #2
0
 /**
  * Scan one URL for links and update the scanned / not-yet-scanned link maps.
  *
  * Does nothing when the driver refuses its input ($site->validate() is false).
  * Otherwise every <a href> found in the page content is reduced to a path,
  * turned into a new driver via $site::fromUrl() and queued in
  * $this->unscannedUrls unless its URL is already in $this->scannedUrls.
  * Finally $site itself is moved from the unscanned map to the scanned map.
  *
  * Content that cannot be parsed into a DOM is treated as a page without links.
  *
  * @param IScanDriver $site driver for the page being scanned
  * @param $startPage string stripped from each href and prepended to each kept
  *                   path (presumably the site's base URL; confirm with caller)
  */
 protected function scanUrl(IScanDriver $site, $startPage)
 {
     if (!$site->validate()) {
         // NOTE(review): a refused site stays in $this->unscannedUrls; confirm
         // the caller does not keep re-submitting it.
         return;
     }
     $content = $site->getContent($site);
     /** @var \simple_html_dom|false $simpleDom */
     $simpleDom = HtmlDomParser::str_get_html($content);
     // str_get_html() may return false instead of a DOM object (simple_html_dom
     // does so for empty or oversized input); calling find() on it would be fatal.
     $hasDom = is_object($simpleDom);
     /** @var \simple_html_dom_node[] $links */
     $links = $hasDom ? $simpleDom->find("a") : [];
     foreach ($links as $link) {
         $siteUrl = $link->getAttribute("href");
         if (!$siteUrl) {
             continue;
         }
         // Turn absolute links into paths; str_replace removes every
         // occurrence of $startPage, not only a leading one.
         $siteUrl = str_replace($startPage, '', $siteUrl);
         // Anything not starting with "/" at this point is skipped.
         if (!Strings::startsWith($siteUrl, '/')) {
             continue;
         }
         $siteUrl = str_replace($site->getUrl(), "", $siteUrl);
         $siteUrl = rtrim($siteUrl, "/");
         $newSite = $site::fromUrl($startPage . $siteUrl);
         if (isset($this->scannedUrls[$newSite->getUrl()])) {
             continue;
         }
         $this->unscannedUrls[$newSite->getUrl()] = $newSite;
     }
     if ($hasDom) {
         // Release the parsed tree; the simple_html_dom manual recommends
         // clear() to avoid memory build-up when many documents are parsed.
         $simpleDom->clear();
     }
     $this->scannedUrls[$site->getUrl()] = $site;
     unset($this->unscannedUrls[$site->getUrl()]);
 }