/**
  * Registers every image referenced by the page as a node in the web
  *
  * Each img element with a non-empty src attribute is resolved to an
  * absolute URL, flagged as an image and cross linked with the prey.
  * @param zibo\library\spider\Web $web The spider web
  * @param zibo\library\spider\WebNode $prey The current prey in the web
  * @param string $baseUrl Base URL of the crawl
  * @param string $preyBaseUrl Base URL of the prey
  * @param zibo\library\xml\dom\Document $dom The DOM document of the current prey
  * @return null
  */
 protected function biteDocument(Web $web, WebNode $prey, $baseUrl, $preyBaseUrl, Document $dom)
 {
     foreach ($dom->getElementsByTagName('img') as $imageElement) {
         $source = $imageElement->getAttribute('src');

         // only images which actually point somewhere end up in the web
         if ($source) {
             $imageNode = $web->getNode($this->getAbsoluteUrl($source, $baseUrl, $preyBaseUrl));
             $imageNode->addType(WebNode::TYPE_IMAGE);

             // register the relation in both directions
             $imageNode->addReference($prey);
             $prey->addLink($imageNode);
         }
     }
 }
示例#2
0
 /**
  * Adds all the used javascripts to the web
  *
  * Only script elements with a non-empty src attribute are considered,
  * inline scripts are skipped. A script without a type attribute is
  * treated as javascript since the attribute is optional in HTML5; when a
  * type is set, it is matched case insensitive against the known
  * javascript types.
  * @param zibo\library\spider\Web $web The spider web
  * @param zibo\library\spider\WebNode $prey The current prey in the web
  * @param string $baseUrl Base URL of the crawl
  * @param string $preyBaseUrl Base URL of the prey
  * @param zibo\library\xml\dom\Document $dom The DOM document of the current prey
  * @return null
  */
 protected function biteDocument(Web $web, WebNode $prey, $baseUrl, $preyBaseUrl, Document $dom)
 {
     // the empty string stands for a script element without a type attribute
     $scriptTypes = array('', 'text/javascript', 'application/javascript', 'module');

     $scripts = $dom->getElementsByTagName('script');
     foreach ($scripts as $script) {
         $url = $script->getAttribute('src');
         if (!$url) {
             // inline script, there is no resource to add to the web
             continue;
         }

         // type values are case insensitive and may carry surrounding whitespace
         $type = strtolower(trim((string) $script->getAttribute('type')));
         if (!in_array($type, $scriptTypes, true)) {
             // not javascript, eg a template or a JSON data block
             continue;
         }

         $url = $this->getAbsoluteUrl($url, $baseUrl, $preyBaseUrl);
         $link = $web->getNode($url);
         $link->addType(WebNode::TYPE_JS);

         // register the relation in both directions
         $link->addReference($prey);
         $prey->addLink($link);
     }
 }
 /**
  * Adds the targets of the anchors in the page to the web
  *
  * Anchors without a href and anchors which only point to a fragment
  * (href starting with #) are ignored. A mailto: href is added as is, every
  * other href is first resolved to an absolute URL.
  * @param zibo\library\spider\Web $web The spider web
  * @param zibo\library\spider\WebNode $prey The current prey in the web
  * @param string $baseUrl Base URL of the crawl
  * @param string $preyBaseUrl Base URL of the prey
  * @param zibo\library\xml\dom\Document $dom The DOM document of the current prey
  * @return null
  */
 protected function biteDocument(Web $web, WebNode $prey, $baseUrl, $preyBaseUrl, Document $dom)
 {
     foreach ($dom->getElementsByTagName('a') as $anchorElement) {
         $href = $anchorElement->getAttribute('href');
         if (!$href) {
             continue;
         }

         if (String::startsWith($href, '#')) {
             // fragment inside the current page, nothing new to add
             continue;
         }

         // mail addresses are kept untouched, everything else is made absolute
         $target = String::startsWith($href, 'mailto:')
             ? $href
             : $this->getAbsoluteUrl($href, $baseUrl, $preyBaseUrl);

         $linkedNode = $web->getNode($target);

         // register the relation in both directions
         $linkedNode->addReference($prey);
         $prey->addLink($linkedNode);
     }
 }
示例#4
0
 /**
  * Adds the used style sheets to the web
  *
  * A link element is taken as a style sheet when it has a non-empty href,
  * its rel attribute contains the stylesheet token and its type attribute
  * is either omitted or text/css. The rel and type values are compared
  * case insensitive; rel is read as a whitespace separated token list, so
  * values like "alternate stylesheet" match as well.
  * @param zibo\library\spider\Web $web The spider web
  * @param zibo\library\spider\WebNode $prey The current prey in the web
  * @param string $baseUrl Base URL of the crawl
  * @param string $preyBaseUrl Base URL of the prey
  * @param zibo\library\xml\dom\Document $dom The DOM document of the current prey
  * @return null
  */
 protected function biteDocument(Web $web, WebNode $prey, $baseUrl, $preyBaseUrl, Document $dom)
 {
     $elements = $dom->getElementsByTagName('link');
     foreach ($elements as $element) {
         $url = $element->getAttribute('href');
         if (!$url) {
             continue;
         }

         // rel holds a whitespace separated list of case insensitive tokens
         $rel = strtolower(trim((string) $element->getAttribute('rel')));
         $relTokens = preg_split('/\s+/', $rel, -1, PREG_SPLIT_NO_EMPTY);
         if (!$relTokens || !in_array('stylesheet', $relTokens, true)) {
             continue;
         }

         // the type attribute is optional for style sheets, only reject
         // an explicit type which is not CSS
         $type = strtolower(trim((string) $element->getAttribute('type')));
         if ($type !== '' && $type !== 'text/css') {
             continue;
         }

         $url = $this->getAbsoluteUrl($url, $baseUrl, $preyBaseUrl);

         // separate variable for the web node, the loop variable holds the DOM element
         $node = $web->getNode($url);
         $node->addType(WebNode::TYPE_CSS);

         // register the relation in both directions
         $node->addReference($prey);
         $prey->addLink($node);
     }
 }
 /**
  * Loads the provided file as a DOM document and registers every
  * dependency element found in it to the provided container
  *
  * For each dependency element, a Dependency is created from the class and
  * id attributes, its calls are read through readCalls and the result is
  * added to the container for the interface named by the element. An empty
  * id attribute is passed on as null.
  * @param zibo\core\di\DependencyContainer $container Container which receives the dependencies
  * @param zibo\library\filesystem\File $file File which is loaded into the DOM document
  * @return null
  */
 private function readDependencies(DependencyContainer $container, File $file)
 {
     $document = new Document();
     $document->load($file);

     foreach ($document->getElementsByTagName(self::TAG_DEPENDENCY) as $element) {
         $interfaceName = $element->getAttribute(self::ATTRIBUTE_INTERFACE);
         $dependencyClass = $element->getAttribute(self::ATTRIBUTE_CLASS);
         $dependencyId = $element->getAttribute(self::ATTRIBUTE_ID);

         // an empty id attribute is handed to the dependency as null
         $dependency = new Dependency($dependencyClass, $dependencyId ? $dependencyId : null);

         $this->readCalls($dependency, $element);

         $container->addDependency($interfaceName, $dependency);
     }
 }