/**
 * Registers the style sheets referenced by a CSS prey as nodes in the web
 *
 * Only preys flagged as CSS with a 200 response and non-empty content are
 * processed. Every URL returned by getImportUrlsFromStyle() is looked up in
 * the web, flagged as CSS and cross-linked with the prey in both directions.
 * @param zibo\library\spider\Web $web The spider web
 * @param zibo\library\spider\WebNode $prey The current prey in the web
 * @param string $baseUrl Base URL of the crawl
 * @param string $preyBaseUrl Base URL of the prey
 * @param zibo\library\xml\dom\Document $dom The DOM document of the current
 * prey (not used by this bite)
 * @return null
 */
public function bite(Web $web, WebNode $prey, $baseUrl, $preyBaseUrl, Document $dom = null) {
    $isStyleSheet = $prey->hasType(WebNode::TYPE_CSS);
    if (!$isStyleSheet) {
        return;
    }

    $response = $prey->getResponse();

    // a missing response or any status other than 200 leaves nothing to scan
    $isSuccessful = $response && $response->getResponseCode() == 200;
    if (!$isSuccessful) {
        return;
    }

    $css = $response->getContent();
    if (!$css) {
        return;
    }

    $importUrls = $this->getImportUrlsFromStyle($css, $baseUrl, $preyBaseUrl);

    foreach ($importUrls as $importUrl) {
        $styleNode = $web->getNode($importUrl);

        $styleNode->addType(WebNode::TYPE_CSS);

        // link both ways: the imported sheet is referenced by the prey and
        // the prey links to the imported sheet
        $styleNode->addReference($prey);
        $prey->addLink($styleNode);
    }
}
/**
 * Bites a prey to gather the needed information
 *
 * When the prey has a response whose Content-Type starts with text/html and
 * whose content is not empty, the content is parsed into a DOM document. A
 * parse failure is recorded on the prey through setError(). Afterwards every
 * registered bite is invoked with the web, the prey, both base values and
 * the DOM document (null when no HTML was parsed).
 * @param WebNode $prey The current node to check
 * @param string $preyBaseUrl The base URL of the prey
 * @param string $preyBasePath The base path of the prey
 * @return null
 */
private function bite(WebNode $prey, $preyBaseUrl, $preyBasePath) {
    $dom = null;

    // A prey can lack a response (e.g. a failed request); guard before
    // dereferencing it so the bites still run and can decide for themselves.
    $response = $prey->getResponse();
    if ($response) {
        $contentType = $response->getHeader('Content-Type');
        $content = $response->getContent();

        if (String::startsWith($contentType, 'text/html') && $content) {
            $dom = new Document('1.0', 'utf8');

            try {
                // warnings about malformed markup are suppressed; only a
                // hard failure (false result) is treated as an error
                $result = @$dom->loadHTML($content);
                if (!$result) {
                    // error_get_last() returns null when no error was
                    // recorded, so fall back to a descriptive message
                    $error = error_get_last();
                    if ($error && isset($error['message'])) {
                        $message = $error['message'];
                    } else {
                        $message = 'Could not load the HTML content of the prey';
                    }

                    throw new Exception($message);
                }
            } catch (Exception $exception) {
                // the document instance is still handed to the bites after a
                // failure; only the error is recorded on the prey
                $prey->setError($exception->getMessage());
            }
        }
    }

    // NOTE(review): confirm the order of $preyBaseUrl and $preyBasePath
    // matches the parameter order the registered bites expect.
    foreach ($this->bites as $bite) {
        $bite->bite($this->web, $prey, $preyBaseUrl, $preyBasePath, $dom);
    }
}