/**
 * Removes all the settings of this widget
 *
 * A setting is considered to belong to this widget when its key starts
 * with the widget setting prefix.
 * @return null
 */
public function clearWidgetProperties() {
    $prefix = $this->widgetSettingPrefix;

    foreach ($this->settings as $key => $ignored) {
        if (!String::startsWith($key, $prefix)) {
            // setting of something else, leave it untouched
            continue;
        }

        unset($this->settings[$key]);
    }
}
/**
 * Resolves a reference to an absolute URL
 *
 * - a reference which already looks like a URL is returned untouched
 * - a reference starting with a slash is appended to the base URL
 * - any other reference is appended to the base path
 * @param string $url The made reference
 * @param string $baseUrl The base URL of the node which is linking the URL
 * @param string $basePath The base path of the node which is linking the URL
 * @return string The absolute URL of the reference
 */
protected function getAbsoluteUrl($url, $baseUrl, $basePath) {
    if (String::looksLikeUrl($url)) {
        return $url;
    }

    if (String::startsWith($url, '/')) {
        // strip trailing slashes of the base URL to avoid a double slash
        return rtrim($baseUrl, '/') . $url;
    }

    return $basePath . $url;
}
/**
 * Convert the relative images to absolute ones
 *
 * The source of every img element in the document is prefixed with the
 * provided base URL, unless the source is already absolute. A source is
 * considered absolute when it starts with http://, https://, // (protocol
 * relative) or data: (inline image).
 * @param string $baseUrl base url for the relative images
 * @return null
 */
public function makeImagesAbsolute($baseUrl) {
    // prefixes which mark a source that must not get the base URL prepended
    $absolutePrefixes = array('http://', 'https://', '//', 'data:');

    $images = $this->dom->getElementsByTagName('img');
    foreach ($images as $image) {
        $src = $image->getAttribute('src');

        foreach ($absolutePrefixes as $prefix) {
            if (String::startsWith($src, $prefix)) {
                // already absolute, skip to the next image
                continue 2;
            }
        }

        $image->setAttribute('src', $baseUrl . $src);
    }
}
/**
 * Gets the request for the current query
 *
 * A query which starts with the web directory followed by the query
 * separator is passed straight to the fallback router. For any other query,
 * the request is resolved from the query itself; the fallback router is only
 * used when that yields no request.
 * @return zibo\core\Request
 */
public function getRequest() {
    $query = $this->getQuery();
    $webPrefix = Zibo::DIRECTORY_WEB . Request::QUERY_SEPARATOR;

    if (!String::startsWith($query, $webPrefix)) {
        $request = $this->getRequestFromQuery($query);
        if ($request) {
            return $request;
        }
    }

    return $this->router->getRequest();
}
/**
 * Registers the anchors of the document as links of the prey in the web
 *
 * Anchors without a href and anchors which only point to a fragment (#...)
 * are skipped. A mailto: reference is used as is, every other reference is
 * made absolute before its node is looked up in the web.
 * @param zibo\library\spider\Web $web The spider web
 * @param zibo\library\spider\WebNode $prey The current prey in the web
 * @param string $baseUrl Base URL of the crawl
 * @param string $preyBaseUrl Base URL of the prey
 * @param zibo\library\xml\dom\Document $dom The DOM document of the current prey
 * @return null
 */
protected function biteDocument(Web $web, WebNode $prey, $baseUrl, $preyBaseUrl, Document $dom) {
    foreach ($dom->getElementsByTagName('a') as $anchor) {
        $href = $anchor->getAttribute('href');

        $isFollowable = $href && !String::startsWith($href, '#');
        if (!$isFollowable) {
            continue;
        }

        $isMail = String::startsWith($href, 'mailto:');
        $target = $isMail ? $href : $this->getAbsoluteUrl($href, $baseUrl, $preyBaseUrl);

        // link both ways: the node knows who references it, the prey knows where it links to
        $node = $web->getNode($target);
        $node->addReference($prey);
        $prey->addLink($node);
    }
}
/**
 * Gets the drive prefix of a path (/, C:/)
 *
 * The checks are performed in this order:
 * - an empty path has no prefix
 * - a path of one character only has a prefix when it is the directory separator
 * - a path starting with the directory separator yields its first three
 *   characters when the second character is a drive and the third one is a
 *   directory separator, the directory separator otherwise
 * - a path starting with a drive followed by :/ yields that drive with :/
 * @param string $path
 * @return boolean|string the drive prefix if an absolute path, false otherwise
 */
private function getDrivePrefix($path) {
    if (!$path) {
        return false;
    }

    $separator = File::DIRECTORY_SEPARATOR;
    $length = strlen($path);

    if ($length == 1) {
        return $path == $separator ? $separator : false;
    }

    $first = $path[0];

    if ($first == $separator) {
        $hasDriveSegment = $length >= 3 && $this->isDrive($path[1]) && $path[2] == $separator;

        return $hasDriveSegment ? substr($path, 0, 3) : $separator;
    }

    $prefix = $first . ':/';
    if ($this->isDrive($first) && String::startsWith($path, $prefix)) {
        return $prefix;
    }

    return false;
}
/**
 * Performs the crawl: a HEAD request, followed by a GET request when useful
 *
 * The response of the HEAD request is stored in this object. The GET request
 * is only performed when the response is not a redirect, has response code
 * 200 and a Content-Type which starts with text/.
 *
 * The socket is closed on every way out of this method, also when the
 * response is a redirect or when an exception is thrown.
 * @return null
 * @throws Exception when no response code has been received
 */
public function performCrawl() {
    $socket = $this->connect();

    try {
        $this->response = $this->performHead($socket);

        $responseCode = $this->response->getResponseCode();
        if (!$responseCode) {
            throw new Exception('No response received');
        }

        if (!$this->response->isRedirect()) {
            $contentType = $this->response->getHeader('Content-Type');
            if ($responseCode == 200 && String::startsWith($contentType, 'text/')) {
                $this->performGet($this->response);
            }
        }
    } catch (Exception $exception) {
        // catch and rethrow instead of finally, to stay compatible with PHP < 5.5
        fclose($socket);

        throw $exception;
    }

    fclose($socket);
}
/**
 * Checks whether a path starts with the phar protocol (phar://)
 * @param string $path Path to check; when the value loosely equals null
 * (not provided, or for example an empty string), the path of this file
 * is checked instead
 * @return boolean true if the protocol is prefixed, false otherwise
 */
public function hasPharProtocol($path = null) {
    $subject = ($path == null) ? $this->path : $path;

    return String::startsWith($subject, 'phar://');
}
/**
 * Processes the source into a cache file, applying the thumbnailer if set
 *
 * A source which is not absolute and does not start with the application
 * directory is looked up through Zibo. The cache file is (re)generated when
 * it does not exist or when the source has been modified after it.
 * @param string $source source to process
 * @return string source to be used by the html of this image tag
 * @throws zibo\ZiboException when the source file could not be found
 */
private function processSource($source) {
    $original = new File($source);

    $isDirectlyUsable = $original->isAbsolute() || String::startsWith($original->getPath(), Zibo::DIRECTORY_APPLICATION . File::DIRECTORY_SEPARATOR);
    if (!$isDirectlyUsable) {
        $original = Zibo::getInstance()->getFile($original->getPath());
        if (!$original) {
            throw new ZiboException('Could not find ' . $source);
        }
    }

    $cached = $this->getCacheFile($original);

    $isStale = !$cached->exists() || $original->getModificationTime() > $cached->getModificationTime();
    if ($isStale) {
        // the image is always created, also when no thumbnailer is set
        $image = new CoreImage($original);

        $result = $image;
        if ($this->thumbnailer) {
            $result = $image->thumbnail($this->thumbnailer, $this->thumbnailWidth, $this->thumbnailHeight);
        }

        if ($result === $image) {
            // nothing changed to the image, a plain copy of the file is enough
            $original->copy($cached);
        } else {
            $result->write($cached);
        }
    }

    // remove application/ from the path
    return substr($cached->getPath(), 12);
}
/**
 * Checks whether the provided string looks like an HTTP URL
 *
 * Only the start of the string is inspected, the rest is not validated.
 * @param string $string String to check
 * @return boolean True when the string starts with http:// or https://, false otherwise
 */
public static function looksLikeUrl($string) {
    return String::startsWith($string, 'http://')
        || String::startsWith($string, 'https://');
}
/**
 * Bites a prey: parses its content when it is HTML and passes it to all the bites
 *
 * The DOM document is only created when the Content-Type of the response
 * starts with text/html and the response has content; the bites receive
 * null as document otherwise. When the content could not be loaded, the
 * error is set to the prey and the bites still receive the created document.
 * @param WebNode $prey The current node to check
 * @param string $preyBaseUrl The base URL of the prey
 * @param string $preyBasePath The base path of the prey
 * @return null
 */
private function bite(WebNode $prey, $preyBaseUrl, $preyBasePath) {
    $response = $prey->getResponse();
    $contentType = $response->getHeader('Content-Type');
    $content = $response->getContent();

    $dom = null;

    $isHtml = String::startsWith($contentType, 'text/html') && $content;
    if ($isHtml) {
        $dom = new Document('1.0', 'utf8');

        try {
            // parse warnings are suppressed, a failure is picked up through the last error
            if (!@$dom->loadHTML($content)) {
                $error = error_get_last();

                throw new Exception($error['message']);
            }
        } catch (Exception $exception) {
            $prey->setError($exception->getMessage());
        }
    }

    foreach ($this->bites as $spiderBite) {
        $spiderBite->bite($this->web, $prey, $preyBaseUrl, $preyBasePath, $dom);
    }
}