/**
 * Checks that URIs carrying a fragment are reported as absolute,
 * including a URI that consists of nothing but the fragment.
 */
public function testFragment()
{
    $uriWithSchemeAndFragment = 'http:foo/bar#abc';
    $fragmentOnlyUri = '#abc';

    $this->assertTrue(Uri::isAbsolute($uriWithSchemeAndFragment));
    $this->assertTrue(
        Uri::isAbsolute($fragmentOnlyUri),
        'fragment URL should be considered absolute URI because they msut not be modified at all'
    );
}
/**
 * Fix relative URL to absolute URL
 *
 * Rewrites, inside $this->content, the relative URLs found in the href
 * attribute of <a> and <link> tags and in src and background attributes.
 * The prefix is the href of the <base .../> tag when the content has one,
 * otherwise the newsletter base URL followed by a slash. For every kind of
 * URL that had at least one conversion, a message is appended to $this->infos.
 */
private function infoRelativeToAbsolute()
{
    // Find out the absolute domain. If specified in HTML source, use it as is.
    // NOTE(review): this pattern only recognises a self-closing <base ... /> tag.
    if (preg_match('|<base[^>]*href="([^"]*)"[^>]*/>|i', $this->content, $match)) {
        $absoluteDomain = $match[1];
    } else {
        $absoluteDomain = $this->newsletter->getBaseUrl() . '/';
    }

    $urlPatterns = [
        'hyperlinks' => '/<a [^>]*href="(.*)"/Ui',
        'stylesheets' => '/<link [^>]*href="(.*)"/Ui',
        'images' => '/ src="(.*)"/Ui',
        'background images' => '/ background="(.*)"/Ui',
    ];

    foreach ($urlPatterns as $type => $urlPattern) {
        $replacementCount = 0;

        // Rewrite each match in place. Only the captured attribute value is touched, so a URL
        // whose text also appears elsewhere in the tag (e.g. href="a" inside <a ...>) cannot
        // corrupt the tag, and entity-encoded URLs are converted as well.
        $rewritten = preg_replace_callback(
            $urlPattern,
            function (array $found) use ($absoluteDomain, &$replacementCount) {
                $url = $found[1];

                // Entities are decoded only to decide whether the URL is absolute;
                // the markup keeps its original encoding.
                $decodedUrl = html_entity_decode($url);

                // If this is empty or already an absolute link, don't replace it
                if ($url === '' || Uri::isAbsolute($decodedUrl)) {
                    return $found[0];
                }

                ++$replacementCount;

                // Every pattern ends with "(.*)", so a match is: prefix + URL + closing quote
                $prefix = substr($found[0], 0, -(strlen($url) + 1));

                return $prefix . $absoluteDomain . ltrim($url, '/') . '"';
            },
            $this->content
        );

        // preg_replace_callback() returns null on a PCRE error: keep the content untouched
        if ($rewritten === null) {
            continue;
        }

        $this->content = $rewritten;

        if ($replacementCount) {
            $this->infos[] = sprintf($this->lang->getLL('validation_mail_converted_relative_url'), $type);
        }
    }
}
/**
 * Returns the content of the newsletter with validation messages. The content
 * is also "fixed" automatically when possible.
 *
 * Fixes applied to the content: relative URLs are made absolute, linked
 * stylesheets are fetched and inlined into <style> tags, and <script> blocks
 * declared as text/javascript are removed.
 *
 * @param Newsletter $newsletter
 * @param string $language language of the content of the newsletter (the 'L' parameter in TYPO3 URL)
 * @return array ('content' => $content, 'errors' => $errors, 'warnings' => $warnings, 'infos' => $infos);
 */
public function validate(Newsletter $newsletter, $language = null)
{
    $this->initializeLang();

    // We need to catch the exception if domain was not found/configured properly
    try {
        $url = $newsletter->getContentUrl($language);
    } catch (Exception $e) {
        return array('content' => '', 'errors' => array($e->getMessage()), 'warnings' => array(), 'infos' => array());
    }

    $content = $this->getURL($url);

    $errors = array();
    $warnings = array();
    $infos = array(sprintf($this->lang->getLL('validation_content_url'), '<a target="_blank" href="' . $url . '">' . $url . '</a>'));

    // Content should be more than just a few characters, otherwise a server error probably occurred
    if (strlen($content) < 200) {
        $errors[] = $this->lang->getLL('validation_mail_too_short');
    }

    // Content should not start with a PHP warning
    if (substr($content, 0, 22) === "<br />\n<b>Warning</b>:") {
        $errors[] = $this->lang->getLL('validation_mail_contains_php_warnings');
    }

    // Content should not start with a PHP fatal error
    if (substr($content, 0, 26) === "<br />\n<b>Fatal error</b>:") {
        $errors[] = $this->lang->getLL('validation_mail_contains_php_errors');
    }

    // If the page contains a "Page is being generated" text... this is bad too.
    // strpos() returns 0 for a match at the very beginning, hence the strict comparison.
    if (strpos($content, 'Page is being generated.') !== false
        && strpos($content, 'If this message does not disappear within') !== false
    ) {
        $errors[] = $this->lang->getLL('validation_mail_being_generated');
    }

    // Find out the absolute domain. If specified in HTML source, use it as is.
    // NOTE(review): this pattern only recognises a self-closing <base ... /> tag.
    if (preg_match('|<base[^>]*href="([^"]*)"[^>]*/>|i', $content, $match)) {
        $absoluteDomain = $match[1];
    } else {
        $absoluteDomain = $newsletter->getBaseUrl() . '/';
    }

    // Fix relative URL to absolute URL
    $urlPatterns = array(
        'hyperlinks' => '/<a [^>]*href="(.*)"/Ui',
        'stylesheets' => '/<link [^>]*href="(.*)"/Ui',
        'images' => '/ src="(.*)"/Ui',
        'background images' => '/ background="(.*)"/Ui',
    );
    foreach ($urlPatterns as $type => $urlPattern) {
        $replacementCount = 0;

        // Rewrite each match in place. Only the captured attribute value is touched, so a URL
        // whose text also appears elsewhere in the tag (e.g. href="a" inside <a ...>) cannot
        // corrupt the tag, and entity-encoded URLs are converted as well.
        $rewritten = preg_replace_callback(
            $urlPattern,
            function (array $found) use ($absoluteDomain, &$replacementCount) {
                $relativeUrl = $found[1];

                // Entities are decoded only to decide whether the URL is absolute;
                // the markup keeps its original encoding.
                $decodedUrl = html_entity_decode($relativeUrl);

                // If this is empty or already an absolute link, don't replace it
                if ($relativeUrl === '' || Uri::isAbsolute($decodedUrl)) {
                    return $found[0];
                }

                ++$replacementCount;

                // Every pattern ends with "(.*)", so a match is: prefix + URL + closing quote
                $prefix = substr($found[0], 0, -(strlen($relativeUrl) + 1));

                return $prefix . $absoluteDomain . ltrim($relativeUrl, '/') . '"';
            },
            $content
        );

        // preg_replace_callback() returns null on a PCRE error: keep the content untouched
        if ($rewritten === null) {
            continue;
        }

        $content = $rewritten;

        if ($replacementCount) {
            $infos[] = sprintf($this->lang->getLL('validation_mail_converted_relative_url'), $type);
        }
    }

    // Find linked css and convert into a style-tag
    preg_match_all('|<link rel="stylesheet" type="text/css" href="([^"]+)"[^>]+>|Ui', $content, $urls);
    foreach ($urls[1] as $i => $url) {
        $content = str_replace($urls[0][$i], "<!-- fetched URL: {$url} -->\n<style type=\"text/css\">\n<!--\n" . $this->getURL($url) . "\n-->\n</style>", $content);
    }
    if (count($urls[1])) {
        $infos[] = $this->lang->getLL('validation_mail_contains_linked_styles');
    }

    // JavaScript has no place in a newsletter: remove it and warn about it
    $count = 0;
    $withoutScripts = preg_replace('|<script[^>]*type="text/javascript"[^>]*>[^<]*</script>|i', '', $content, -1, $count);
    if ($withoutScripts !== null) {
        // preg_replace() returns null on a PCRE error, in which case the content is kept as is
        $content = $withoutScripts;
    }
    if ($count) {
        $warnings[] = $this->lang->getLL('validation_mail_contains_javascript');
    }

    // Images in CSS
    if (preg_match('|background-image: url\\([^\\)]+\\)|', $content) || preg_match('|list-style-image: url\\([^\\)]+\\)|', $content)) {
        $errors[] = $this->lang->getLL('validation_mail_contains_css_images');
    }

    // CSS-classes
    if (preg_match('|<[a-z]+ [^>]*class="[^"]+"[^>]*>|', $content)) {
        $warnings[] = $this->lang->getLL('validation_mail_contains_css_classes');
    }

    // Positioning & element sizes in CSS
    $forbiddenCssProperties = array(
        'width' => '((min|max)+-)?width',
        'height' => '((min|max)+-)?height',
        'margin' => 'margin(-(bottom|left|right|top)+)?',
        'padding' => 'padding(-(bottom|left|right|top)+)?',
        'position' => 'position',
    );
    $forbiddenCssPropertiesWarnings = array();
    if (preg_match_all('|<[a-z]+[^>]+style="([^"]*)"|', $content, $matches)) {
        foreach ($matches[1] as $stylepart) {
            foreach ($forbiddenCssProperties as $property => $regex) {
                if (preg_match('/(^|[^\\w-])' . $regex . '[^\\w-]/', $stylepart)) {
                    // Keyed by property so that each property is reported only once
                    $forbiddenCssPropertiesWarnings[$property] = $property;
                }
            }
        }
        foreach ($forbiddenCssPropertiesWarnings as $property) {
            $warnings[] = sprintf($this->lang->getLL('validation_mail_contains_css_some_property'), $property);
        }
    }

    return array('content' => $content, 'errors' => $errors, 'warnings' => $warnings, 'infos' => $infos);
}
/**
 * Downloads whatever is located at the given URL and hands back its content.
 *
 * A User-Agent header is sent only when the URL is absolute; any other
 * location (e.g. a file on disk) is fetched without extra headers.
 *
 * @param string $url
 * @return string
 * @throws \Exception when the content could not be fetched
 */
public static function getUrl($url)
{
    // A User-Agent is only relevant when fetching an URL, not for a file on disk
    $requestHeaders = Utility\Uri::isAbsolute($url) ? [self::getUserAgent()] : null;

    $fetchReport = [];
    $body = GeneralUtility::getUrl($url, 0, $requestHeaders, $fetchReport);

    if ($body !== false) {
        return $body;
    }

    // Surface the failure as an exception so that it is properly caught in Validator
    $details = [
        'Could not fetch "' . $url . '"',
        'Error: ' . $fetchReport['error'],
        'Message: ' . $fetchReport['message'],
    ];

    throw new \Exception(implode(PHP_EOL, $details));
}
/**
 * Retrieves whatever is located at the given URL.
 *
 * Absolute URLs are requested with our own User-Agent header; any other
 * location (e.g. a file on disk) is fetched without extra headers.
 *
 * @param string $url
 * @return string
 */
public static function getUrl($url)
{
    // Not an absolute URL: fetch it without any additional request header
    if (!Utility\Uri::isAbsolute($url)) {
        return \TYPO3\CMS\Core\Utility\GeneralUtility::getURL($url);
    }

    // Absolute URL: identify ourselves through the User-Agent header
    $requestHeaders = array(self::getUserAgent());

    return \TYPO3\CMS\Core\Utility\GeneralUtility::getURL($url, 0, $requestHeaders);
}