/**
 * Clean the HTML string held in $this->val.
 *
 * Performs the following functions:
 * - Removes any tag that is not in the allowed list; text between tags is HTML-escaped.
 * - Rebuilds each allowed tag from scratch using only its allowed attributes.
 * - For allowed attributes of type 'uri', transforms relative URLs to absolute URLs
 *   (resolved against $this->sourceurl).
 * - Enforces the optional per-tag maximum count given in a tag's ['opts']['max'] entry.
 *
 * The cleaned markup is written back to $this->val and is also returned.
 *
 * @param array|bool $allowed_tags An array of allowed tags, or true for the internal list of safe tags.
 * @return string The cleaned HTML string.
 */
public function clean($allowed_tags = true) {
    // Cache of resolved URLs shared across calls. It is keyed by source URL first, because the
    // absolute form of a relative URL depends on the page it was found on.
    static $abs_urls = [];

    $tag_regex = '#' . static::EXTRACT_HTMLTAG . '#iums';

    preg_match_all($tag_regex, $this->val, $input_html);
    if (empty($input_html[0])) {
        // No tags at all: the whole value is plain text.
        $this->val = static::escape_html($this->val);
        return $this->val;
    }

    // Text segments surrounding the tags: segment $i precedes tag $i, and one extra segment may follow the last tag.
    $input_text = preg_split($tag_regex, $this->val);

    if (!is_array($allowed_tags)) {
        $allowed_tags = static::get_allowed_tags();
    }
    $void_elements = static::get_void_elements();

    // NOTE(review): assumes $this->sourceurl is a string (or null) - confirm against the class definition.
    $source_key = (string)$this->sourceurl;

    // Index of the text segment that follows the final tag.
    $trailing_index = count($input_html[0]);

    $this->val = '';
    $tag_count = [];

    // Everything in this loop gets called a lot, reduce wherever possible.
    foreach ($input_html[1] as $i => $tag) {
        $closingtag = false;
        if ($tag[0] === '/') {
            $closingtag = true;
            $tag = mb_substr($tag, 1);
        }
        if (!isset($allowed_tags[$tag])) {
            $tag = mb_strtolower($tag);
        }

        // Markup emitted for this tag; stays empty when the tag is dropped.
        $html = '';

        if (!isset($allowed_tags[$tag])) {
            // Remove the disallowed tag from $input_html so close_tags() receives only the tags that
            // remain in the output and does not need to run another tag-discovery regex.
            unset($input_html[1][$i]);
        } elseif ($closingtag === true) {
            // Add a closing tag, or remove needless closing tags for void elements.
            if (!isset($void_elements[$tag])) {
                $html = '</' . $tag . '>';
            }
        } elseif (
            isset($allowed_tags[$tag]['opts']['max'])
            && isset($tag_count[$tag])
            && $tag_count[$tag] >= $allowed_tags[$tag]['opts']['max']
        ) {
            // Maximum count reached: drop this tag and disallow the tag name for the rest of the run.
            // We deliberately fall through (instead of skipping the iteration) so the text that
            // precedes this tag is still appended below.
            unset($allowed_tags[$tag]);
            unset($input_html[1][$i]);
        } else {
            $html = '<' . $tag;
            foreach ($allowed_tags[$tag] as $attr => $type) {
                if ($attr === 'opts') {
                    // Opts are options in how to treat the tag (max count, etc), do not treat as an attribute.
                    continue;
                }
                // Cheap pre-check before running the regex. Case-insensitive, to agree with the regex below.
                if (mb_stripos($input_html[0][$i], $attr) === false) {
                    continue;
                }
                // The look-behind stops e.g. "data-src" from being read as "src"; the name is quoted so
                // it is always matched literally.
                $attr_regex = '#(?<![\\w\\-])' . preg_quote($attr, '#')
                    . '\\s*=\\s*("((?U).*)?"|\'((?U).*)?\'|([^>\\s]+))\\s*#iums';
                preg_match($attr_regex, $input_html[0][$i], $m_attr);
                if (empty($m_attr)) {
                    continue;
                }

                // The last populated group is the value without its surrounding quotes.
                $attr_val = end($m_attr);

                if ($type === 'uri') {
                    // Fix spaces in URLs.
                    $attr_val = str_replace(' ', '+', $attr_val);
                    if (!isset($abs_urls[$source_key][$attr_val])) {
                        $abs_urls[$source_key][$attr_val] = \pdyn\datatype\Url::validate($attr_val)
                            ? $attr_val
                            : \pdyn\datatype\Url::make_absolute($attr_val, $this->sourceurl, true);
                    }
                    $attr_val = $abs_urls[$source_key][$attr_val];
                }

                // Every value, URLs included, is escaped before being placed inside the double-quoted
                // attribute so a quote in the value cannot terminate the attribute early.
                $html .= ' ' . $attr . '="' . htmlspecialchars((string)$attr_val, ENT_QUOTES, 'UTF-8', false) . '"';
            }
            if (isset($void_elements[$tag])) {
                $html .= '/';
            }
            $html .= '>';

            $tag_count[$tag] = isset($tag_count[$tag]) ? $tag_count[$tag] + 1 : 1;
        }

        $this->val .= htmlspecialchars($input_text[$i], ENT_COMPAT, 'UTF-8', false) . $html;
    }

    // Text after the final tag is escaped the same way as every other text segment.
    if (isset($input_text[$trailing_index])) {
        $this->val .= htmlspecialchars($input_text[$trailing_index], ENT_COMPAT, 'UTF-8', false);
    }

    $this->close_tags($input_html);
    $this->val = trim($this->val);

    return $this->val;
}
/**
 * Check the output of Url::make_absolute() against the expected value for each provided case.
 *
 * @dataProvider dataprovider_makeAbsoluteUrl
 * @param string $i The input URL.
 * @param string $pageurl The URL of the source page, passed as the second argument.
 * @param string $expected The value make_absolute() is expected to return.
 */
public function test_makeAbsoluteUrl($i, $pageurl, $expected) {
    $this->assertEquals(
        $expected,
        \pdyn\datatype\Url::make_absolute($i, $pageurl)
    );
}