/**
 * This function runs various checks against the attributes of one tag.
 *
 * For every attribute it:
 *  1. drops the attribute when its name matches a rule in $rm_attnames
 *     that applies to this tag;
 *  2. otherwise normalises a copy of the value with tln_defang() and
 *     tln_unspace() and runs the $bad_attvals replacements against it,
 *     storing the rewritten value only when a replacement changed it.
 * Finally, any attributes configured in $add_attr_to_tag for this tag
 * are merged in.
 *
 * @param string $tagname         String with the name of the tag.
 * @param array  $attary          Array with all tag attributes (name => value).
 *                                A non-array value is treated as "no attributes".
 * @param array  $rm_attnames     See description for tln_sanitize.
 *                                Shape used here: tag regex => list of attribute-name regexes.
 * @param array  $bad_attvals     See description for tln_sanitize.
 *                                Shape used here: tag regex => (attribute-name regex =>
 *                                array(match patterns, replacements)).
 * @param array  $add_attr_to_tag See description for tln_sanitize.
 *                                Shape used here: tag regex => attributes to merge in.
 * @return array Array with modified attributes.
 */
function tln_fixatts($tagname, $attary, $rm_attnames, $bad_attvals, $add_attr_to_tag)
{
    // Guard against callers handing over a non-array for a tag without attributes.
    if (!is_array($attary)) {
        $attary = array();
    }

    // foreach iterates over a snapshot of the array, so $attary itself can be
    // modified safely inside the loop (each() no longer exists in PHP 8).
    foreach ($attary as $attname => $attvalue) {
        /**
         * See if this attribute should be removed.
         */
        foreach ($rm_attnames as $matchtag => $matchattrs) {
            if (preg_match($matchtag, $tagname)) {
                foreach ($matchattrs as $matchattr) {
                    if (preg_match($matchattr, $attname)) {
                        unset($attary[$attname]);
                        // Skip straight to the next attribute. A plain
                        // "continue" would only leave the innermost loop and
                        // the value checks below could re-insert the
                        // attribute that was just removed.
                        continue 3;
                    }
                }
            }
        }

        /**
         * Remove any backslashes, entities, or extraneous whitespace.
         * (Both helpers are defined elsewhere; they are expected to modify
         * $attvalue in place.)
         */
        tln_defang($attvalue);
        tln_unspace($attvalue);

        /**
         * Now run the value checks: every rule whose tag pattern and
         * attribute-name pattern both match gets to rewrite the value.
         */
        foreach ($bad_attvals as $matchtag => $matchattrs) {
            if (preg_match($matchtag, $tagname)) {
                foreach ($matchattrs as $matchattr => $valary) {
                    if (preg_match($matchattr, $attname)) {
                        /**
                         * There are two arrays in valary.
                         * First is matches.
                         * Second one is replacements
                         */
                        list($valmatch, $valrepl) = $valary;
                        $newvalue = preg_replace($valmatch, $valrepl, $attvalue);
                        // Strict comparison: a loose one treats numeric-looking
                        // strings such as "1.0" and "1" as equal and would
                        // silently discard the rewrite.
                        if ($newvalue !== $attvalue) {
                            $attary[$attname] = $newvalue;
                        }
                    }
                }
            }
        }
    }

    /**
     * See if we need to append any attributes to this tag.
     */
    foreach ($add_attr_to_tag as $matchtag => $addattary) {
        if (preg_match($matchtag, $tagname)) {
            $attary = array_merge($attary, $addattary);
        }
    }

    return $attary;
}
/**
 * Extracts the contents of a style block from $body and sanitizes it.
 *
 * Scanning starts at offset $pos and runs until a closing style tag is
 * found. HTML comments inside the block are skipped entirely, so a closing
 * style tag hidden inside a comment does not end the block.
 *
 * The collected CSS is then cleaned up: a "body {...}" rule is renamed to
 * ".bodyclass", blocks containing 8bit or control characters are replaced
 * by a placeholder comment, @import lines are removed, url(...) values are
 * handed to tln_fixurl(), and a list of dangerous keywords is neutralised.
 *
 * @param string $body                  Text that contains the style block.
 * @param int    $pos                   Offset in $body at which scanning starts.
 * @param string $trans_image_path      Passed through unchanged to tln_fixurl()
 *                                      (presumably the replacement image path - confirm there).
 * @param mixed  $block_external_images Passed through unchanged to tln_fixurl()
 *                                      (presumably a boolean flag - confirm there).
 * @return array array(sanitized content, offset just past the closing tag),
 *               or array(FALSE, strlen($body)) when no closing style tag exists.
 */
function tln_fixstyle($body, $pos, $trans_image_path, $block_external_images)
{
    // workaround for </style> in between comments
    $content = '';
    $sToken = '';
    $bSucces = false;
    $bEndTag = false;

    for ($i = $pos, $iCount = strlen($body); $i < $iCount; ++$i) {
        $char = $body[$i];
        switch ($char) {
            case '<':
                $sToken = $char;
                break;
            case '/':
                if ($sToken === '<') {
                    // "<" followed by "/": start collecting an end tag
                    $sToken .= $char;
                    $bEndTag = true;
                } else {
                    $content .= $char;
                }
                break;
            case '>':
                if ($bEndTag) {
                    $sToken .= $char;
                    if (preg_match('/\\<\\/\\s*style\\s*\\>/i', $sToken)) {
                        // closing style tag found: stop scanning
                        $newpos = $i + 1;
                        $bSucces = true;
                        break 2;
                    } else {
                        // some other end tag, keep it as content
                        $content .= $sToken;
                    }
                    $bEndTag = false;
                } else {
                    $content .= $char;
                }
                break;
            case '!':
                if ($sToken === '<') {
                    // possible comment
                    if (isset($body[$i + 2]) && substr($body, $i, 3) === '!--') {
                        $iEnd = strpos($body, '-->', $i + 3);
                        if ($iEnd === false) {
                            // no end comment
                            $i = $iCount;
                        } else {
                            // Land on the last character of the terminator so
                            // that the loop's ++$i resumes right after it;
                            // otherwise its trailing "->" leaks into the CSS.
                            $i = $iEnd + 2;
                        }
                        $sToken = '';
                    }
                } else {
                    $content .= $char;
                }
                break;
            default:
                if ($bEndTag) {
                    $sToken .= $char;
                } else {
                    $content .= $char;
                }
                break;
        }
    }

    if (!$bSucces) {
        return array(false, strlen($body));
    }

    /**
     * First look for general BODY style declaration, which would be
     * like so:
     * body {background: blah-blah}
     * and change it to .bodyclass so we can just assign it to a <div>
     */
    $content = preg_replace("|body(\\s*\\{.*?\\})|si", ".bodyclass\\1", $content);

    // first check for 8bit sequences and disallowed control characters
    if (preg_match('/[\\16-\\37\\200-\\377]+/', $content)) {
        $content = '<!-- style block removed by html filter due to presence of 8bit characters -->';
        return array($content, $newpos);
    }

    // remove @import line
    $content = preg_replace("/^\\s*(@import.*)\$/mi", "\n<!-- @import rules forbidden -->\n", $content);

    // normalise backslash-obfuscated spellings of "url" before looking for them
    $content = preg_replace("/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i", 'url', $content);

    /**
     * Fix url('blah') declarations.
     */
    preg_match_all("/url\\s*\\((.+)\\)/si", $content, $aMatch);
    if (!empty($aMatch[1])) {
        $aValue = $aReplace = array();
        foreach ($aMatch[1] as $sMatch) {
            // url value; tln_fixurl() is expected to rewrite it in place
            $urlvalue = $sMatch;
            tln_fixurl('style', $urlvalue, $trans_image_path, $block_external_images);
            $aValue[] = $sMatch;
            $aReplace[] = $urlvalue;
        }
        $content = str_replace($aValue, $aReplace, $content);
    }

    /**
     * Remove any backslashes, entities, and extraneous whitespace.
     * This is done on a copy: the normalised text only replaces the
     * content when one of the patterns below actually matched in it.
     */
    $contentTemp = $content;
    tln_defang($contentTemp);
    tln_unspace($contentTemp);

    $match = array(
        '/\\/\\*.*\\*\\//',
        '/expression/i',
        '/behaviou*r/i',
        '/binding/i',
        '/include-source/i',
        '/javascript/i',
        '/script/i',
        '/position/i'
    );
    $replace = array('', 'idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy', '');
    $contentNew = preg_replace($match, $replace, $contentTemp);
    if ($contentNew !== $contentTemp) {
        $content = $contentNew;
    }

    return array($content, $newpos);
}