/**
 * Configure the output fragments (list/item/heading start and end markup,
 * inline separator) for one list mode.
 *
 * @param $listmode String 'inline', 'ordered', 'unordered', 'definition', 'H2', 'H3', 'H4' or 'userformat'; any other value leaves the markup fragments untouched
 * @param $secseparators stored unchanged in sSectionTags
 * @param $multisecseparators stored unchanged in aMultiSecSeparators
 * @param $inlinetext String separator kept in sInline for 'inline' and 'userformat'; an empty value becomes ' - ' unless the mode is 'userformat'
 * @param $listattr String raw attributes for the list element, passed through Sanitizer::fixTagAttributes()
 * @param $itemattr String raw attributes for the item element, passed through Sanitizer::fixTagAttributes()
 * @param $listseparators Array only read in 'userformat': up to four entries (list start, item start, item end, list end)
 * @param $iOffset number of items preceding this list; an ordered list starts counting at $iOffset + 1
 * @param $dominantSection section number, stored minus one (0 based index)
 */
function __construct($listmode, $secseparators, $multisecseparators, $inlinetext, $listattr = '', $itemattr = '', $listseparators, $iOffset, $dominantSection)
 {
     // Every mode except userformat needs some separator between inline items
     $isUserFormat = $listmode == 'userformat';
     if (!$isUserFormat && $inlinetext == '') {
         $inlinetext = ' - ';
     }
     $this->name = $listmode;

     // Sanitised attribute strings, each with a leading blank when non-empty
     $listAttrHtml = '';
     if ($listattr != '') {
         $listAttrHtml = ' ' . Sanitizer::fixTagAttributes($listattr, 'ul');
     }
     $itemAttrHtml = '';
     if ($itemattr != '') {
         $itemAttrHtml = ' ' . Sanitizer::fixTagAttributes($itemattr, 'li');
     }

     $this->sSectionTags = $secseparators;
     $this->aMultiSecSeparators = $multisecseparators;
     // kept as a 0 based index
     $this->iDominantSection = $dominantSection - 1;

     switch ($listmode) {
         case 'inline':
             // A <BR /> inside the separator means one item per line
             // (pseudo-inline), so the whole list gets a wrapping DIV
             if (stristr($inlinetext, '<BR />')) {
                 $this->sListStart = "<DIV{$listAttrHtml}>";
                 $this->sListEnd = '</DIV>';
             }
             $this->sItemStart = "<SPAN{$itemAttrHtml}>";
             $this->sItemEnd = '</SPAN>';
             $this->sInline = $inlinetext;
             break;

         case 'ordered':
             // Numbering continues after the items that were skipped
             $firstNumber = $iOffset == 0 ? 1 : $iOffset + 1;
             $this->sListStart = '<OL start=' . $firstNumber . ' ' . $listAttrHtml . '>';
             $this->sListEnd = '</OL>';
             $this->sItemStart = "<LI{$itemAttrHtml}>";
             $this->sItemEnd = '</LI>';
             break;

         case 'unordered':
             $this->sListStart = "<UL{$listAttrHtml}>";
             $this->sListEnd = '</UL>';
             $this->sItemStart = "<LI{$itemAttrHtml}>";
             $this->sItemEnd = '</LI>';
             break;

         case 'definition':
             $this->sListStart = "<DL{$listAttrHtml}>";
             $this->sListEnd = '</DL>';
             // The item attributes are not applied in this mode (it is
             // undecided whether they belong on the dt or the dd element)
             $this->sHeadingStart = '<DT>';
             $this->sHeadingEnd = '</DT><DD>';
             $this->sItemEnd = '</DD>';
             break;

         case 'H2':
         case 'H3':
         case 'H4':
             // The mode name itself is used as the heading element name
             $this->sListStart = "<DIV{$listAttrHtml}>";
             $this->sListEnd = '</DIV>';
             $this->sHeadingStart = "<{$listmode}>";
             $this->sHeadingEnd = "</{$listmode}>";
             break;

         case 'userformat':
             // With n separators (1 to 4) the first n of: list start, item
             // start, item end, list end are set. Any other count sets none.
             $separatorCount = count($listseparators);
             if ($separatorCount <= 4) {
                 if ($separatorCount >= 4) {
                     $this->sListEnd = $listseparators[3];
                 }
                 if ($separatorCount >= 3) {
                     $this->sItemEnd = $listseparators[2];
                 }
                 if ($separatorCount >= 2) {
                     $this->sItemStart = $listseparators[1];
                 }
                 if ($separatorCount >= 1) {
                     $this->sListStart = $listseparators[0];
                 }
             }
             $this->sInline = $inlinetext;
             break;
     }
 }
Exemple #2
0
 /**
  * Cleans up HTML, removes dangerous tags and attributes, and
  * removes HTML comments.
  *
  * The text is split on '<'. A fragment that parses as a tag whose name is
  * on the allowed list is re-emitted with its attributes passed through
  * Sanitizer::fixTagAttributes(); any other fragment gets its '<' and '>'
  * escaped. When $wgUseTidy is off, a stack of open tags is also kept so
  * that badly nested tags are escaped and tags still open at the end of the
  * text are closed. When $wgUseTidy is on, no balancing is done here.
  *
  * @private
  * @param $text String
  * @param $processCallback Callback to do any variable or parameter replacements in HTML attribute values
  * @param $args Array for the processing callback
  * @param $extratags Array for any extra tags to include
  * @param $removetags Array for any tags (default or extra) to exclude
  * @return string
  */
 static function removeHTMLtags($text, $processCallback = null, $args = array(), $extratags = array(), $removetags = array())
 {
     global $wgUseTidy;
     static $htmlpairsStatic, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags, $htmllist, $listtags, $htmlsingleallowed, $htmlelementsStatic, $staticInitialised;
     wfProfileIn(__METHOD__);
     if (!$staticInitialised) {
         # Tags that are rejected when written self-closed (<b/>)
         $htmlpairsStatic = array('b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's', 'strike', 'strong', 'tt', 'var', 'div', 'center', 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre', 'ruby', 'rt', 'rb', 'rp', 'p', 'span', 'abbr', 'dfn', 'kbd', 'samp', 'thead', 'tbody', 'tfoot');
         # Tags that are emitted without being tracked on the tag stack
         $htmlsingle = array('br', 'hr', 'li', 'dt', 'dd');
         # Tags that are always emitted self-closed and may not be closed
         $htmlsingleonly = array('br', 'hr');
         # Tags that may be opened again while already open
         $htmlnest = array('table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul', 'dl', 'font', 'big', 'small', 'sub', 'sup', 'span');
         # Tags that are only accepted while a table is open
         $tabletags = array('td', 'th', 'tr');
         # A closing </li> is tolerated while one of these lists is on top
         $htmllist = array('ul', 'ol');
         $listtags = array('li');
         global $wgAllowImageTag;
         if ($wgAllowImageTag) {
             $htmlsingle[] = 'img';
             $htmlsingleonly[] = 'img';
         }
         # Tags whose closing tag may be omitted
         $htmlsingleallowed = array_unique(array_merge($htmlsingle, $tabletags));
         $htmlelementsStatic = array_unique(array_merge($htmlsingle, $htmlpairsStatic, $htmlnest));
         # Convert them all to hashtables for faster lookup
         $vars = array('htmlpairsStatic', 'htmlsingle', 'htmlsingleonly', 'htmlnest', 'tabletags', 'htmllist', 'listtags', 'htmlsingleallowed', 'htmlelementsStatic');
         foreach ($vars as $var) {
             ${$var} = array_flip(${$var});
         }
         $staticInitialised = true;
     }
     # Populate $htmlpairs and $htmlelements with the $extratags and $removetags arrays
     $extratags = array_flip($extratags);
     $removetags = array_flip($removetags);
     $htmlpairs = array_merge($extratags, $htmlpairsStatic);
     $htmlelements = array_diff_key(array_merge($extratags, $htmlelementsStatic), $removetags);
     # Remove HTML comments
     $text = Sanitizer::removeHTMLcomments($text);
     $bits = explode('<', $text);
     # Everything before the first '<' is plain text
     $text = str_replace('>', '&gt;', array_shift($bits));
     if (!$wgUseTidy) {
         # $tagstack: tags open in the current table context
         # $tablestack: the tag stacks that were active when each table opened
         $tagstack = $tablestack = array();
         foreach ($bits as $x) {
             $regs = array();
             # $slash: Does the current element start with a '/'?
             # $t: Current element name
             # $params: String between element name and >
             # $brace: Ending '>' or '/>'
             # $rest: Everything until the next element of $bits
             if (preg_match('!^(/?)(\\w+)([^>]*?)(/{0,1}>)([^<]*)$!', $x, $regs)) {
                 list(, $slash, $t, $params, $brace, $rest) = $regs;
             } else {
                 $slash = $t = $params = $brace = $rest = null;
             }
             $badtag = false;
             if (isset($htmlelements[$t = strtolower($t)])) {
                 # Check our stack
                 if ($slash && isset($htmlsingleonly[$t])) {
                     $badtag = true;
                 } elseif ($slash) {
                     # Closing a tag... is it the one we just opened?
                     $ot = @array_pop($tagstack);
                     if ($ot != $t) {
                         if (isset($htmlsingleallowed[$ot])) {
                             # Pop all elements with an optional close tag
                             # and see if we find a match below them
                             $optstack = array();
                             array_push($optstack, $ot);
                             $ot = @array_pop($tagstack);
                             while ($ot != $t && isset($htmlsingleallowed[$ot])) {
                                 array_push($optstack, $ot);
                                 $ot = @array_pop($tagstack);
                             }
                             if ($t != $ot) {
                                 # No match. Restore the stack exactly as it was:
                                 # first the element that stopped the search (it
                                 # was popped too and would otherwise be lost),
                                 # then the optional elements above it
                                 $badtag = true;
                                 if ($ot !== null) {
                                     array_push($tagstack, $ot);
                                 }
                                 while ($ot = @array_pop($optstack)) {
                                     array_push($tagstack, $ot);
                                 }
                             } elseif ($t == 'table') {
                                 # </table> matched below unclosed rows/cells:
                                 # go back to the stack of the enclosing context,
                                 # just like a directly matched </table> does
                                 $tagstack = array_pop($tablestack);
                             }
                         } else {
                             @array_push($tagstack, $ot);
                             # <li> can be nested in <ul> or <ol>, skip those cases:
                             if (!isset($htmllist[$ot]) || !isset($listtags[$t])) {
                                 $badtag = true;
                             }
                         }
                     } else {
                         if ($t == 'table') {
                             $tagstack = array_pop($tablestack);
                         }
                     }
                     $newparams = '';
                 } else {
                     # Keep track for later
                     if (isset($tabletags[$t]) && !in_array('table', $tagstack)) {
                         $badtag = true;
                     } elseif (in_array($t, $tagstack) && !isset($htmlnest[$t])) {
                         $badtag = true;
                         # Is it a self closed htmlpair ? (bug 5487)
                     } elseif ($brace == '/>' && isset($htmlpairs[$t])) {
                         $badtag = true;
                     } elseif (isset($htmlsingleonly[$t])) {
                         # Hack to force empty tag for uncloseable elements
                         $brace = '/>';
                     } elseif (isset($htmlsingle[$t])) {
                         # Hack to not close $htmlsingle tags
                         $brace = null;
                     } elseif (isset($tabletags[$t]) && in_array($t, $tagstack)) {
                         // New table tag but forgot to close the previous one
                         $text .= "</{$t}>";
                     } else {
                         if ($t == 'table') {
                             # A table starts a fresh stack; the current one is
                             # put aside until the table is closed
                             array_push($tablestack, $tagstack);
                             $tagstack = array();
                         }
                         array_push($tagstack, $t);
                     }
                     # Replace any variables or template parameters with
                     # plaintext results.
                     if (is_callable($processCallback)) {
                         call_user_func_array($processCallback, array(&$params, $args));
                     }
                     # Strip non-approved attributes from the tag
                     $newparams = Sanitizer::fixTagAttributes($params, $t);
                 }
                 if (!$badtag) {
                     $rest = str_replace('>', '&gt;', $rest);
                     $close = $brace == '/>' && !$slash ? ' /' : '';
                     $text .= "<{$slash}{$t}{$newparams}{$close}>{$rest}";
                     continue;
                 }
             }
             # Not an acceptable tag: show it as literal text
             $text .= '&lt;' . str_replace('>', '&gt;', $x);
         }
         # Close off any remaining tags
         while (is_array($tagstack) && ($t = array_pop($tagstack))) {
             $text .= "</{$t}>\n";
             if ($t == 'table') {
                 $tagstack = array_pop($tablestack);
             }
         }
     } else {
         # this might be possible using tidy itself
         foreach ($bits as $x) {
             # Same fragment parsing as above; a fragment that is not a tag
             # leaves all parts null and is escaped below
             $regs = array();
             if (preg_match('/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/', $x, $regs)) {
                 list(, $slash, $t, $params, $brace, $rest) = $regs;
             } else {
                 $slash = $t = $params = $brace = $rest = null;
             }
             if (isset($htmlelements[$t = strtolower($t)])) {
                 if (is_callable($processCallback)) {
                     call_user_func_array($processCallback, array(&$params, $args));
                 }
                 $newparams = Sanitizer::fixTagAttributes($params, $t);
                 $rest = str_replace('>', '&gt;', $rest);
                 $text .= "<{$slash}{$t}{$newparams}{$brace}{$rest}";
             } else {
                 $text .= '&lt;' . str_replace('>', '&gt;', $x);
             }
         }
     }
     wfProfileOut(__METHOD__);
     return $text;
 }
Exemple #3
0
 /**
  * Checks that Sanitizer::fixTagAttributes() returns the expected attribute
  * string for the given tag and raw attributes.
  *
  * @dataProvider provideAttributeSupport
  */
 function testAttributeSupport($tag, $attributes, $expected, $message)
 {
     $actual = Sanitizer::fixTagAttributes($attributes, $tag);
     $this->assertEquals($expected, $actual, $message);
 }
Exemple #4
0
 /**
  * parse the wiki syntax used to render tables
  *
  * Works line by line: "{|" opens a table, "|}" closes it, "|-" starts a
  * row, "|", "!" and "|+" start a cell, header cell or caption. Lines
  * outside any table are copied through unchanged. Tables still open at
  * the end of the text are closed.
  *
  * @private
  * @param $text String wikitext to convert
  * @return string the text with table syntax replaced by HTML
  */
 function doTableStuff($text)
 {
     wfProfileIn(__METHOD__);
     # RTE (Rich Text Editor) - begin
     # Used to determine whether the Parser running in RTE mode or not
     global $wgRTEParserEnabled;
     # RTE - end
     $lines = StringUtils::explode("\n", $text);
     $out = '';
     # The following arrays are stacks with one entry per currently open
     # (nested) table.
     # Is currently a td tag open?
     $td_history = array();
     # Save history of last tag activated (td, th or caption)
     $last_tag_history = array();
     # Is currently a tr tag open?
     $tr_history = array();
     # history of tr attributes
     $tr_attributes = array();
     # Did this table open a <tr> element?
     $has_opened_tr = array();
     # indent level of the table
     $indent_level = 0;
     foreach ($lines as $outLine) {
         $line = trim($outLine);
         # RTE (Rich Text Editor) - begin
         # @author: Inez Korczyński
         # Initialize this variable regardless of the RTE mode being on/off,
         # then it can be used in next batch of code without checking RTE mode.
         $RTEcomment = null;
         if (!empty($wgRTEParserEnabled)) {
             # Check if there is a wikitext comment placeholder at the beginning of given line,
             # then cut it off - to have proper MediaWiki table processing - and store in variable for later recovery
             # NOTE(review): the loose "!= null" also skips an index of 0 - confirm
             # that RTEMarker::getDataIdx() never returns 0 for a valid marker
             $RTEdataIdx = RTEMarker::getDataIdx(RTEMarker::PLACEHOLDER, $line, false);
             if ($RTEdataIdx != null) {
                 $RTEdata = RTEData::get('placeholder', $RTEdataIdx);
                 if ($RTEdata && $RTEdata['type'] == 'comment') {
                     # presumably 9 is the length of a placeholder marker - TODO confirm
                     $RTEcomment = substr($line, 0, 9);
                     $line = substr($line, 9);
                 }
             }
         }
         # RTE - end
         if ($line === '') {
             # empty line, go to next line
             $out .= $outLine . "\n";
             continue;
         }
         $first_character = $line[0];
         $matches = array();
         if (preg_match('/^(:*)\\{\\|(.*)$/', $line, $matches)) {
             # First check if we are starting a new table
             $indent_level = strlen($matches[1]);
             $attributes = $this->mStripState->unstripBoth($matches[2]);
             # RTE (Rich Text Editor) - begin
             # @author: Inez Korczyński
             if (!empty($wgRTEParserEnabled)) {
                 # Throw an RTE edgecase if there is RTE marker (\x7f) in table attributes
                 # Example: {| {{some template call}}
                 # The marker byte is written as an escape sequence so that the
                 # needle can never end up empty
                 if (strpos($attributes, "\x7f") !== false) {
                     RTE::$edgeCases[] = 'COMPLEX.04';
                 }
             }
             # RTE - end
             $attributes = Sanitizer::fixTagAttributes($attributes, 'table');
             # Each leading ':' indents the table by one <dl><dd> level
             $outLine = str_repeat('<dl><dd>', $indent_level) . "<table{$attributes}>";
             # RTE (Rich Text Editor) - begin
             $outLine = $RTEcomment . $outLine;
             $RTEcomment = null;
             # RTE - end
             array_push($td_history, false);
             array_push($last_tag_history, '');
             array_push($tr_history, false);
             array_push($tr_attributes, '');
             array_push($has_opened_tr, false);
         } elseif (count($td_history) == 0) {
             # Don't do any of the following
             $out .= $outLine . "\n";
             continue;
         } elseif (substr($line, 0, 2) === '|}') {
             # We are ending a table
             $line = '</table>' . substr($line, 2);
             $last_tag = array_pop($last_tag_history);
             if (!array_pop($has_opened_tr)) {
                 # A table without any row gets one empty row
                 $line = "<tr><td></td></tr>{$line}";
             }
             if (array_pop($tr_history)) {
                 $line = "</tr>{$line}";
             }
             if (array_pop($td_history)) {
                 $line = "</{$last_tag}>{$line}";
             }
             array_pop($tr_attributes);
             $outLine = $line . str_repeat('</dd></dl>', $indent_level);
         } elseif (substr($line, 0, 2) === '|-') {
             # Now we have a table row
             $line = preg_replace('#^\\|-+#', '', $line);
             # Whats after the tag is now only attributes
             $attributes = $this->mStripState->unstripBoth($line);
             # RTE (Rich Text Editor) - begin
             # @author: Inez Korczyński
             if (!empty($wgRTEParserEnabled)) {
                 # Throw an RTE edgecase if there is RTE marker (\x7f) in row attributes
                 if (strpos($attributes, "\x7f") !== false) {
                     RTE::$edgeCases[] = 'COMPLEX.05';
                 }
             }
             # RTE - end
             $attributes = Sanitizer::fixTagAttributes($attributes, 'tr');
             # The attributes are kept until the first cell of the row is
             # seen; only then is the <tr> actually emitted
             array_pop($tr_attributes);
             array_push($tr_attributes, $attributes);
             $line = '';
             $last_tag = array_pop($last_tag_history);
             array_pop($has_opened_tr);
             array_push($has_opened_tr, true);
             if (array_pop($tr_history)) {
                 $line = '</tr>';
             }
             if (array_pop($td_history)) {
                 $line = "</{$last_tag}>{$line}";
             }
             $outLine = $line;
             array_push($tr_history, false);
             array_push($td_history, false);
             array_push($last_tag_history, '');
         } elseif ($first_character === '|' || $first_character === '!' || substr($line, 0, 2) === '|+') {
             # This might be cell elements, td, th or captions
             if (substr($line, 0, 2) === '|+') {
                 $first_character = '+';
                 $line = substr($line, 1);
             }
             $line = substr($line, 1);
             if ($first_character === '!') {
                 $line = str_replace('!!', '||', $line);
             }
             # Split up multiple cells on the same line.
             # FIXME : This can result in improper nesting of tags processed
             # by earlier parser steps, but should avoid splitting up eg
             # attribute values containing literal "||".
             $cells = StringUtils::explodeMarkup('||', $line);
             $outLine = '';
             # Loop through each table cell
             foreach ($cells as $cell) {
                 $previous = '';
                 if ($first_character !== '+') {
                     # Cells (but not captions) live in a row: open one if
                     # none is open, using the attributes saved by "|-"
                     $tr_after = array_pop($tr_attributes);
                     if (!array_pop($tr_history)) {
                         $previous = "<tr{$tr_after}>\n";
                     }
                     array_push($tr_history, true);
                     array_push($tr_attributes, '');
                     array_pop($has_opened_tr);
                     array_push($has_opened_tr, true);
                 }
                 $last_tag = array_pop($last_tag_history);
                 if (array_pop($td_history)) {
                     # Close the previous cell before the new one starts
                     $previous = "</{$last_tag}>{$previous}";
                 }
                 if ($first_character === '|') {
                     $last_tag = 'td';
                 } elseif ($first_character === '!') {
                     $last_tag = 'th';
                 } elseif ($first_character === '+') {
                     $last_tag = 'caption';
                 } else {
                     $last_tag = '';
                 }
                 array_push($last_tag_history, $last_tag);
                 # A cell could contain both parameters and data
                 $cell_data = explode('|', $cell, 2);
                 # Bug 553: Note that a '|' inside an invalid link should not
                 # be mistaken as delimiting cell parameters
                 if (strpos($cell_data[0], '[[') !== false) {
                     $cell = "{$previous}<{$last_tag}>{$cell}";
                 } elseif (count($cell_data) == 1) {
                     $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
                 } else {
                     $attributes = $this->mStripState->unstripBoth($cell_data[0]);
                     $attributes = Sanitizer::fixTagAttributes($attributes, $last_tag);
                     $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
                 }
                 $outLine .= $cell;
                 array_push($td_history, true);
             }
         } else {
             # Any other line inside a table is copied through as it is
             # RTE (Rich Text Editor) - begin
             # @author: Inez Korczyński
             if (!empty($wgRTEParserEnabled)) {
                 if (empty($td_history[0]) || $last_tag == 'caption') {
                     # NOTE(review): this needle may originally have carried the
                     # \x7f marker byte as well - confirm against RTEMarker
                     if (strpos($outLine, "-comment-") !== false) {
                         RTE::$edgeCases[] = 'COMPLEX.06';
                     }
                 }
             }
             # RTE - end
         }
         # RTE (Rich Text Editor) - begin
         # @author: Inez Korczyński
         if (!empty($RTEcomment)) {
             # Throw an edgecase if $RTEcomment did not get flushed (nulled) yet
             RTE::$edgeCases[] = 'COMPLEX.10';
         }
         # RTE - end
         $out .= $outLine . "\n";
     }
     # Closing open td, tr && table
     while (count($td_history) > 0) {
         if (array_pop($td_history)) {
             $out .= "</td>\n";
         }
         if (array_pop($tr_history)) {
             $out .= "</tr>\n";
         }
         if (!array_pop($has_opened_tr)) {
             $out .= "<tr><td></td></tr>\n";
         }
         $out .= "</table>\n";
     }
     # Remove trailing line-ending (b/c)
     if (substr($out, -1) === "\n") {
         $out = substr($out, 0, -1);
     }
     # special case: don't return empty table
     if ($out === "<table>\n<tr><td></td></tr>\n</table>") {
         $out = '';
     }
     wfProfileOut(__METHOD__);
     return $out;
 }
Exemple #5
0
 /**
  * Cleans up HTML, removes dangerous tags and attributes, and
  * removes HTML comments.
  *
  * The text is split on '<'. A fragment that parses as a tag whose name is
  * on the allowed list is re-emitted with its attributes passed through
  * Sanitizer::fixTagAttributes(); any other fragment gets its '<' and '>'
  * escaped. With $wgUserHtml off the allowed list is empty, so every tag is
  * escaped. When $wgUseTidy is off, a stack of open tags is also kept so
  * that badly nested tags are escaped and tags still open at the end of the
  * text are closed. When $wgUseTidy is on, no balancing is done here.
  *
  * @private
  * @param string $text
  * @param callback $processCallback to do any variable or parameter replacements in HTML attribute values
  * @param array $args for the processing callback
  * @return string
  */
 static function removeHTMLtags($text, $processCallback = null, $args = array())
 {
     global $wgUseTidy, $wgUserHtml;
     static $htmlpairs, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags, $htmllist, $listtags, $htmlsingleallowed, $htmlelements, $staticInitialised;
     wfProfileIn(__METHOD__);
     if (!$staticInitialised) {
         if ($wgUserHtml) {
             # Tags that are rejected when written self-closed (<b/>)
             $htmlpairs = array('b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's', 'strike', 'strong', 'tt', 'var', 'div', 'center', 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre', 'ruby', 'rt', 'rb', 'rp', 'p', 'span');
             # Tags that are emitted without being tracked on the tag stack
             $htmlsingle = array('br', 'hr', 'li', 'dt', 'dd');
             # Tags that are always emitted self-closed and may not be closed
             $htmlsingleonly = array('br', 'hr');
             # Tags that may be opened again while already open
             $htmlnest = array('table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul', 'dl', 'font', 'big', 'small', 'sub', 'sup', 'span');
             # Tags that are only accepted while a table is open
             $tabletags = array('td', 'th', 'tr');
             # A closing </li> is tolerated while one of these lists is on top
             $htmllist = array('ul', 'ol');
             $listtags = array('li');
         } else {
             # Every list has to exist, because all of them are flipped below
             $htmlpairs = array();
             $htmlsingle = array();
             $htmlsingleonly = array();
             $htmlnest = array();
             $tabletags = array();
             $htmllist = array();
             $listtags = array();
         }
         # Tags whose closing tag may be omitted
         $htmlsingleallowed = array_merge($htmlsingle, $tabletags);
         $htmlelements = array_merge($htmlsingle, $htmlpairs, $htmlnest);
         # Convert them all to hashtables for faster lookup
         $vars = array('htmlpairs', 'htmlsingle', 'htmlsingleonly', 'htmlnest', 'tabletags', 'htmllist', 'listtags', 'htmlsingleallowed', 'htmlelements');
         foreach ($vars as $var) {
             ${$var} = array_flip(${$var});
         }
         $staticInitialised = true;
     }
     # Remove HTML comments
     $text = Sanitizer::removeHTMLcomments($text);
     $bits = explode('<', $text);
     # Everything before the first '<' is plain text
     $text = array_shift($bits);
     if (!$wgUseTidy) {
         # $tagstack: tags open in the current table context
         # $tablestack: the tag stacks that were active when each table opened
         $tagstack = $tablestack = array();
         foreach ($bits as $x) {
             # $slash: Does the current element start with a '/'?
             # $t: Current element name
             # $params: String between element name and >
             # $brace: Ending '>' or '/>'
             # $rest: Everything until the next element of $bits
             # A fragment that is not a tag leaves all of them null
             $regs = array();
             if (preg_match('!^(/?)(\\w+)([^>]*?)(/{0,1}>)([^<]*)$!', $x, $regs)) {
                 list(, $slash, $t, $params, $brace, $rest) = $regs;
             } else {
                 $slash = $t = $params = $brace = $rest = null;
             }
             $badtag = 0;
             if (isset($htmlelements[$t = strtolower($t)])) {
                 # Check our stack
                 if ($slash) {
                     # Closing a tag...
                     if (isset($htmlsingleonly[$t])) {
                         $badtag = 1;
                     } elseif (($ot = @array_pop($tagstack)) != $t) {
                         if (isset($htmlsingleallowed[$ot])) {
                             # Pop all elements with an optional close tag
                             # and see if we find a match below them
                             $optstack = array();
                             array_push($optstack, $ot);
                             while (($ot = @array_pop($tagstack)) != $t && isset($htmlsingleallowed[$ot])) {
                                 array_push($optstack, $ot);
                             }
                             if ($t != $ot) {
                                 # No match. Restore the stack exactly as it was:
                                 # first the element that stopped the search (it
                                 # was popped too and would otherwise be lost),
                                 # then the optional elements above it
                                 $badtag = 1;
                                 if ($ot !== null) {
                                     array_push($tagstack, $ot);
                                 }
                                 while ($ot = @array_pop($optstack)) {
                                     array_push($tagstack, $ot);
                                 }
                             } elseif ($t == 'table') {
                                 # </table> matched below unclosed rows/cells:
                                 # go back to the stack of the enclosing context,
                                 # just like a directly matched </table> does
                                 $tagstack = array_pop($tablestack);
                             }
                         } else {
                             @array_push($tagstack, $ot);
                             # <li> can be nested in <ul> or <ol>, skip those cases:
                             if (!(isset($htmllist[$ot]) && isset($listtags[$t]))) {
                                 $badtag = 1;
                             }
                         }
                     } else {
                         if ($t == 'table') {
                             $tagstack = array_pop($tablestack);
                         }
                     }
                     $newparams = '';
                 } else {
                     # Keep track for later
                     if (isset($tabletags[$t]) && !in_array('table', $tagstack)) {
                         $badtag = 1;
                     } elseif (in_array($t, $tagstack) && !isset($htmlnest[$t])) {
                         $badtag = 1;
                     } elseif ($brace == '/>' && isset($htmlpairs[$t])) {
                         # Is it a self closed htmlpair ? (bug 5487)
                         $badtag = 1;
                     } elseif (isset($htmlsingleonly[$t])) {
                         # Hack to force empty tag for uncloseable elements
                         $brace = '/>';
                     } elseif (isset($htmlsingle[$t])) {
                         # Hack to not close $htmlsingle tags
                         $brace = null;
                     } else {
                         if ($t == 'table') {
                             # A table starts a fresh stack; the current one is
                             # put aside until the table is closed
                             array_push($tablestack, $tagstack);
                             $tagstack = array();
                         }
                         array_push($tagstack, $t);
                     }
                     # Replace any variables or template parameters with
                     # plaintext results.
                     if (is_callable($processCallback)) {
                         call_user_func_array($processCallback, array(&$params, $args));
                     }
                     # Strip non-approved attributes from the tag
                     $newparams = Sanitizer::fixTagAttributes($params, $t);
                 }
                 if (!$badtag) {
                     $rest = str_replace('>', '&gt;', $rest);
                     # Only an opening tag may be emitted self-closed; a closing
                     # tag written as </x/> must not become </x />
                     $close = $brace == '/>' && !$slash ? ' /' : '';
                     $text .= "<{$slash}{$t}{$newparams}{$close}>{$rest}";
                     continue;
                 }
             }
             # Not an acceptable tag: show it as literal text
             $text .= '&lt;' . str_replace('>', '&gt;', $x);
         }
         # Close off any remaining tags
         while (is_array($tagstack) && ($t = array_pop($tagstack))) {
             $text .= "</{$t}>\n";
             if ($t == 'table') {
                 $tagstack = array_pop($tablestack);
             }
         }
     } else {
         # this might be possible using tidy itself
         foreach ($bits as $x) {
             # Same fragment parsing as above; a fragment that is not a tag
             # leaves all parts null and is escaped below
             $regs = array();
             if (preg_match('/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/', $x, $regs)) {
                 list(, $slash, $t, $params, $brace, $rest) = $regs;
             } else {
                 $slash = $t = $params = $brace = $rest = null;
             }
             if (isset($htmlelements[$t = strtolower($t)])) {
                 if (is_callable($processCallback)) {
                     call_user_func_array($processCallback, array(&$params, $args));
                 }
                 $newparams = Sanitizer::fixTagAttributes($params, $t);
                 $rest = str_replace('>', '&gt;', $rest);
                 $text .= "<{$slash}{$t}{$newparams}{$brace}{$rest}";
             } else {
                 $text .= '&lt;' . str_replace('>', '&gt;', $x);
             }
         }
     }
     wfProfileOut(__METHOD__);
     return $text;
 }
 /**
  * Cleans up HTML, removes dangerous tags and attributes, and
  * removes HTML comments
  * @access private
  * @param string $text
  * @param callback $processCallback to do any variable or parameter replacements in HTML attribute values
  * @param array $args for the processing callback
  * @return string
  */
 function removeHTMLtags($text, $processCallback = null, $args = array())
 {
     global $wgUseTidy, $wgUserHtml;
     $fname = 'Parser::removeHTMLtags';
     wfProfileIn($fname);
     if ($wgUserHtml) {
         $htmlpairs = array('b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's', 'strike', 'strong', 'tt', 'var', 'div', 'center', 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre', 'ruby', 'rt', 'rb', 'rp', 'p', 'span');
         $htmlsingle = array('br', 'hr', 'li', 'dt', 'dd');
         $htmlsingleonly = array('br', 'hr');
         $htmlnest = array('table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul', 'dl', 'font', 'big', 'small', 'sub', 'sup', 'span');
         $tabletags = array('td', 'th', 'tr', 'tbody');
     } else {
         $htmlpairs = array();
         $htmlsingle = array();
         $htmlnest = array();
         $tabletags = array();
     }
     $htmlsingle = array_merge($tabletags, $htmlsingle);
     $htmlelements = array_merge($htmlsingle, $htmlpairs);
     # Remove HTML comments
     $text = Sanitizer::removeHTMLcomments($text);
     $bits = explode('<', $text);
     $text = array_shift($bits);
     if (!$wgUseTidy) {
         $tagstack = array();
         $tablestack = array();
         foreach ($bits as $x) {
             $prev = error_reporting(E_ALL & ~(E_NOTICE | E_WARNING));
             preg_match('/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/', $x, $regs);
             list($qbar, $slash, $t, $params, $brace, $rest) = $regs;
             error_reporting($prev);
             $badtag = 0;
             if (in_array($t = strtolower($t), $htmlelements)) {
                 # Check our stack
                 if ($slash) {
                     # Closing a tag...
                     if (in_array($t, $htmlsingleonly)) {
                         $badtag = 1;
                     } elseif (!in_array($t, $htmlsingle) && ($ot = @array_pop($tagstack)) != $t) {
                         @array_push($tagstack, $ot);
                         $badtag = 1;
                     } else {
                         if ($t == 'table') {
                             $tagstack = array_pop($tablestack);
                         }
                         $newparams = '';
                     }
                 } else {
                     # Keep track for later
                     if (in_array($t, $tabletags) && !in_array('table', $tagstack)) {
                         $badtag = 1;
                     } else {
                         if (in_array($t, $tagstack) && !in_array($t, $htmlnest)) {
                             $badtag = 1;
                         } elseif (in_array($t, $htmlsingleonly)) {
                             # Hack to force empty tag for uncloseable elements
                             $brace = '/>';
                         } else {
                             if (!in_array($t, $htmlsingle)) {
                                 if ($t == 'table') {
                                     array_push($tablestack, $tagstack);
                                     $tagstack = array();
                                 }
                                 array_push($tagstack, $t);
                             }
                         }
                     }
                     # Replace any variables or template parameters with
                     # plaintext results.
                     if (is_callable($processCallback)) {
                         call_user_func_array($processCallback, array(&$params, $args));
                     }
                     # Strip non-approved attributes from the tag
                     $newparams = Sanitizer::fixTagAttributes($params, $t);
                 }
                 if (!$badtag) {
                     $rest = str_replace('>', '&gt;', $rest);
                     $close = $brace == '/>' ? ' /' : '';
                     $text .= "<{$slash}{$t}{$newparams}{$close}>{$rest}";
                     continue;
                 }
             }
             $text .= '&lt;' . str_replace('>', '&gt;', $x);
         }
         # Close off any remaining tags
         while (is_array($tagstack) && ($t = array_pop($tagstack))) {
             $text .= "</{$t}>\n";
             if ($t == 'table') {
                 $tagstack = array_pop($tablestack);
             }
         }
     } else {
         # this might be possible using tidy itself
         foreach ($bits as $x) {
             preg_match('/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/', $x, $regs);
             @(list($qbar, $slash, $t, $params, $brace, $rest) = $regs);
             if (in_array($t = strtolower($t), $htmlelements)) {
                 if (is_callable($processCallback)) {
                     call_user_func_array($processCallback, array(&$params, $args));
                 }
                 $newparams = Sanitizer::fixTagAttributes($params, $t);
                 $rest = str_replace('>', '&gt;', $rest);
                 $text .= "<{$slash}{$t}{$newparams}{$brace}{$rest}";
             } else {
                 $text .= '&lt;' . str_replace('>', '&gt;', $x);
             }
         }
     }
     wfProfileOut($fname);
     return $text;
 }
Exemple #7
0
 /**
  * Parse the wiki syntax used to render tables and turn it into HTML.
  *
  * The text is processed line by line. Lines outside of any table are left
  * untouched; inside a table the markers `{|`, `|}`, `|-`, `|`, `!` and `|+`
  * are translated into table, row, cell, header-cell and caption elements.
  * Tables that are still open at the end of the text are closed automatically.
  *
  * @param string $t Text that may contain wiki table markup
  * @return string Text with the table markup replaced by HTML
  * @private
  */
 function doTableStuff($t)
 {
     $fname = 'Parser::doTableStuff';
     wfProfileIn($fname);
     $t = explode("\n", $t);
     # Parallel stacks holding one entry per currently open (nested) table
     $td = array();
     # Is currently a td tag open?
     $ltd = array();
     # Tag name of the last opened cell (td, th or caption)
     $tr = array();
     # Is currently a tr tag open?
     $ltr = array();
     # tr attributes
     $has_opened_tr = array();
     # Did this table open a <tr> element?
     $indent_level = 0;
     # indent level of the table
     foreach ($t as $k => $x) {
         $x = trim($x);
         $fc = substr($x, 0, 1);
         if (preg_match('/^(:*)\\{\\|(.*)$/', $x, $matches)) {
             # Start of a table; each leading colon indents it one level
             $indent_level = strlen($matches[1]);
             $attributes = $this->unstripForHTML($matches[2]);
             $t[$k] = str_repeat('<dl><dd>', $indent_level) . '<table' . Sanitizer::fixTagAttributes($attributes, 'table') . '>';
             array_push($td, false);
             array_push($ltd, '');
             array_push($tr, false);
             array_push($ltr, '');
             array_push($has_opened_tr, false);
         } elseif (count($td) == 0) {
             # Not inside a table: leave the line exactly as it was
             continue;
         } elseif ('|}' == substr($x, 0, 2)) {
             # End of table: prepend the closing tags, innermost first
             $z = '</table>' . substr($x, 2);
             $l = array_pop($ltd);
             if (!array_pop($has_opened_tr)) {
                 # The table never opened a row; emit an empty one
                 $z = '<tr><td></td></tr>' . $z;
             }
             if (array_pop($tr)) {
                 $z = '</tr>' . $z;
             }
             if (array_pop($td)) {
                 $z = '</' . $l . '>' . $z;
             }
             array_pop($ltr);
             $t[$k] = $z . str_repeat('</dd></dl>', $indent_level);
         } elseif ('|-' == substr($x, 0, 2)) {
             # New row. Allows for |--------------- ; what remains after the
             # dashes are the row attributes.
             $x = ltrim(substr($x, 1), '-');
             $z = '';
             $l = array_pop($ltd);
             array_pop($has_opened_tr);
             array_push($has_opened_tr, true);
             if (array_pop($tr)) {
                 $z = '</tr>' . $z;
             }
             if (array_pop($td)) {
                 $z = '</' . $l . '>' . $z;
             }
             array_pop($ltr);
             $t[$k] = $z;
             array_push($tr, false);
             array_push($td, false);
             array_push($ltd, '');
             # The <tr> itself is only written once the first cell shows up
             $attributes = $this->unstripForHTML($x);
             array_push($ltr, Sanitizer::fixTagAttributes($attributes, 'tr'));
         } elseif ('|' == $fc || '!' == $fc) {
             # $x holds cells ("|"), header cells ("!") or a caption ("|+")
             if ('|+' == substr($x, 0, 2)) {
                 $fc = '+';
                 $x = substr($x, 1);
             }
             $after = substr($x, 1);
             if ($fc == '!') {
                 $after = str_replace('!!', '||', $after);
             }
             // Split up multiple cells on the same line.
             // FIXME: This can result in improper nesting of tags processed
             // by earlier parser steps, but should avoid splitting up eg
             // attribute values containing literal "||".
             $after = wfExplodeMarkup('||', $after);
             $t[$k] = '';
             # Loop through each table cell
             foreach ($after as $theline) {
                 $z = '';
                 if ($fc != '+') {
                     # Cells live in a row; open one if none is open yet
                     $tra = array_pop($ltr);
                     if (!array_pop($tr)) {
                         $z = '<tr' . $tra . ">\n";
                     }
                     array_push($tr, true);
                     array_push($ltr, '');
                     array_pop($has_opened_tr);
                     array_push($has_opened_tr, true);
                 }
                 # Close the previous cell, if any, before opening this one
                 $l = array_pop($ltd);
                 if (array_pop($td)) {
                     $z = '</' . $l . '>' . $z;
                 }
                 if ($fc == '|') {
                     $l = 'td';
                 } elseif ($fc == '!') {
                     $l = 'th';
                 } elseif ($fc == '+') {
                     $l = 'caption';
                 } else {
                     $l = '';
                 }
                 array_push($ltd, $l);
                 # Cell parameters
                 $y = explode('|', $theline, 2);
                 # Note that a '|' inside an invalid link should not
                 # be mistaken as delimiting cell parameters
                 if (strpos($y[0], '[[') !== false) {
                     $y = array($theline);
                 }
                 if (count($y) == 1) {
                     $y = "{$z}<{$l}>{$y[0]}";
                 } else {
                     $attributes = $this->unstripForHTML($y[0]);
                     $y = "{$z}<{$l}" . Sanitizer::fixTagAttributes($attributes, $l) . ">{$y[1]}";
                 }
                 $t[$k] .= $y;
                 array_push($td, true);
             }
         }
     }
     # Closing open td, tr && table
     while (count($td) > 0) {
         $l = array_pop($ltd);
         if (array_pop($td)) {
             # Close with the tag that was actually opened (td, th or
             # caption) rather than always assuming td
             $t[] = '</' . $l . '>';
         }
         if (array_pop($tr)) {
             $t[] = '</tr>';
         }
         if (!array_pop($has_opened_tr)) {
             $t[] = '<tr><td></td></tr>';
         }
         $t[] = '</table>';
     }
     $t = implode("\n", $t);
     # special case: don't return empty table
     if ($t == "<table>\n<tr><td></td></tr>\n</table>") {
         $t = '';
     }
     wfProfileOut($fname);
     return $t;
 }
 /**
  * With $wgCleanupPresentationalAttributes switched off, a presentational
  * attribute must come back from Sanitizer::fixTagAttributes() unchanged.
  */
 function testDeprecatedAttributesDisabled()
 {
     global $wgCleanupPresentationalAttributes;
     $previous = $wgCleanupPresentationalAttributes;
     $wgCleanupPresentationalAttributes = false;
     $actual = Sanitizer::fixTagAttributes('clear="left"', 'br');
     // Restore the setting before asserting, so that a failing assertion
     // cannot leak the override into other tests.
     $wgCleanupPresentationalAttributes = $previous;
     $this->assertEquals(' clear="left"', $actual, 'Deprecated attributes are not converted to styles when disabled.');
 }
 /**
  * Parse wiki table markup and convert it to HTML.
  *
  * The content is processed line by line. Lines outside of any table and
  * empty lines are copied through unchanged; inside a table the markers
  * `{|`, `|}`, `|-`, `|`, `!` and `|+` become table, row, cell, header-cell
  * and caption elements. Tables still open at the end are closed.
  *
  * @param string $content Content that may contain wiki table markup
  * @return string Content with the table markup replaced by HTML
  */
 function parse_tables($content)
 {
     $lines = explode("\n", $content);
     $out = '';
     // Parallel stacks holding one entry per currently open (nested) table
     $td_history = array();
     // Is currently a td tag open?
     $last_tag_history = array();
     // Save history of last tag activated (td, th or caption)
     $tr_history = array();
     // Is currently a tr tag open?
     $tr_attributes = array();
     // history of tr attributes
     $has_opened_tr = array();
     // Did this table open a <tr> element?
     $indent_level = 0;
     // indent level of the table
     foreach ($lines as $outLine) {
         $line = trim($outLine);
         if ($line === '') {
             // empty line, go to next line
             $out .= $outLine . "\n";
             continue;
         }
         $first_character = $line[0];
         $matches = array();
         if (preg_match('/^(:*)\\{\\|(.*)$/', $line, $matches)) {
             // First check if we are starting a new table
             $indent_level = strlen($matches[1]);
             $attributes = Sanitizer::fixTagAttributes($matches[2], 'table');
             $outLine = str_repeat('<dl><dd>', $indent_level) . "<table{$attributes}>";
             array_push($td_history, false);
             array_push($last_tag_history, '');
             array_push($tr_history, false);
             array_push($tr_attributes, '');
             array_push($has_opened_tr, false);
         } elseif (count($td_history) == 0) {
             // Not inside a table: copy the line through untouched
             $out .= $outLine . "\n";
             continue;
         } elseif (substr($line, 0, 2) === '|}') {
             // We are ending a table; prepend closing tags, innermost first
             $line = '</table>' . substr($line, 2);
             $last_tag = array_pop($last_tag_history);
             if (!array_pop($has_opened_tr)) {
                 // The table never opened a row; emit an empty one
                 $line = "<tr><td></td></tr>{$line}";
             }
             if (array_pop($tr_history)) {
                 $line = "</tr>{$line}";
             }
             if (array_pop($td_history)) {
                 $line = "</{$last_tag}>{$line}";
             }
             array_pop($tr_attributes);
             $outLine = $line . str_repeat('</dd></dl>', $indent_level);
         } elseif (substr($line, 0, 2) === '|-') {
             // Now we have a table row
             $line = preg_replace('#^\\|-+#', '', $line);
             // Whats after the tag is now only attributes
             $attributes = Sanitizer::fixTagAttributes($line, 'tr');
             array_pop($tr_attributes);
             array_push($tr_attributes, $attributes);
             $line = '';
             $last_tag = array_pop($last_tag_history);
             array_pop($has_opened_tr);
             array_push($has_opened_tr, true);
             if (array_pop($tr_history)) {
                 $line = '</tr>';
             }
             if (array_pop($td_history)) {
                 $line = "</{$last_tag}>{$line}";
             }
             $outLine = $line;
             // The <tr> itself is only written once the first cell shows up
             array_push($tr_history, false);
             array_push($td_history, false);
             array_push($last_tag_history, '');
         } elseif ($first_character === '|' || $first_character === '!' || substr($line, 0, 2) === '|+') {
             // This might be cell elements, td, th or captions
             if (substr($line, 0, 2) === '|+') {
                 $first_character = '+';
                 $line = substr($line, 1);
             }
             $line = substr($line, 1);
             if ($first_character === '!') {
                 $line = str_replace('!!', '||', $line);
             }
             // Split up multiple cells on the same line.
             $cells = explode('||', $line);
             $outLine = '';
             // Loop through each table cell
             foreach ($cells as $cell) {
                 $previous = '';
                 if ($first_character !== '+') {
                     // Cells live in a row; open one if none is open yet
                     $tr_after = array_pop($tr_attributes);
                     if (!array_pop($tr_history)) {
                         $previous = "<tr{$tr_after}>\n";
                     }
                     array_push($tr_history, true);
                     array_push($tr_attributes, '');
                     array_pop($has_opened_tr);
                     array_push($has_opened_tr, true);
                 }
                 // Close the previous cell, if any, before opening this one
                 $last_tag = array_pop($last_tag_history);
                 if (array_pop($td_history)) {
                     $previous = "</{$last_tag}>\n{$previous}";
                 }
                 if ($first_character === '|') {
                     $last_tag = 'td';
                 } elseif ($first_character === '!') {
                     $last_tag = 'th';
                 } elseif ($first_character === '+') {
                     $last_tag = 'caption';
                 } else {
                     $last_tag = '';
                 }
                 array_push($last_tag_history, $last_tag);
                 // A cell could contain both parameters and data
                 $cell_data = explode('|', $cell, 2);
                 if (strpos($cell_data[0], '[[') !== false) {
                     // A '|' following '[[' is not treated as the
                     // parameter delimiter; keep the whole cell as data
                     $cell = "{$previous}<{$last_tag}>{$cell}";
                 } elseif (count($cell_data) == 1) {
                     $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
                 } else {
                     $attributes = Sanitizer::fixTagAttributes($cell_data[0], $last_tag);
                     $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
                 }
                 $outLine .= $cell;
                 array_push($td_history, true);
             }
         }
         $out .= $outLine . "\n";
     }
     // Closing open td, tr && table
     while (count($td_history) > 0) {
         $last_tag = array_pop($last_tag_history);
         if (array_pop($td_history)) {
             // Close with the tag that was actually opened (td, th or
             // caption) rather than always assuming td
             $out .= "</{$last_tag}>\n";
         }
         if (array_pop($tr_history)) {
             $out .= "</tr>\n";
         }
         if (!array_pop($has_opened_tr)) {
             $out .= "<tr><td></td></tr>\n";
         }
         $out .= "</table>\n";
     }
     if (substr($out, -1) === "\n") {
         // Remove trailing line-ending (b/c)
         $out = substr($out, 0, -1);
     }
     if ($out === "<table>\n<tr><td></td></tr>\n</table>") {
         // special case: don't return empty table
         $out = '';
     }
     return $out;
 }
 /**
  * Every attribute string supplied by the provider must be returned by
  * Sanitizer::fixTagAttributes() unchanged, prefixed with a single space.
  *
  * @dataProvider provideDeprecatedAttributes
  */
 function testDeprecatedAttributesUnaltered($inputAttr, $inputEl)
 {
     $expected = ' ' . $inputAttr;
     $actual = Sanitizer::fixTagAttributes($inputAttr, $inputEl);
     $this->assertEquals($expected, $actual);
 }
Exemple #11
0
 /**
  * Checks how Sanitizer::fixTagAttributes() treats presentational attributes:
  * converted to inline styles while $wgCleanupPresentationalAttributes is
  * true, passed through unchanged once it is false.
  */
 function testDeprecatedAttributes()
 {
     $GLOBALS['wgCleanupPresentationalAttributes'] = true;
     // Each case: attribute string, element name, expected result, failure message
     $cases = array(
         array('clear="left"', 'br', ' style="clear: left;"', 'Deprecated attributes are converted to styles when enabled.'),
         array('clear="all"', 'br', ' style="clear: both;"', 'clear=all is converted to clear: both; not clear: all;'),
         array('CLEAR="ALL"', 'br', ' style="clear: both;"', 'clear=ALL is not treated differently from clear=all'),
         array('width="100"', 'td', ' style="width: 100px;"', 'Numeric sizes use pixels instead of numbers.'),
         array('width="100%"', 'td', ' style="width: 100%;"', 'Units are allowed in sizes.'),
         array('WIDTH="100%"', 'td', ' style="width: 100%;"', 'Uppercase WIDTH is treated as lowercase width.'),
         array('WiDTh="100%"', 'td', ' style="width: 100%;"', 'Mixed case does not break WiDTh.'),
         array('nowrap="true"', 'td', ' style="white-space: nowrap;"', 'nowrap attribute is output as white-space: nowrap; not something else.'),
         array('nowrap=""', 'td', ' style="white-space: nowrap;"', 'nowrap="" is considered true, not false'),
         array('NOWRAP="true"', 'td', ' style="white-space: nowrap;"', 'nowrap attribute works when uppercase.'),
         array('NoWrAp="true"', 'td', ' style="white-space: nowrap;"', 'nowrap attribute works when mixed-case.'),
     );
     foreach ($cases as $case) {
         list($input, $element, $expected, $message) = $case;
         // Expected value goes first so that failure output labels the two values correctly
         $this->assertEquals($expected, Sanitizer::fixTagAttributes($input, $element), $message);
     }
     $GLOBALS['wgCleanupPresentationalAttributes'] = false;
     $this->assertEquals(' clear="left"', Sanitizer::fixTagAttributes('clear="left"', 'br'), 'Deprecated attributes are not converted to styles when disabled.');
 }
 /**
  * Convert wiki table markup into LaTeX tabularx environments.
  *
  * The text is processed line by line. `{|` opens a table, `|-` starts a new
  * row, `|` / `!` lines hold cells (header cells are set in bold), `|+` lines
  * are collected as caption text and `|}` (or `|\}`) closes the table. The
  * table header is written back onto the line that opened the table once the
  * column count is known. Table markers that appear while no table is open
  * are left untouched.
  *
  * The table attributes `latexfmt` and `latexwidth` override the generated
  * column format and the table width. For top-level tables the hooks
  * w2lTableLaTeXAttributes, w2lTableHead and w2lTableFoot are run.
  *
  * @param string $t Text containing wiki table markup
  * @return string Text with the table markup replaced by LaTeX
  */
 private function externalTableHelper($t)
 {
     $latexformat = '';
     $t = explode("\n", trim($t));
     # Bookkeeping stacks with one entry per open table. $ltd and $ltr are
     # maintained but their values do not influence the LaTeX output.
     $ltd = array();
     $tr = array();
     # Is currently a row open?
     $ltr = array();
     $has_opened_tr = array();
     # Did this table open a row?
     # Column counting and header data, indexed by nesting depth
     $cellcount_max = array();
     $cellcount_current = array();
     $tableheader = array();
     $thkr = array();
     # Stack of line indexes on which the open tables started
     $firstCellOfRow = true;
     $ltx_caption = '';
     $in_table = 0;
     # Current nesting depth, 0 = outside of any table
     foreach ($t as $k => $x) {
         $x = trim($x);
         if ($x == '') {
             // empty line, go to next line
             continue;
         }
         $fc = substr($x, 0, 1);
         if (preg_match('/^(:*)\\{\\|(.*)$/', $x, $matches)) {
             if ($in_table == 0) {
                 /* new top-level table, initialise arrays */
                 $latexformat = '';
                 $cellcount_max = array();
                 $cellcount_current = array();
                 $tableheader = array();
                 $thkr = array();
             }
             $in_table++;
             array_push($ltd, '');
             array_push($tr, false);
             array_push($ltr, '');
             array_push($has_opened_tr, false);
             //Start of table: Extract LaTeX tips from attributes, make header.
             $attributes = $this->unstripForHTML($matches[2]);
             $this->debugMessage('Table: Attributes: ', $attributes);
             $attributes = str_replace($this->sc['backslash'], '\\', $attributes);
             $attributes_test = $this->parseAttrString($attributes);
             // NOTE(review): for a nested table without latexfmt, $latexformat
             // still holds whatever was assigned last (it is only reset for
             // top-level tables) - confirm that this carry-over is intended.
             if (array_key_exists('latexfmt', $attributes_test)) {
                 $latexformat = $attributes_test['latexfmt'];
                 $latexformat = str_replace("\\", "", $latexformat);
                 $this->debugMessage('Table: latexfmt: ', $latexformat);
             }
             if (array_key_exists('latexwidth', $attributes_test)) {
                 $latexwidth = $attributes_test['latexwidth'];
                 $latexwidth = str_replace('\\(\\backslash{}\\)', '\\', $latexwidth);
                 $this->debugMessage('Table: latexwidth: ', $latexwidth);
             } else {
                 $latexwidth = '\\linewidth';
             }
             // start-of-table: remember where the header has to be written
             array_push($thkr, $k);
             $tableheader[$in_table]['width'] = $latexwidth;
             $tableheader[$in_table]['format'] = $latexformat;
             $cellcount_max[$in_table] = 0;
             // start-of-row
             $cellcount_current[$in_table] = 0;
             $this->addPackageDependency('tabularx');
             $firstCellOfRow = true;
         } elseif ($in_table == 0) {
             // No table is open: a stray "|}", "|-" or cell marker must not
             // touch neighbouring lines or the nesting counter.
             continue;
         } elseif ('|}' == substr($x, 0, 2) || '|\\}' == substr($x, 0, 3)) {
             //End of table. Pop stacks and print latex ending.
             array_pop($ltd);
             if (!array_pop($has_opened_tr)) {
                 $t[$k - 1] .= "\\tabularnewline \\hline";
             }
             if (array_pop($tr)) {
                 // Terminate the last row on the preceding line
                 $t[$k - 1] .= '\\tabularnewline \\hline';
             }
             array_pop($ltr);
             // end-of-row code
             $cellcount_max[$in_table] = max($cellcount_max[$in_table], $cellcount_current[$in_table]);
             // end-of-table
             $thk = array_pop($thkr);
             $latexwidth = $tableheader[$in_table]['width'];
             if ($tableheader[$in_table]['format'] == '') {
                 // No explicit format: one 'Y' column per cell of the widest row
                 $latexformat = array();
                 for ($i = 0; $i < $cellcount_max[$in_table]; $i++) {
                     array_push($latexformat, 'Y');
                 }
                 $latexformat = '|' . implode('|', $latexformat) . '|';
             } else {
                 $latexformat = $tableheader[$in_table]['format'];
             }
             if ($in_table > 1) {
                 // Nested table: wrapped in an extra pair of braces, no hooks
                 $t[$thk] = "{\\begin{tabularx}{{$latexwidth}}{{$latexformat}}\\hline";
                 $t[$k] = "\\end{tabularx}}" . trim($ltx_caption);
             } else {
                 // This table is not nested
                 $this->debugMessage('Table: inserted latexfmt: ', $latexformat);
                 $this->debugMessage('Table: inserted latexwidth ', $latexwidth);
                 // A reference to $this itself cannot be taken on PHP 7.1+,
                 // so the hooks receive a reference to a local copy of the handle.
                 $self = $this;
                 wfRunHooks("w2lTableLaTeXAttributes", array(&$self, &$latexformat, &$latexwidth));
                 $table_head = "\\begin{tabularx}{{$latexwidth}}{{$latexformat}}\\hline";
                 $table_foot = "\\end{tabularx}\n" . trim($ltx_caption);
                 wfRunHooks("w2lTableHead", array(&$self, &$table_head));
                 wfRunHooks("w2lTableFoot", array(&$self, &$table_foot));
                 $t[$thk] = $table_head;
                 $t[$k] = $table_foot;
                 unset($table_head, $table_foot);
             }
             $in_table--;
             $ltx_caption = '';
         } elseif ('|-' == substr($x, 0, 2)) {
             # New row. Allows for |--------------- ; four or more dashes
             # request an additional \hline.
             if (strpos($x, '----') == 1) {
                 $add_hline = '\\hline';
             } else {
                 $add_hline = '';
             }
             $x = substr($x, 1);
             while ($x != '' && substr($x, 0, 1) == '-') {
                 $x = substr($x, 1);
             }
             array_pop($ltd);
             array_pop($has_opened_tr);
             array_push($has_opened_tr, true);
             if (array_pop($tr)) {
                 // Terminate the previous row on the preceding line
                 $t[$k - 1] .= '\\tabularnewline \\hline' . $add_hline;
             }
             array_pop($ltr);
             $t[$k] = '';
             array_push($tr, false);
             array_push($ltd, '');
             // end-of-row
             $cellcount_max[$in_table] = max($cellcount_max[$in_table], $cellcount_current[$in_table]);
             // start-of-row
             $cellcount_current[$in_table] = 0;
             $attributes = $this->unstripForHTML($x);
             array_push($ltr, Sanitizer::fixTagAttributes($attributes, 'tr'));
             $firstCellOfRow = true;
         } elseif ('|' === $fc || '!' === $fc) {
             # $x holds cells ("|"), header cells ("!") or a caption ("|+")
             if ('|+' == substr($x, 0, 2)) {
                 $fc = '+';
                 $x = substr($x, 1);
             }
             $after = substr($x, 1);
             if ($fc == '!') {
                 $after = str_replace('!!', '||', $after);
             }
             // Split up multiple cells on the same line.
             // FIXME: This can result in improper nesting of tags processed
             // by earlier parser steps, but should avoid splitting up eg
             // attribute values containing literal "||".
             $cells = StringUtils::explodeMarkup('||', $after);
             $t[$k] = '';
             # Loop through each table cell
             foreach ($cells as $theline) {
                 $z = '';
                 if ($fc != '+') {
                     array_pop($ltr);
                     if (!array_pop($tr)) {
                         // First cell of a new row starts on its own line
                         $z = "\n";
                     }
                     array_push($tr, true);
                     array_push($ltr, '');
                     // current-row-cell
                     $cellcount_current[$in_table]++;
                     array_pop($has_opened_tr);
                     array_push($has_opened_tr, true);
                 }
                 array_pop($ltd);
                 if ($fc == '+') {
                     // Caption text is collected and emitted after the table
                     $ltx_caption .= $theline;
                     continue;
                 }
                 //heading cells and normal cells are equal in LaTeX:
                 //every cell but the first of a row is preceded by a separator
                 $l = $firstCellOfRow ? '' : ' & ';
                 array_push($ltd, $l);
                 # Cell parameters
                 $y = explode('|', $theline, 2);
                 # Note that a '|' inside an invalid link should not
                 # be mistaken as delimiting cell parameters
                 if (strpos($y[0], '[[') !== false) {
                     $y = array($theline);
                 }
                 if (count($y) == 1) {
                     $y[0] = $this->fixContentforTableCells($y[0]);
                     if ($fc == '!') {
                         //Heading cell highlighting
                         $y = "{$z}{$l}" . "\\textbf{" . "{$y[0]}}";
                     } else {
                         $y = "{$z}{$l}{$y[0]}";
                     }
                 } else {
                     // Cell with attributes: rendered as \multicolumn using
                     // the span and format reported by checkColspan()
                     $attributes = $this->unstripForHTML($y[0]);
                     $multi_col = $this->checkColspan($attributes);
                     $addSep = $firstCellOfRow ? '' : '&';
                     $y = "{$z}" . $addSep . '\\multicolumn{' . $multi_col['colspan'] . '}{' . $multi_col['latexfmt'] . '}{' . $y[1] . '}';
                 }
                 $firstCellOfRow = false;
                 $t[$k] .= $y;
             }
         }
     }
     $t = implode("\n", $t);
     return $t;
 }
 /**
  * With $wgCleanupPresentationalAttributes enabled, each provided attribute
  * string must be converted by Sanitizer::fixTagAttributes() into the
  * expected output.
  *
  * @dataProvider provideDeprecatedAttributes
  */
 function testDeprecatedAttributes($input, $tag, $expected, $message = null)
 {
     $key = 'wgCleanupPresentationalAttributes';
     $hadValue = array_key_exists($key, $GLOBALS);
     $previous = $hadValue ? $GLOBALS[$key] : null;
     $GLOBALS[$key] = true;
     $actual = Sanitizer::fixTagAttributes($input, $tag);
     // Put the global back before asserting, so that a failing assertion
     // cannot leak the override into other tests.
     if ($hadValue) {
         $GLOBALS[$key] = $previous;
     } else {
         unset($GLOBALS[$key]);
     }
     $this->assertEquals($expected, $actual, $message);
 }
Exemple #14
0
 /**
  * Cleans up HTML, removes dangerous tags and attributes, and
  * removes HTML comments.
  *
  * The text is split on '<'. Each piece that parses as a recognized element
  * is re-emitted with its attributes passed through
  * Sanitizer::fixTagAttributes(); every other piece is emitted with its
  * leading '<' escaped as '&lt;'. A literal '>' in text content is always
  * escaped as '&gt;'.
  *
  * When MWTidy is not enabled the method additionally tracks the open
  * elements on a stack, rejects closing tags that do not match, and appends
  * closing tags for whatever is still open at the end of the input. When
  * MWTidy is enabled only the per-tag validation and attribute cleanup is
  * performed.
  *
  * @param string $text
  * @param callable $processCallback Callback to do any variable or parameter
  *   replacements in HTML attribute values
  * @param array|bool $args Arguments for the processing callback
  * @param array $extratags For any extra tags to include
  * @param array $removetags For any tags (default or extra) to exclude
  * @return string
  */
 public static function removeHTMLtags($text, $processCallback = null, $args = array(), $extratags = array(), $removetags = array())
 {
     # NOTE(review): the lookup tables used below ($htmlelements, $htmlpairs,
     # $htmlsingle, $htmlsingleonly, $htmlsingleallowed, $htmlnest,
     # $tabletags, $htmllist, $listtags) are never assigned in this method;
     # presumably they are the keys of the array returned by
     # getRecognizedTagData() and are imported by extract() - confirm.
     extract(self::getRecognizedTagData($extratags, $removetags));
     # Remove HTML comments
     $text = Sanitizer::removeHTMLcomments($text);
     $bits = explode('<', $text);
     # Everything before the first '<' is plain text: only '>' needs escaping
     $text = str_replace('>', '&gt;', array_shift($bits));
     if (!MWTidy::isEnabled()) {
         # $tagstack: elements currently open in the innermost table context.
         # $tablestack: the saved $tagstack of each enclosing context, pushed
         # whenever a <table> is opened.
         $tagstack = $tablestack = array();
         foreach ($bits as $x) {
             $regs = array();
             # $slash: Does the current element start with a '/'?
             # $t: Current element name
             # $params: String between element name and >
             # $brace: Ending '>' or '/>'
             # $rest: Everything until the next element of $bits
             if (preg_match(self::ELEMENT_BITS_REGEX, $x, $regs)) {
                 list(, $slash, $t, $params, $brace, $rest) = $regs;
             } else {
                 $slash = $t = $params = $brace = $rest = null;
             }
             $badtag = false;
             # Note: $t is lower-cased in place by the assignment inside isset()
             if (isset($htmlelements[$t = strtolower($t)])) {
                 # Check our stack
                 if ($slash && isset($htmlsingleonly[$t])) {
                     # Closing tag for an element listed in $htmlsingleonly
                     $badtag = true;
                 } elseif ($slash) {
                     # Closing a tag... is it the one we just opened?
                     MediaWiki\suppressWarnings();
                     $ot = array_pop($tagstack);
                     MediaWiki\restoreWarnings();
                     if ($ot != $t) {
                         if (isset($htmlsingleallowed[$ot])) {
                             # Pop all elements with an optional close tag
                             # and see if we find a match below them
                             $optstack = array();
                             array_push($optstack, $ot);
                             MediaWiki\suppressWarnings();
                             $ot = array_pop($tagstack);
                             MediaWiki\restoreWarnings();
                             while ($ot != $t && isset($htmlsingleallowed[$ot])) {
                                 array_push($optstack, $ot);
                                 MediaWiki\suppressWarnings();
                                 $ot = array_pop($tagstack);
                                 MediaWiki\restoreWarnings();
                             }
                             if ($t != $ot) {
                                 # No match. Push the optional elements back again
                                 $badtag = true;
                                 MediaWiki\suppressWarnings();
                                 $ot = array_pop($optstack);
                                 MediaWiki\restoreWarnings();
                                 while ($ot) {
                                     array_push($tagstack, $ot);
                                     MediaWiki\suppressWarnings();
                                     $ot = array_pop($optstack);
                                     MediaWiki\restoreWarnings();
                                 }
                             }
                         } else {
                             # Mismatch against a non-optional element: put
                             # the popped element back on the stack
                             MediaWiki\suppressWarnings();
                             array_push($tagstack, $ot);
                             MediaWiki\restoreWarnings();
                             # <li> can be nested in <ul> or <ol>, skip those cases:
                             if (!isset($htmllist[$ot]) || !isset($listtags[$t])) {
                                 $badtag = true;
                             }
                         }
                     } else {
                         # Matching close of a table: switch back to the tag
                         # stack that was saved when the table was opened
                         if ($t == 'table') {
                             $tagstack = array_pop($tablestack);
                         }
                     }
                     # Closing tags are emitted without attributes
                     $newparams = '';
                 } else {
                     # Keep track for later
                     if (isset($tabletags[$t]) && !in_array('table', $tagstack)) {
                         # Table cell/row element outside of any open table
                         $badtag = true;
                     } elseif (in_array($t, $tagstack) && !isset($htmlnest[$t])) {
                         # Already open and not listed in $htmlnest
                         $badtag = true;
                         # Is it a self closed htmlpair ? (bug 5487)
                     } elseif ($brace == '/>' && isset($htmlpairs[$t])) {
                         $badtag = true;
                     } elseif (isset($htmlsingleonly[$t])) {
                         # Hack to force empty tag for unclosable elements
                         $brace = '/>';
                     } elseif (isset($htmlsingle[$t])) {
                         # Hack to not close $htmlsingle tags
                         $brace = null;
                         # Still need to push this optionally-closed tag to
                         # the tag stack so that we can match end tags
                         # instead of marking them as bad.
                         array_push($tagstack, $t);
                     } elseif (isset($tabletags[$t]) && in_array($t, $tagstack)) {
                         // New table tag but forgot to close the previous one
                         $text .= "</{$t}>";
                     } else {
                         # Opening a table starts a fresh tag stack; the
                         # current one is saved on $tablestack
                         if ($t == 'table') {
                             array_push($tablestack, $tagstack);
                             $tagstack = array();
                         }
                         array_push($tagstack, $t);
                     }
                     # Replace any variables or template parameters with
                     # plaintext results.
                     if (is_callable($processCallback)) {
                         call_user_func_array($processCallback, array(&$params, $args));
                     }
                     if (!Sanitizer::validateTag($params, $t)) {
                         $badtag = true;
                     }
                     # Strip non-approved attributes from the tag
                     $newparams = Sanitizer::fixTagAttributes($params, $t);
                 }
                 if (!$badtag) {
                     $rest = str_replace('>', '&gt;', $rest);
                     # Only opening tags keep a self-closing ' /'
                     $close = $brace == '/>' && !$slash ? ' /' : '';
                     $text .= "<{$slash}{$t}{$newparams}{$close}>{$rest}";
                     continue;
                 }
             }
             # Unrecognized or rejected tag: emit it as escaped text
             $text .= '&lt;' . str_replace('>', '&gt;', $x);
         }
         # Close off any remaining tags
         while (is_array($tagstack) && ($t = array_pop($tagstack))) {
             $text .= "</{$t}>\n";
             if ($t == 'table') {
                 $tagstack = array_pop($tablestack);
             }
         }
     } else {
         # this might be possible using tidy itself
         # No stack tracking in this branch: each tag is only validated and
         # has its attributes cleaned; the original '>' or '/>' is kept.
         foreach ($bits as $x) {
             if (preg_match(self::ELEMENT_BITS_REGEX, $x, $regs)) {
                 list(, $slash, $t, $params, $brace, $rest) = $regs;
                 $badtag = false;
                 if (isset($htmlelements[$t = strtolower($t)])) {
                     if (is_callable($processCallback)) {
                         call_user_func_array($processCallback, array(&$params, $args));
                     }
                     if (!Sanitizer::validateTag($params, $t)) {
                         $badtag = true;
                     }
                     $newparams = Sanitizer::fixTagAttributes($params, $t);
                     if (!$badtag) {
                         $rest = str_replace('>', '&gt;', $rest);
                         $text .= "<{$slash}{$t}{$newparams}{$brace}{$rest}";
                         continue;
                     }
                 }
             }
             # Unrecognized or rejected tag: emit it as escaped text
             $text .= '&lt;' . str_replace('>', '&gt;', $x);
         }
     }
     return $text;
 }
Exemple #15
0
 /**
  * Asserts that Sanitizer::fixTagAttributes() returns the supplied attribute
  * text unchanged, prefixed by a single space.
  *
  * @dataProvider provideDeprecatedAttributes
  * @covers Sanitizer::fixTagAttributes
  */
 public function testDeprecatedAttributesUnaltered($inputAttr, $inputEl, $message = '')
 {
     # The expected value is the input attribute text after one leading space.
     $expected = ' ' . $inputAttr;
     $actual = Sanitizer::fixTagAttributes($inputAttr, $inputEl);
     $this->assertEquals($expected, $actual, $message);
 }
Exemple #16
0
 /**
  * Parse the wiki syntax used to render tables.
  *
  * Walks $text line by line and rewrites table markup into HTML:
  *   - a line matching ":*{|attrs" opens a <table> (one <dl><dd> wrapper
  *     per leading colon),
  *   - "|}" closes the table,
  *   - "|-" starts a new row,
  *   - "|", "!" and "|+" start td, th and caption cells respectively;
  *     "||" (or "!!" on a header line) separates cells on one line.
  * Lines outside of any table are copied through unchanged. Attribute text
  * is unstripped via $this->mStripState and cleaned with
  * Sanitizer::fixTagAttributes().
  *
  * Tables still open at the end of the input are closed automatically; the
  * open cell is closed with the tag that opened it (td, th or caption).
  *
  * @private
  * @param string $text Text to process, one table markup item per line
  * @return string
  */
 function doTableStuff($text)
 {
     wfProfileIn(__METHOD__);
     $lines = StringUtils::explode("\n", $text);
     $out = '';
     # The five arrays below are parallel stacks with one entry per table
     # that is currently open; the innermost table is on top.
     # Is currently a td tag open?
     $td_history = array();
     # Save history of last tag activated (td, th or caption)
     $last_tag_history = array();
     # Is currently a tr tag open?
     $tr_history = array();
     # history of tr attributes
     $tr_attributes = array();
     # Did this table open a <tr> element?
     $has_opened_tr = array();
     # indent level of the table
     $indent_level = 0;
     foreach ($lines as $outLine) {
         $line = trim($outLine);
         if ($line === '') {
             # empty line, go to next line
             $out .= $outLine . "\n";
             continue;
         }
         $first_character = $line[0];
         $matches = array();
         if (preg_match('/^(:*)\\{\\|(.*)$/', $line, $matches)) {
             # First check if we are starting a new table
             $indent_level = strlen($matches[1]);
             $attributes = $this->mStripState->unstripBoth($matches[2]);
             $attributes = Sanitizer::fixTagAttributes($attributes, 'table');
             $outLine = str_repeat('<dl><dd>', $indent_level) . "<table{$attributes}>";
             array_push($td_history, false);
             array_push($last_tag_history, '');
             array_push($tr_history, false);
             array_push($tr_attributes, '');
             array_push($has_opened_tr, false);
         } elseif (count($td_history) == 0) {
             # Don't do any of the following
             $out .= $outLine . "\n";
             continue;
         } elseif (substr($line, 0, 2) === '|}') {
             # We are ending a table
             $line = '</table>' . substr($line, 2);
             $last_tag = array_pop($last_tag_history);
             # A table that never opened a row gets one empty row
             if (!array_pop($has_opened_tr)) {
                 $line = "<tr><td></td></tr>{$line}";
             }
             if (array_pop($tr_history)) {
                 $line = "</tr>{$line}";
             }
             if (array_pop($td_history)) {
                 $line = "</{$last_tag}>{$line}";
             }
             array_pop($tr_attributes);
             $outLine = $line . str_repeat('</dd></dl>', $indent_level);
         } elseif (substr($line, 0, 2) === '|-') {
             # Now we have a table row
             $line = preg_replace('#^\\|-+#', '', $line);
             # Whats after the tag is now only attributes
             $attributes = $this->mStripState->unstripBoth($line);
             $attributes = Sanitizer::fixTagAttributes($attributes, 'tr');
             # The <tr> itself is emitted lazily by the first cell of the
             # row; only remember its attributes here
             array_pop($tr_attributes);
             array_push($tr_attributes, $attributes);
             $line = '';
             $last_tag = array_pop($last_tag_history);
             array_pop($has_opened_tr);
             array_push($has_opened_tr, true);
             if (array_pop($tr_history)) {
                 $line = '</tr>';
             }
             if (array_pop($td_history)) {
                 $line = "</{$last_tag}>{$line}";
             }
             $outLine = $line;
             array_push($tr_history, false);
             array_push($td_history, false);
             array_push($last_tag_history, '');
         } elseif ($first_character === '|' || $first_character === '!' || substr($line, 0, 2) === '|+') {
             # This might be cell elements, td, th or captions
             if (substr($line, 0, 2) === '|+') {
                 $first_character = '+';
                 $line = substr($line, 1);
             }
             $line = substr($line, 1);
             if ($first_character === '!') {
                 $line = str_replace('!!', '||', $line);
             }
             # Split up multiple cells on the same line.
             # FIXME : This can result in improper nesting of tags processed
             # by earlier parser steps, but should avoid splitting up eg
             # attribute values containing literal "||".
             $cells = StringUtils::explodeMarkup('||', $line);
             $outLine = '';
             # Loop through each table cell
             foreach ($cells as $cell) {
                 $previous = '';
                 if ($first_character !== '+') {
                     # Cells (but not captions) live inside a row: open one
                     # with the pending attributes if none is open yet
                     $tr_after = array_pop($tr_attributes);
                     if (!array_pop($tr_history)) {
                         $previous = "<tr{$tr_after}>\n";
                     }
                     array_push($tr_history, true);
                     array_push($tr_attributes, '');
                     array_pop($has_opened_tr);
                     array_push($has_opened_tr, true);
                 }
                 $last_tag = array_pop($last_tag_history);
                 # Close the previously open cell before starting this one
                 if (array_pop($td_history)) {
                     $previous = "</{$last_tag}>\n{$previous}";
                 }
                 if ($first_character === '|') {
                     $last_tag = 'td';
                 } elseif ($first_character === '!') {
                     $last_tag = 'th';
                 } elseif ($first_character === '+') {
                     $last_tag = 'caption';
                 } else {
                     $last_tag = '';
                 }
                 array_push($last_tag_history, $last_tag);
                 # A cell could contain both parameters and data
                 $cell_data = explode('|', $cell, 2);
                 # Bug 553: Note that a '|' inside an invalid link should not
                 # be mistaken as delimiting cell parameters
                 if (strpos($cell_data[0], '[[') !== false) {
                     $cell = "{$previous}<{$last_tag}>{$cell}";
                 } elseif (count($cell_data) == 1) {
                     $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
                 } else {
                     $attributes = $this->mStripState->unstripBoth($cell_data[0]);
                     $attributes = Sanitizer::fixTagAttributes($attributes, $last_tag);
                     $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
                 }
                 $outLine .= $cell;
                 array_push($td_history, true);
             }
         }
         $out .= $outLine . "\n";
     }
     # Closing open td, tr && table
     while (count($td_history) > 0) {
         # $last_tag_history is pushed and popped in step with $td_history,
         # so its top entry names the element that is still open. Use it
         # rather than a hard-coded "td", otherwise an unterminated header
         # or caption cell would be closed with the wrong tag.
         $last_tag = array_pop($last_tag_history);
         if (array_pop($td_history)) {
             $out .= "</{$last_tag}>\n";
         }
         if (array_pop($tr_history)) {
             $out .= "</tr>\n";
         }
         if (!array_pop($has_opened_tr)) {
             $out .= "<tr><td></td></tr>\n";
         }
         $out .= "</table>\n";
     }
     # Remove trailing line-ending (b/c)
     if (substr($out, -1) === "\n") {
         $out = substr($out, 0, -1);
     }
     # special case: don't return empty table
     if ($out === "<table>\n<tr><td></td></tr>\n</table>") {
         $out = '';
     }
     wfProfileOut(__METHOD__);
     return $out;
 }
 /**
  * Cleans up HTML, removes dangerous tags and attributes, and
  * removes HTML comments.
  *
  * The text is split on '<'. Each piece that parses as a recognized element
  * is re-emitted with its attributes passed through
  * Sanitizer::fixTagAttributes(); every other piece is emitted with its
  * leading '<' escaped as '&lt;'. A literal '>' in text content is always
  * escaped as '&gt;'.
  *
  * When $wgUseTidy is false the method additionally tracks the open
  * elements on a stack, rejects closing tags that do not match, and appends
  * closing tags for whatever is still open at the end of the input. When
  * $wgUseTidy is true only the per-tag validation and attribute cleanup is
  * performed.
  *
  * The element lookup tables are kept in static variables and rebuilt
  * whenever the combination of $wgAllowMicrodataAttributes and
  * $wgAllowImageTag differs from the one they were built for.
  *
  * @param string $text
  * @param callable $processCallback Callback to do any variable or parameter
  *   replacements in HTML attribute values
  * @param array|bool $args Arguments for the processing callback
  * @param array $extratags For any extra tags to include
  * @param array $removetags For any tags (default or extra) to exclude
  * @return string
  */
 public static function removeHTMLtags($text, $processCallback = null, $args = array(), $extratags = array(), $removetags = array())
 {
     global $wgUseTidy, $wgAllowMicrodataAttributes, $wgAllowImageTag;
     static $htmlpairsStatic, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags, $htmllist, $listtags, $htmlsingleallowed, $htmlelementsStatic, $staticInitialised;
     // Base our staticInitialised variable off of the global config state so that if the globals
     // are changed (like in the screwed up test system) we will re-initialise the settings.
     $globalContext = implode('-', compact('wgAllowMicrodataAttributes', 'wgAllowImageTag'));
     if (!$staticInitialised || $staticInitialised != $globalContext) {
         # Elements that are rejected when written self-closed (see the
         # '/>' check against $htmlpairs below)
         $htmlpairsStatic = array('b', 'bdi', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's', 'strike', 'strong', 'tt', 'var', 'div', 'center', 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre', 'ruby', 'rb', 'rp', 'rt', 'rtc', 'p', 'span', 'abbr', 'dfn', 'kbd', 'samp', 'data', 'time', 'mark');
         # Elements whose closing tag is optional
         $htmlsingle = array('br', 'wbr', 'hr', 'li', 'dt', 'dd');
         # Elements whose closing tag is rejected and which are always
         # emitted in self-closed form
         $htmlsingleonly = array('br', 'wbr', 'hr');
         if ($wgAllowMicrodataAttributes) {
             $htmlsingle[] = $htmlsingleonly[] = 'meta';
             $htmlsingle[] = $htmlsingleonly[] = 'link';
         }
         # Elements that may be opened again while already on the tag stack
         $htmlnest = array('table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul', 'li', 'dl', 'dt', 'dd', 'font', 'big', 'small', 'sub', 'sup', 'span', 'var', 'kbd', 'samp', 'em', 'strong', 'q', 'ruby', 'bdo');
         # Elements that are rejected unless a table is open
         $tabletags = array('td', 'th', 'tr');
         $htmllist = array('ul', 'ol');
         $listtags = array('li');
         if ($wgAllowImageTag) {
             $htmlsingle[] = 'img';
             $htmlsingleonly[] = 'img';
         }
         $htmlsingleallowed = array_unique(array_merge($htmlsingle, $tabletags));
         $htmlelementsStatic = array_unique(array_merge($htmlsingle, $htmlpairsStatic, $htmlnest));
         # Convert them all to hashtables for faster lookup
         $vars = array('htmlpairsStatic', 'htmlsingle', 'htmlsingleonly', 'htmlnest', 'tabletags', 'htmllist', 'listtags', 'htmlsingleallowed', 'htmlelementsStatic');
         foreach ($vars as $var) {
             ${$var} = array_flip(${$var});
         }
         $staticInitialised = $globalContext;
     }
     # Populate $htmlpairs and $htmlelements with the $extratags and $removetags arrays
     $extratags = array_flip($extratags);
     $removetags = array_flip($removetags);
     $htmlpairs = array_merge($extratags, $htmlpairsStatic);
     $htmlelements = array_diff_key(array_merge($extratags, $htmlelementsStatic), $removetags);
     # Remove HTML comments
     $text = Sanitizer::removeHTMLcomments($text);
     $bits = explode('<', $text);
     # Everything before the first '<' is plain text: only '>' needs escaping
     $text = str_replace('>', '&gt;', array_shift($bits));
     if (!$wgUseTidy) {
         # $tagstack: elements currently open in the innermost table context.
         # $tablestack: the saved $tagstack of each enclosing context, pushed
         # whenever a <table> is opened.
         $tagstack = $tablestack = array();
         foreach ($bits as $x) {
             $regs = array();
             # $slash: Does the current element start with a '/'?
             # $t: Current element name
             # $params: String between element name and >
             # $brace: Ending '>' or '/>'
             # $rest: Everything until the next element of $bits
             if (preg_match(self::ELEMENT_BITS_REGEX, $x, $regs)) {
                 list(, $slash, $t, $params, $brace, $rest) = $regs;
             } else {
                 $slash = $t = $params = $brace = $rest = null;
             }
             $badtag = false;
             # Note: $t is lower-cased in place by the assignment inside isset()
             if (isset($htmlelements[$t = strtolower($t)])) {
                 # Check our stack
                 if ($slash && isset($htmlsingleonly[$t])) {
                     # Closing tag for an element listed in $htmlsingleonly
                     $badtag = true;
                 } elseif ($slash) {
                     # Closing a tag... is it the one we just opened?
                     wfSuppressWarnings();
                     $ot = array_pop($tagstack);
                     wfRestoreWarnings();
                     if ($ot != $t) {
                         if (isset($htmlsingleallowed[$ot])) {
                             # Pop all elements with an optional close tag
                             # and see if we find a match below them
                             $optstack = array();
                             array_push($optstack, $ot);
                             wfSuppressWarnings();
                             $ot = array_pop($tagstack);
                             wfRestoreWarnings();
                             while ($ot != $t && isset($htmlsingleallowed[$ot])) {
                                 array_push($optstack, $ot);
                                 wfSuppressWarnings();
                                 $ot = array_pop($tagstack);
                                 wfRestoreWarnings();
                             }
                             if ($t != $ot) {
                                 # No match. Push the optional elements back again
                                 $badtag = true;
                                 wfSuppressWarnings();
                                 $ot = array_pop($optstack);
                                 wfRestoreWarnings();
                                 while ($ot) {
                                     array_push($tagstack, $ot);
                                     wfSuppressWarnings();
                                     $ot = array_pop($optstack);
                                     wfRestoreWarnings();
                                 }
                             }
                         } else {
                             # Mismatch against a non-optional element: put
                             # the popped element back on the stack
                             wfSuppressWarnings();
                             array_push($tagstack, $ot);
                             wfRestoreWarnings();
                             # <li> can be nested in <ul> or <ol>, skip those cases:
                             if (!isset($htmllist[$ot]) || !isset($listtags[$t])) {
                                 $badtag = true;
                             }
                         }
                     } else {
                         # Matching close of a table: switch back to the tag
                         # stack that was saved when the table was opened
                         if ($t == 'table') {
                             $tagstack = array_pop($tablestack);
                         }
                     }
                     # Closing tags are emitted without attributes
                     $newparams = '';
                 } else {
                     # Keep track for later
                     if (isset($tabletags[$t]) && !in_array('table', $tagstack)) {
                         # Table cell/row element outside of any open table
                         $badtag = true;
                     } elseif (in_array($t, $tagstack) && !isset($htmlnest[$t])) {
                         # Already open and not listed in $htmlnest
                         $badtag = true;
                         # Is it a self closed htmlpair ? (bug 5487)
                     } elseif ($brace == '/>' && isset($htmlpairs[$t])) {
                         $badtag = true;
                     } elseif (isset($htmlsingleonly[$t])) {
                         # Hack to force empty tag for unclosable elements
                         $brace = '/>';
                     } elseif (isset($htmlsingle[$t])) {
                         # Hack to not close $htmlsingle tags
                         $brace = null;
                         # Still need to push this optionally-closed tag to
                         # the tag stack so that we can match end tags
                         # instead of marking them as bad.
                         array_push($tagstack, $t);
                     } elseif (isset($tabletags[$t]) && in_array($t, $tagstack)) {
                         // New table tag but forgot to close the previous one
                         $text .= "</{$t}>";
                     } else {
                         # Opening a table starts a fresh tag stack; the
                         # current one is saved on $tablestack
                         if ($t == 'table') {
                             array_push($tablestack, $tagstack);
                             $tagstack = array();
                         }
                         array_push($tagstack, $t);
                     }
                     # Replace any variables or template parameters with
                     # plaintext results.
                     if (is_callable($processCallback)) {
                         call_user_func_array($processCallback, array(&$params, $args));
                     }
                     if (!Sanitizer::validateTag($params, $t)) {
                         $badtag = true;
                     }
                     # Strip non-approved attributes from the tag
                     $newparams = Sanitizer::fixTagAttributes($params, $t);
                 }
                 if (!$badtag) {
                     $rest = str_replace('>', '&gt;', $rest);
                     # Only opening tags keep a self-closing ' /'
                     $close = $brace == '/>' && !$slash ? ' /' : '';
                     $text .= "<{$slash}{$t}{$newparams}{$close}>{$rest}";
                     continue;
                 }
             }
             # Unrecognized or rejected tag: emit it as escaped text
             $text .= '&lt;' . str_replace('>', '&gt;', $x);
         }
         # Close off any remaining tags
         while (is_array($tagstack) && ($t = array_pop($tagstack))) {
             $text .= "</{$t}>\n";
             if ($t == 'table') {
                 $tagstack = array_pop($tablestack);
             }
         }
     } else {
         # this might be possible using tidy itself
         # No stack tracking in this branch: each tag is only validated and
         # has its attributes cleaned; the original '>' or '/>' is kept.
         foreach ($bits as $x) {
             if (preg_match(self::ELEMENT_BITS_REGEX, $x, $regs)) {
                 list(, $slash, $t, $params, $brace, $rest) = $regs;
                 $badtag = false;
                 if (isset($htmlelements[$t = strtolower($t)])) {
                     if (is_callable($processCallback)) {
                         call_user_func_array($processCallback, array(&$params, $args));
                     }
                     if (!Sanitizer::validateTag($params, $t)) {
                         $badtag = true;
                     }
                     $newparams = Sanitizer::fixTagAttributes($params, $t);
                     if (!$badtag) {
                         $rest = str_replace('>', '&gt;', $rest);
                         $text .= "<{$slash}{$t}{$newparams}{$brace}{$rest}";
                         continue;
                     }
                 }
             }
             # Unrecognized or rejected tag: emit it as escaped text
             $text .= '&lt;' . str_replace('>', '&gt;', $x);
         }
     }
     return $text;
 }