/**
  * Iterate through the parser's nodes and decide what to do with each one.
  *
  * Resets the output, strips every tag listed in $this->drop (together with
  * its content) from the parser's HTML, and then handles each node by type:
  *  - doctype:      skipped
  *  - pi / comment: written to the output only when $this->keepHTML is set
  *  - text:         delegated to handleText()
  *  - tag:          ignored, passed through via handleTagToText(), or
  *                  dispatched to the matching handleTag_<tagName>() method
  *
  * After the loop any remaining buffers are written out, the &gt;, &lt; and
  * &amp; entities in the output are decoded, trailing whitespace is trimmed,
  * flushFootnotes() is called and the stack is reset.
  *
  * @return void
  */
 protected function parse()
 {
     $this->output = '';
     // Drop tags. An empty drop list would turn the alternation into an empty
     // group, so the replacement is skipped entirely in that case.
     if (!empty($this->drop)) {
         $stripped = preg_replace('#<(' . implode('|', $this->drop) . ')[^>]*>.*</\\1>#sU', '', $this->parser->html);
         // preg_replace() returns null on a PCRE error; keep the unmodified
         // HTML rather than silently discarding the whole document.
         if ($stripped !== null) {
             $this->parser->html = $stripped;
         }
     }
     while ($this->parser->nextNode()) {
         switch ($this->parser->nodeType) {
             case 'doctype':
                 break;
             case 'pi':
             case 'comment':
                 if ($this->keepHTML) {
                     $this->flushLinebreaks();
                     $this->out($this->parser->node);
                     $this->setLineBreaks(2);
                 }
                 // else drop
                 break;
             case 'text':
                 $this->handleText();
                 break;
             case 'tag':
                 if (in_array($this->parser->tagName, $this->ignore)) {
                     break;
                 }
                 // If the previous tag was not a block element, we simulate a paragraph tag
                 if ($this->parser->isBlockElement && $this->parser->isNextToInlineContext && !in_array($this->parent(), $this->allowMixedChildren)) {
                     $this->setLineBreaks(2);
                 }
                 if ($this->parser->isStartTag) {
                     $this->flushLinebreaks();
                 }
                 if ($this->skipConversion) {
                     // Called for its side effect only (the original comment
                     // says it updates notConverted); the result is unused.
                     $this->isMarkdownable();
                     $this->handleTagToText();
                     // "break", not "continue": inside a switch PHP treats a bare
                     // "continue" as "break" anyway and warns about it since 7.3.
                     // The lastWasBlockTag update below must still run.
                     break;
                 }
                 if (!$this->parser->keepWhitespace && $this->parser->isBlockElement && $this->parser->isStartTag) {
                     $this->parser->html = ltrim($this->parser->html);
                 }
                 if ($this->isMarkdownable()) {
                     if ($this->parser->isBlockElement && $this->parser->isStartTag && !$this->lastWasBlockTag && !empty($this->output)) {
                         // Write into the last buffer entry if one exists,
                         // otherwise directly into the output.
                         if (!empty($this->buffer)) {
                             $str =& $this->buffer[count($this->buffer) - 1];
                         } else {
                             $str =& $this->output;
                         }
                         // Make sure the target ends with a newline followed by
                         // the current indent before the block handler runs.
                         if (substr($str, -strlen($this->indent) - 1) != "\n" . $this->indent) {
                             $str .= "\n" . $this->indent;
                         }
                     }
                     $func = 'handleTag_' . $this->parser->tagName;
                     $this->{$func}();
                     // Flush footnotes once a top-level block element has been closed.
                     if ($this->linkPosition == self::LINK_AFTER_PARAGRAPH && $this->parser->isBlockElement && !$this->parser->isStartTag && empty($this->parser->openTags)) {
                         $this->flushFootnotes();
                     }
                     if (!$this->parser->isStartTag) {
                         $this->lastClosedTag = $this->parser->tagName;
                     }
                 } else {
                     $this->handleTagToText();
                     $this->lastClosedTag = '';
                 }
                 break;
             default:
                 trigger_error('invalid node type', E_USER_ERROR);
                 break;
         }
         $this->lastWasBlockTag = $this->parser->nodeType == 'tag' && $this->parser->isStartTag && $this->parser->isBlockElement;
     }
     if (!empty($this->buffer)) {
         // trigger_error('buffer was not flushed, this is a bug. please report!', E_USER_WARNING);
         while (!empty($this->buffer)) {
             $this->out($this->unbuffer());
         }
     }
     // cleanup: decode &gt;, &lt; and &amp; (in that order, so &amp;lt; ends up
     // as &lt; and not <) and trim trailing whitespace
     $this->output = rtrim(str_replace(array('&gt;', '&lt;', '&amp;'), array('>', '<', '&'), $this->output));
     // end parsing, flush stacked tags
     $this->flushFootnotes();
     $this->stack = array();
 }
 /**
  * iterate through the nodes and decide what we
  * shall do with the current node
  *
  * @param void
  * @return void
  */
 function parse()
 {
     $this->output = '';
     # drop tags
     $this->parser->html = preg_replace('#<(' . implode('|', $this->drop) . ')[^>]*>.*</\\1>#sU', '', $this->parser->html);
     while ($this->parser->nextNode()) {
         switch ($this->parser->nodeType) {
             case 'doctype':
                 break;
             case 'pi':
             case 'comment':
                 if ($this->keepHTML) {
                     $this->flushLinebreaks();
                     $this->out($this->parser->node);
                     $this->setLineBreaks(2);
                 }
                 # else drop
                 break;
             case 'text':
                 $this->handleText();
                 break;
             case 'tag':
                 if (in_array($this->parser->tagName, $this->ignore)) {
                     break;
                 }
                 if ($this->parser->isStartTag) {
                     $this->flushLinebreaks();
                 }
                 if ($this->skipConversion) {
                     $this->isMarkdownable();
                     # update notConverted
                     $this->handleTagToText();
                     continue;
                 }
                 if (!$this->parser->keepWhitespace && $this->parser->isBlockElement && $this->parser->isStartTag) {
                     $this->parser->html = ltrim($this->parser->html);
                 }
                 if ($this->isMarkdownable()) {
                     if ($this->parser->isBlockElement && $this->parser->isStartTag && !$this->lastWasBlockTag && !empty($this->output)) {
                         if (!empty($this->buffer)) {
                             $str =& $this->buffer[count($this->buffer) - 1];
                         } else {
                             $str =& $this->output;
                         }
                         if (substr($str, -strlen($this->indent) - 1) != "\n" . $this->indent) {
                             $str .= "\n" . $this->indent;
                         }
                     }
                     $func = 'handleTag_' . $this->parser->tagName;
                     $this->{$func}();
                     if ($this->linksAfterEachParagraph && $this->parser->isBlockElement && !$this->parser->isStartTag && empty($this->parser->openTags)) {
                         $this->flushStacked();
                     }
                     if (!$this->parser->isStartTag) {
                         $this->lastClosedTag = $this->parser->tagName;
                     }
                 } else {
                     $this->handleTagToText();
                     $this->lastClosedTag = '';
                 }
                 break;
             default:
                 trigger_error('invalid node type', E_USER_ERROR);
                 break;
         }
         $this->lastWasBlockTag = $this->parser->nodeType == 'tag' && $this->parser->isStartTag && $this->parser->isBlockElement;
     }
     if (!empty($this->buffer)) {
         trigger_error('buffer was not flushed, this is a bug. please report!', E_USER_WARNING);
         while (!empty($this->buffer)) {
             $this->out($this->unbuffer());
         }
     }
     ### cleanup
     $this->output = rtrim(str_replace('&amp;', '&', str_replace('&lt;', '<', str_replace('&gt;', '>', $this->output))));
     # end parsing, flush stacked tags
     $this->flushStacked();
     $this->stack = array();
 }
Example #3
0
/**
 * Re-indent an HTML string so that block-level tags sit on their own lines.
 *
 * The markup is fed through a parseHTML instance node by node. Every open,
 * non-empty block element adds one $indent unit to the current prefix; its
 * closing tag removes it again. <br> tags and comment / pi / doctype nodes
 * are followed by a newline.
 *
 * @param string $html   markup to re-indent
 * @param string $indent optional, prefix added per open block element
 * @return string the re-indented markup
 */
function indentHTML($html, $indent = "  ")
{
    $parser = new parseHTML();
    $parser->html = $html;

    $result = '';
    # one entry per currently open block element
    $indentStack = array();
    # true while the output sits at the start of a fresh line
    # (initially, and after a block tag or <br>)
    $atLineStart = true;
    $lineEndingTypes = array('comment', 'pi', 'doctype');

    while ($parser->nextNode()) {
        if ($parser->nodeType == 'tag') {
            $parser->normalizeNode();
        }
        # evaluated after normalizeNode(), like the original checks
        $isTag = $parser->nodeType == 'tag';

        # --- block-level tags ---
        if ($isTag && $parser->isBlockElement) {
            $preformatted = in_array($parser->tagName, array('code', 'pre'));

            # terminate the running inline line before the block tag
            if (!($parser->keepWhitespace || $atLineStart || $preformatted)) {
                $result = rtrim($result) . "\n";
            }

            if (!$parser->isStartTag) {
                # closing tag: step out one level first, then indent
                array_pop($indentStack);
                if (!$preformatted) {
                    $result .= implode('', $indentStack);
                }
            } else {
                # opening tag: indent at the current level, then step in
                $result .= implode('', $indentStack);
                if (!$parser->isEmptyTag) {
                    $indentStack[] = $indent;
                }
            }

            $result .= $parser->node;
            # no newline directly after an opening <pre>/<code>
            if (!$parser->keepWhitespace && (!$preformatted || !$parser->isStartTag)) {
                $result .= "\n";
            }
            $atLineStart = true;
            continue;
        }

        # --- <br>: emitted as-is, always ends the line ---
        if ($isTag && $parser->tagName == 'br') {
            $result .= $parser->node . "\n";
            $atLineStart = true;
            continue;
        }

        # --- everything else (inline tags, text, comment, pi, doctype) ---
        if ($atLineStart && !$parser->keepWhitespace) {
            $result .= implode('', $indentStack);
            $parser->node = ltrim($parser->node);
        }
        $result .= $parser->node;

        if (in_array($parser->nodeType, $lineEndingTypes)) {
            $result .= "\n";
        } else {
            $atLineStart = false;
        }
    }

    return $result;
}