/**
 * Splits a string into consecutive chunks according to the registered descriptors.
 *
 * Every descriptor regex is applied to the whole string. Each match is stored
 * as array(matchedText, flag), where the flag is the descriptor's flag or, when
 * that flag is null, the regex itself. The text lying between matches is then
 * added as plain strings, so the returned chunks cover the whole input.
 *
 * @param string $string the text to split
 * @return array chunks indexed by their byte offset in $string, sorted by
 *               offset; a value is either a plain string (unmatched text) or
 *               array(matchedText, flag)
 * @throws Exception if a regex cannot be applied, if two matches start at the
 *                   same offset, or if two matches overlap
 */
public function parse($string)
{
    $length = strlen($string);
    $result = array();

    foreach ($this->descriptors as $descriptor) {
        $regex = $descriptor->getRegex();
        $matches = array();
        // preg_match_all() returns false on failure; reading $matches in that
        // case would silently work on missing data.
        if (preg_match_all($regex, $string, $matches, PREG_OFFSET_CAPTURE) === false) {
            throw new Exception("Cannot apply the regex {$regex} (PCRE error code " . preg_last_error() . ")");
        }

        $flag = $descriptor->getFlag();
        if ($flag === null) {
            $flag = $regex;
        }

        // With PREG_OFFSET_CAPTURE each match is array(text, offset).
        foreach ($matches[0] as $match) {
            $chunk = $match[0];
            $index = $match[1];
            if (isset($result[$index])) {
                $firstString = $result[$index][0];
                $secondString = $chunk;
                $firstFlag = StringExploder::flagToString($result[$index][1]);
                $secondFlag = StringExploder::flagToString($flag);
                throw new Exception("Conflict between {$firstFlag} and {$secondFlag} at {$index} ('{$firstString}' or '{$secondString}')");
            }
            $result[$index] = array($chunk, $flag);
        }
    }

    // Sentinel at the end of the string, so that the text following the last
    // match is filled in like any other gap. It is removed before returning.
    $result[$length] = null;
    ksort($result);

    $previousIndex = 0;
    $previousLength = 0;
    // A by-value foreach iterates over a copy, so the gap entries added to
    // $result inside the loop are not visited.
    foreach ($result as $index => $entry) {
        $expectedIndex = $previousIndex + $previousLength;
        $delta = $index - $expectedIndex;
        if ($delta > 0) {
            // unmatched text between the previous chunk and this one
            $result[$expectedIndex] = substr($string, $expectedIndex, $delta);
        } elseif ($delta < 0) {
            // this match starts before the previous one ends
            $firstString = $result[$previousIndex][0];
            $secondString = $entry[0];
            $firstFlag = StringExploder::flagToString($result[$previousIndex][1]);
            $secondFlag = StringExploder::flagToString($entry[1]);
            throw new Exception("Conflict between {$firstFlag} ('{$firstString}' at {$previousIndex}) and {$secondFlag} ('{$secondString}' at {$index})");
        }

        $previousIndex = $index;
        // The sentinel is null: do not dereference it (array offset on null
        // warns since PHP 7.4, strlen(null) is deprecated since PHP 8.1).
        $previousLength = $entry === null ? 0 : strlen($entry[0]);
    }

    ksort($result);
    unset($result[$length]);

    return $result;
}
/**
 * Re-indents a piece of HTML code.
 *
 * The code is collapsed to single spaces, split into opening tags, closing
 * tags and text, then rebuilt line by line: short elements are kept on one
 * line, long text and long opening tags are wrapped, and every line is
 * indented according to the nesting depth of the tags around it.
 *
 * @param string $html     the HTML code to indent
 * @param string $tab      the string used for one level of indentation
 * @param int    $wrapSize the length (in bytes) above which a line is wrapped
 * @return string the indented HTML, with a leading and a trailing new line
 * @throws Exception if a closing tag does not match the last open tag, if a
 *                   closing tag has no open tag at all, or if some tags
 *                   remain unpaired
 */
public static function indentHtml($html, $tab = "\t", $wrapSize = 80)
{
    // remove useless blank characters
    $html = preg_replace("#\\s+#i", " ", $html);

    // explode the code: flag true = opening tag, flag false = closing tag
    $exploder = new StringExploder();
    $exploder->addDescriptor(new StringExploderDescriptor("#<[^/\\s][^>]*>#", true));
    $exploder->addDescriptor(new StringExploderDescriptor("#</[^>]+>#", false));
    $array = $exploder->parse($html);

    // ignore auto closing tags: they become plain text, as they open nothing
    foreach ($array as $index => $row) {
        if (is_array($row) && preg_match("#/>\$#", $row[0])) {
            $array[$index] = $row[0];
        }
    }

    // ignore typical tags which care about the exact content: everything from
    // such a tag up to the point where the nesting level is back to zero is
    // merged into a single string
    $indexMerge = -1;
    $mergingLevel = 0;
    foreach ($array as $index => $row) {
        if ($indexMerge != -1) {
            $content = is_array($row) ? $row[0] : $row;
            $array[$indexMerge] .= $content;
            unset($array[$index]);
            if (is_array($row)) {
                $mergingLevel += $row[1] ? 1 : -1;
            }
            if ($mergingLevel <= 0) {
                $indexMerge = -1;
            }
        } elseif (is_array($row) && preg_match("#^</?(code|pre)#", $row[0])) {
            $array[$index] = $row[0];
            $indexMerge = $index;
            $mergingLevel = 1;
        }
    }

    // consistency check: each closing tag must close the last open tag
    $temp = array_filter($array, function ($row) {
        return is_array($row);
    });
    $indexStack = array();
    foreach ($temp as $index => $row) {
        if ($row[1]) {
            array_push($indexStack, $index);
        } else {
            $closeTag = $row[0];
            if (empty($indexStack)) {
                // fail explicitly instead of reading an undefined entry
                throw new Exception(htmlentities($closeTag) . " at {$index} does not close any open tag.");
            }
            $openIndex = array_pop($indexStack);
            $openTag = $temp[$openIndex][0];
            // name of the closed tag: the closing tag without "</" and ">"
            $tag = substr($closeTag, 2, strlen($closeTag) - 3);
            if (strpos($openTag, '<' . $tag) !== 0) {
                throw new Exception(htmlentities($closeTag) . " at {$index} does not close " . htmlentities($openTag) . " at {$openIndex}.");
            }
            unset($temp[$openIndex]);
            unset($temp[$index]);
        }
    }
    if (!empty($temp)) {
        throw new Exception("Some tags are in conflict.");
    }

    // repeat the two reductions below until nothing changes anymore
    $diff = null;
    $refArray = $array;
    do {
        // ignore small tags: an opening tag, its text and its closing tag are
        // merged into one string when the whole is shorter than $wrapSize
        $indexes = null;
        $concat = "";
        foreach ($array as $index => $row) {
            if (is_array($row)) {
                if ($row[1]) {
                    // (re)start the candidate at the innermost opening tag
                    $indexes = array($index);
                    $concat = $row[0];
                } elseif (!empty($indexes)) {
                    $indexes[] = $index;
                    $concat .= $row[0];
                    if (strlen($concat) < $wrapSize) {
                        foreach ($indexes as $i) {
                            unset($array[$i]);
                        }
                        $array[$indexes[0]] = $concat;
                    }
                    $indexes = null;
                }
                // otherwise: closing tag of a higher level, let as is
            } elseif (!empty($indexes)) {
                $indexes[] = $index;
                $concat .= $row;
            }
        }
        ksort($array);

        // merge consecutive ignored parts
        $refIndex = null;
        foreach ($array as $index => $row) {
            if (is_string($row)) {
                if ($refIndex !== null) {
                    $array[$refIndex] .= $array[$index];
                    unset($array[$index]);
                } else {
                    $refIndex = $index;
                }
            } else {
                $refIndex = null;
            }
        }

        // NOTE(review): Format::array_diff_values() is not visible here;
        // presumably it returns an empty result once both arrays hold the
        // same values - confirm against its definition.
        $diff = Format::array_diff_values($refArray, $array);
        $refArray = $array;
    } while (!empty($diff));

    // force new lines around tags which are naturally displayed as blocks
    $blockTags = "(?:li|ul|ol|div|h\\d|hr)";
    $autoCloseTags = "(?:br|hr)";
    $attributes = " ?[^>]*";
    $out = "[^\n]";
    foreach ($array as $index => $row) {
        if (is_string($row)) {
            $row = preg_replace("#({$out})(<{$blockTags}{$attributes}>)#", "\$1\n\$2", $row);
            $row = preg_replace("#(</\\s*{$blockTags}\\s*>)(?={$out})#", "\$1\n", $row);
            $row = preg_replace("#(<{$autoCloseTags}{$attributes}/>)(?={$out})#", "\$1\n", $row);
            // NOTE(review): Format::introduceRows() is not visible here;
            // presumably it replaces the entry at $index by one entry per
            // line of $row - confirm against its definition.
            Format::introduceRows($array, $index, $row);
        }
    }
    ksort($array);

    // wrap too long lines, except the blocks which care about their content
    foreach ($array as $index => $row) {
        if (is_string($row) && !preg_match("#^</?(code|pre)#", $row)) {
            if (strlen($row) > $wrapSize) {
                Format::introduceRows($array, $index, wordwrap($row, $wrapSize, "\n", false));
            }
        }
    }
    ksort($array);

    // wrap too long open tags, with preindent: one attribute per line
    $quotedTab = preg_quote($tab, '#');
    foreach ($array as $index => $row) {
        if (is_array($row) && $row[1]) {
            $content = $row[0];
            if (strlen($content) > $wrapSize) {
                $content = preg_replace("#\\s*(\\S+\\s*=\\s*\"[^\"]*\")#", "\n{$tab}\$1", $content);
                $content = preg_replace("#\\s*(\\S+\\s*=\\s*'[^']*')#", "\n{$tab}\$1", $content);
                // the first attribute stays on the line of the tag name; the
                // tab is quoted so that it is always matched literally
                $content = preg_replace("#\n{$quotedTab}#", " ", $content, 1);
                Format::introduceRows($array, $index, $content);
                // the first line still opens a tag: flag it again as such
                $array[$index] = array($array[$index], true);
            }
        }
    }
    ksort($array);

    // indent lines: an opening tag indents what follows it, a closing tag
    // unindents itself and what follows it
    $indent = "";
    foreach ($array as $index => $row) {
        if (is_array($row)) {
            if ($row[1]) {
                $array[$index] = $indent . $row[0];
                $indent .= $tab;
            } else {
                $indent = substr($indent, strlen($tab));
                $array[$index] = $indent . $row[0];
            }
        } else {
            $array[$index] = $indent . $row;
        }
    }

    // concat all lines
    $html = implode("\n", $array);
    return "\n{$html}\n";
}
/**
 * Translates a BBCode string into HTML.
 *
 * The string is split into tags and text with a StringExploder built from the
 * registered descriptors. The innermost tags are then replaced one by one by
 * a fresh BBCodeDescriptor holding their parameter and content, until no tag
 * remains. The result is rendered through a root descriptor.
 *
 * @param string $string the BBCode text to translate
 * @return mixed the value returned by BBCodeDescriptor::generateHTML() on the
 *               root descriptor (presumably the HTML string)
 * @throws Exception on a closing tag without opening tag, an opening tag
 *                   without its required closing tag, crossing tags, or
 *                   content chunks which do not line up between two tags
 */
public function translate($string)
{
    $exploder = new StringExploder();
    $flagMap = array();       // opening flag => expected closing flag
    $descriptorMap = array(); // opening flag => registered descriptor
    foreach ($this->descriptors as $descriptor) {
        $openTag = $descriptor->getTag();
        $closeTag = '/' . $openTag;
        // The tag name is quoted so that it is matched literally, even if it
        // contains characters which are special in a regex (e.g. "*").
        $quotedTag = preg_quote($openTag, '#');
        $exploder->addDescriptor(new StringExploderDescriptor("#\\[{$quotedTag}(=[^\\]]+)?\\]#", $openTag));
        if ($descriptor->hasCloseTag()) {
            $exploder->addDescriptor(new StringExploderDescriptor("#\\[/{$quotedTag}\\]#", $closeTag));
            $flagMap[$openTag] = $closeTag;
        }
        $descriptorMap[$openTag] = $descriptor;
    }

    // Returns the part following "=" in "[tag=parameter]", or null if none.
    $extractParameter = function ($tagText) {
        $parts = preg_split('#=#', substr($tagText, 1, strlen($tagText) - 2), 2, PREG_SPLIT_NO_EMPTY);
        return count($parts) > 1 ? $parts[1] : null;
    };

    // Chunks are indexed by their offset in $string: plain strings are text,
    // array(text, flag) are tags not yet computed (closing flags start by "/").
    $array = $exploder->parse($string);
    while (count($array) > 1 || (count($array) > 0 && is_array($array[0]))) {
        // first closing tag
        $closeIndex = -1;
        foreach ($array as $index => $chunk) {
            if (is_array($chunk) && $chunk[1][0] === '/') {
                $closeIndex = $index;
                break;
            }
        }

        // last opening tag before this closing tag (or last one at all)
        $openIndex = -1;
        foreach ($array as $index => $chunk) {
            if (is_array($chunk) && $chunk[1][0] !== '/') {
                if ($closeIndex !== -1 && $index >= $closeIndex) {
                    break;
                }
                $openIndex = $index;
            }
        }

        if ($openIndex === -1) {
            if ($closeIndex === -1) {
                break; // no more tag can be computed
            }
            $tag = $array[$closeIndex][0];
            throw new Exception("Alone closing tag '{$tag}' at {$closeIndex}");
        }

        $openTag = $array[$openIndex][0];
        $openFlag = $array[$openIndex][1];
        $descriptor = $descriptorMap[$openFlag];
        unset($array[$openIndex]);

        if (!$descriptor->hasCloseTag()) {
            // standalone tag: no content, the closing tag found (if any)
            // belongs to another tag and is left in place
            $descriptor = new BBCodeDescriptor($descriptor->getTag(), $descriptor->getOpenTagCallback());
            $descriptor->setContent(null);
            $descriptor->setParameter($extractParameter($openTag));
            $array[$openIndex] = $descriptor;
        } elseif ($closeIndex === -1) {
            throw new Exception("Alone opening tag '{$openTag}' at {$openIndex}");
        } else {
            $closeTag = $array[$closeIndex][0];
            $closeFlag = $array[$closeIndex][1];
            unset($array[$closeIndex]);
            if ($flagMap[$openFlag] !== $closeFlag) {
                $extract = substr($string, $openIndex, $closeIndex - $openIndex + strlen($closeTag));
                throw new Exception("Crossing tags for '{$extract}' at {$openIndex}");
            }

            // Collect the chunks lying between both tags, walking from one
            // chunk to the next through its length.
            // NOTE(review): this assumes that BBCodeDescriptor::toString()
            // returns a text as long as the BBCode it replaces - confirm.
            $content = array();
            $contentIndex = $openIndex + strlen($openTag);
            while ($contentIndex < $closeIndex) {
                if (!isset($array[$contentIndex])) {
                    // without this guard the walk would never progress
                    throw new Exception("Inconsistent content inside '{$openTag}' at {$contentIndex}");
                }
                $row = $array[$contentIndex];
                unset($array[$contentIndex]);
                $content[] = $row;
                $rowText = $row instanceof BBCodeDescriptor ? $row->toString() : $row;
                $rowLength = strlen($rowText);
                if ($rowLength === 0) {
                    // an empty chunk would also freeze the walk
                    throw new Exception("Inconsistent content inside '{$openTag}' at {$contentIndex}");
                }
                $contentIndex += $rowLength;
            }

            // a single chunk is given as is, no chunk at all is given as null
            if (count($content) == 1) {
                $content = $content[0];
            } elseif (count($content) == 0) {
                $content = null;
            }

            $descriptor = new BBCodeDescriptor($descriptor->getTag(), $descriptor->getOpenTagCallback(), $descriptor->getCloseTagCallback(), $descriptor->getContentCallback());
            $descriptor->setContent($content);
            $descriptor->setParameter($extractParameter($openTag));
            $array[$openIndex] = $descriptor;
        }

        ksort($array);
    }

    $root = new BBCodeDescriptor(null, null);
    $root->setContent($array);
    $html = $root->generateHTML();

    return $html;
}