/**
 * Convert high and low ASCII characters in $text to HTML entities.
 *
 * When the multibyte string extension is present (detected through
 * mb_encode_numericentity()), the conversion is delegated to encode_high(),
 * which is defined elsewhere. Otherwise PHP's built-in htmlentities() is
 * used, leaving quotes untouched and treating the input as UTF-8.
 *
 * @param string $text Text to encode.
 * @return string Result of encode_high() or htmlentities().
 */
function encodeEntities($text) {
    # no mbstring: fall back to the built-in converter
    if (!function_exists('mb_encode_numericentity')) {
        return htmlentities($text, ENT_NOQUOTES, "utf-8");
    }

    return encode_high($text);
}
/**
 * Convert Textile-style markup in $text to HTML.
 *
 * The conversion runs in four passes:
 *   1. global clean-up (slashes, entities, line endings, tabs, trailing
 *      whitespace on each line),
 *   2. inline "quick tags" (images, links, bold, emphasis, code, ...),
 *   3. typographic glyphs (curly quotes, dashes, (TM)/(R)/(C), acronyms),
 *      skipped inside <code>, <pre>, <kbd> and <notextile>,
 *   4. block formatting (paragraphs, headings, block quotes, lists).
 *
 * Non-ASCII input is entity-encoded through encode_high() when
 * mb_encode_numericentity() exists, otherwise through htmlentities().
 *
 * NOTE(review): stripslashes() is applied unconditionally, so literal
 * backslashes in the input are removed.
 * NOTE(review): this file declares textile() more than once; PHP cannot
 * redeclare a function -- confirm which copy should remain.
 *
 * @param string $text Textile source.
 * @return string Generated HTML.
 */
function textile($text) {

    ### Pass 1: global clean-up

    $text = stripslashes($text);

    # park bare ampersands behind a placeholder; the negative lookahead skips
    # anything that already looks like an html entity
    $text = preg_replace("/&(?![#a-zA-Z0-9]+;)/", "x%x%", $text);

    # entify everything
    if (function_exists('mb_encode_numericentity')) {
        $text = encode_high($text);
    } else {
        $text = htmlentities($text, ENT_NOQUOTES, "utf-8");
    }

    # un-entify angle brackets and ampersands again
    $text = str_replace(array("&gt;", "&lt;", "&amp;"), array(">", "<", "&"), $text);

    # normalise line endings, drop tabs
    $text = str_replace("\r\n", "\n", $text);
    $text = str_replace("\t", "", $text);

    # strip trailing whitespace from every line (leading whitespace is kept)
    $text = implode("\n", array_map('rtrim', explode("\n", $text)));

    ### Pass 2: quick tags

    # ==text== becomes <notextile>text</notextile>
    $text = preg_replace('/(^|\\s)==(.*)==(\\s|$)?/msU', '$1<notextile>$2</notextile>$3', $text);

    # !url(alt)! becomes an image
    $text = preg_replace('/!([^\\s\\(=]+)\\s?(?:\\(([^\\)]+)\\))?!(\\s)?/mU', '<img src="$1" alt="$2" border="0" />$3', $text);

    # image immediately followed by :url becomes a linked image
    $text = preg_replace('/(<img.+ \\/>):(\\S+)(\\s)/U', '<a href="$2">$1</a>$3', $text);

    # "text(title)":url becomes a hyperlink; the pattern uses /x, so each part
    # must stay on its own line for the trailing comments to end correctly
    $text = preg_replace('/
        ([\\s[{(]|[[:punct:]])?   # 1 optional space or brackets before
        "                         # starting "
        ([^"\\(]+)                # 2 text of link
        \\s?                      # opt space
        (?:\\(([^\\(]*)\\))?      # 3 opt title attribute in parenths
        ":                        # dividing ":
        (\\S+\\b)                 # 4 suppose this is the url
        (\\/)?                    # 5 opt trailing slash
        ([^[:alnum:]\\/;]*)       # 6 opt punctuation after the url
        (\\s|$)                   # 7 either white space or end of string
        /x', '$1<a href="$4$5" title="$3">$2</a>$6$7', $text);

    # delimiter (regex-escaped) => html tag
    $qtags = array(
        '\\*\\*' => 'b',
        '\\*' => 'strong',
        '\\?\\?' => 'cite',
        '-' => 'del',
        '\\+' => 'ins',
        '~' => 'sub',
        '@' => 'code');

    foreach ($qtags as $f => $r) {
        $text = preg_replace('/(^|\\s|>)' . $f . '\\b(.+)\\b([[:punct:]]*)' . $f . '([[:punct:]]{0,2})(\\s|$)?/mU', '$1<' . $r . '>$2$3</' . $r . '>$4$5', $text);
    }

    # underscores and carets do not sit well with \b, so they get their own rules
    $text = preg_replace('/(^|\\s)__(.*)__([[:punct:]]{0,2})(\\s|$)?/mU', '$1<i>$2</i>$3$4', $text);
    $text = preg_replace('/(^|\\s)_(.*)_([[:punct:]]{0,2})(\\s|$)?/mU', '$1<em>$2</em>$3$4', $text);
    $text = preg_replace('/\\^(.*)\\^/mU', '<sup>$1</sup>', $text);

    ### Pass 3: typographic glyphs

    # a double quote at the very end needs trailing space to be seen as closing
    $text = preg_replace('/"$/', "\" ", $text);

    $glyph_search = array(
        '/([^\\s[{(>])?\'(?(1)|(?=\\s|s\\b))/',                  # single closing
        '/\'/',                                                   # single opening
        '/([^\\s[{(])?"(?(1)|(?=\\s))/',                         # double closing
        '/"/',                                                    # double opening
        '/\\b( )?\\.{3}/',                                        # ellipsis
        '/\\b([A-Z][A-Z0-9]{2,})\\b(?:[(]([^)]*)[)])/',           # 3+ uppercase acronym
        '/(^|[^"][>\\s])([A-Z][A-Z0-9 ]{2,})([^<a-z0-9]|$)/',     # 3+ uppercase caps
        '/\\s?--\\s?/',                                           # em dash
        '/\\s-\\s/',                                              # en dash
        '/(\\d+) ?x ?(\\d+)/',                                    # dimension sign
        '/\\b ?[([]TM[])]/i',                                     # trademark
        '/\\b ?[([]R[])]/i',                                      # registered
        '/\\b ?[([]C[])]/i');                                     # copyright

    # numeric entities keep the output ASCII, like the entified body text
    $glyph_replace = array(
        '$1&#8217;$2',                          # single closing
        '&#8216;',                              # single opening
        '$1&#8221;',                            # double closing
        '&#8220;',                              # double opening
        '$1&#8230;',                            # ellipsis
        '<acronym title="$2">$1</acronym>',     # 3+ uppercase acronym
        '$1<span class="caps">$2</span>$3',     # 3+ uppercase caps
        '&#8212;',                              # em dash
        ' &#8211; ',                            # en dash
        '$1&#215;$2',                           # dimension sign
        '&#8482;',                              # trademark
        '&#174;',                               # registered
        '&#169;');                              # copyright

    # true while we are between <code>, <pre>, <kbd> or <notextile> tags
    $codepre = false;

    if (!preg_match("/<.*>/", $text)) {
        # no html at all: one plain search and replace
        $text = preg_replace($glyph_search, $glyph_replace, $text);
    } else {
        # split around every tag, keeping the tags as their own chunks
        $chunks = preg_split("/(<.*>)/U", $text, -1, PREG_SPLIT_DELIM_CAPTURE);
        $glyph_out = array();

        foreach ($chunks as $chunk) {
            if (preg_match('/<(code|pre|kbd|notextile)>/i', $chunk)) {
                $codepre = true;
            }
            if (preg_match('/<\\/(code|pre|kbd|notextile)>/i', $chunk)) {
                $codepre = false;
            }

            # glyphs only apply to text chunks outside protected tags
            if (!$codepre && !preg_match("/<.*>/", $chunk)) {
                $chunk = preg_replace($glyph_search, $glyph_replace, $chunk);
            }

            if ($codepre) {
                # show markup literally, then restore the protecting tag itself
                $chunk = htmlspecialchars($chunk, ENT_NOQUOTES, "UTF-8");
                $chunk = str_replace("&lt;pre&gt;", "<pre>", $chunk);
                $chunk = str_replace("&lt;code&gt;", "<code>", $chunk);
                $chunk = str_replace("&lt;notextile&gt;", "<notextile>", $chunk);
                $chunk = str_replace("&lt;kbd&gt;", "<kbd>", $chunk);
            }

            $glyph_out[] = $chunk;
        }

        $text = implode('', $glyph_out);
    }

    ### Pass 4: block level formatting

    # a single newline inside a block is a forced break; breaks that end up
    # inside <pre> are undone in the loop below
    $text = preg_replace("/(\\S)(_*)([[:punct:]]*) *\n([^#*\\s])/", "\$1\$2\$3<br />\$4", $text);

    # no break directly after a tag whose name ends in "l"
    $text = str_replace("l><br />", "l>\n", $text);

    $lines = explode("\n", $text);
    # sentinel line so that a list still open at the end gets closed
    $lines[] = " ";

    $list = '';
    $pre = false;

    $block_find = array(
        '/^\\s?\\*\\s(.*)/',                          # bulleted list item
        '/^\\s?#\\s(.*)/',                            # numbered list item
        '/^bq\\. (.*)/',                              # block quote
        '/^h(\\d)\\(([[:alnum:]]+)\\)\\.\\s(.*)/',    # heading with class
        '/^h(\\d)\\. (.*)/',                          # plain heading
        '/^p\\(([[:alnum:]]+)\\)\\.\\s(.*)/',         # paragraph with class
        '/^p\\. (.*)/i',                              # plain paragraph
        '/^([^\\t ]+.*)/i');                          # anything else not indented

    $block_replace = array(
        "\t\t<li>\$1</li>",
        "\t\t\t<li>\$1</li>",
        "\t<blockquote>\$1</blockquote>",
        "\t<h\$1 class=\"\$2\">\$3</h\$1>\$4",
        "\t<h\$1>\$2</h\$1>\$3",
        "\t<p class=\"\$1\">\$2</p>\$3",
        "\t<p>\$1</p>",
        "\t<p>\$1</p>\$2");

    $block_out = array();

    foreach ($lines as $line) {
        if (preg_match('/<pre>/i', $line)) {
            $pre = true;
        }

        if ($pre) {
            # undo forced breaks that slipped into preformatted text
            $line = str_replace("<br />", "\n", $line);
        } else {
            $line = preg_replace($block_find, $block_replace, $line);
        }

        if (preg_match('/<\\/pre>/i', $line)) {
            $pre = false;
        }

        # two leading tabs mark a <ul> item, three mark an <ol> item
        if (preg_match('/^\\t\\t<li>/', $line)) {
            $kind = 'ul';
        } elseif (preg_match('/^\\t\\t\\t<li>/', $line)) {
            $kind = 'ol';
        } else {
            $kind = '';
        }

        if ($kind !== $list) {
            # close the list we are leaving and open the one we are entering
            # in the same step, so adjacent lists of different kinds nest
            # their first item correctly
            $prefix = '';
            if ($list !== '') {
                $prefix = "\t</" . $list . ">\n";
            }
            if ($kind === 'ul') {
                $prefix .= "\t<ul>\n";
            } elseif ($kind === 'ol') {
                $prefix .= "\t<ol>\n";
                # the first <ol> item has always been emitted with one tab
                # less; kept so existing output does not change
                $line = substr($line, 1);
            }
            $line = $prefix . $line;
            $list = $kind;
        }

        $block_out[] = $line;
    }

    $text = implode("\n", $block_out);

    # <notextile> was only a marker; drop it
    $text = preg_replace('/<\\/?notextile>/', "", $text);

    # turn the placeholder back into an ampersand entity
    $text = str_replace("x%x%", "&#38;", $text);

    # newline after every break, just for markup tidiness
    $text = str_replace("<br />", "<br />\n", $text);

    return $text;
}
/**
 * Convert Textile-style markup in $text to HTML.
 *
 * Steps, in order: basic global changes (slashes, entities, line endings,
 * tabs, whitespace around each line), quick tags (images, links, inline
 * formatting), typographic characters (not applied between <code>, <pre>,
 * <kbd> or <notextile>), and block level formatting (paragraphs, headings,
 * block quotes, lists).
 *
 * Non-ASCII input is entity-encoded through encode_high() when
 * mb_encode_numericentity() exists, otherwise through htmlentities().
 *
 * NOTE(review): stripslashes() is applied unconditionally, so literal
 * backslashes in the input are removed.
 * NOTE(review): this file declares textile() more than once; PHP cannot
 * redeclare a function -- confirm which copy should remain.
 *
 * @param string $text Textile source.
 * @return string Generated HTML.
 */
function textile($text) {

    ### Basic global changes

    $text = stripslashes($text);

    # turn any incoming ampersands into a dummy character for now.
    # This uses a negative lookahead for alphanumerics followed by a semicolon,
    # implying an incoming html entity, to be skipped
    $text = preg_replace("/&(?![#a-zA-Z0-9]+;)/", "x%x%", $text);

    # entify everything
    if (function_exists('mb_encode_numericentity')) {
        $text = encode_high($text);
    } else {
        $text = htmlentities($text, ENT_NOQUOTES, "utf-8");
    }

    # unentify angle brackets and ampersands
    $text = str_replace(array("&gt;", "&lt;", "&amp;"), array(">", "<", "&"), $text);

    # zap carriage returns
    $text = str_replace("\r\n", "\n", $text);

    # zap tabs
    $text = str_replace("\t", "", $text);

    # strip leading and trailing whitespace from every line
    $text = implode("\n", array_map('trim', explode("\n", $text)));

    ### Find and replace quick tags

    # double equal signs means <notextile>
    $text = preg_replace('/(^|\\s)==(.*)==(\\s|$)?/msU', '$1<notextile>$2</notextile>$3', $text);

    # image qtag
    $text = preg_replace('/!([^\\s\\(=]+)\\s?(?:\\(([^\\)]+)\\))?!(\\s)?/mU', '<img src="$1" alt="$2" border="0" />$3', $text);

    # image with hyperlink
    $text = preg_replace('/(<img.+ \\/>):(\\S+)(\\s)/U', '<a href="$2">$1</a>$3', $text);

    # hyperlink qtag; the pattern uses /x, so every part keeps its own line
    # and the trailing comments stop at the line end
    $text = preg_replace('/
        ([\\s[{(]|[[:punct:]])?   # 1 optional space or brackets before
        "                         # starting "
        ([^"\\(]+)                # 2 text of link
        \\s?                      # opt space
        (?:\\(([^\\(]*)\\))?      # 3 opt title attribute in parenths
        ":                        # dividing ":
        (\\S+\\b)                 # 4 suppose this is the url
        (\\/)?                    # 5 opt trailing slash
        ([^[:alnum:]\\/;]*)       # 6 opt punctuation after the url
        (\\s|$)                   # 7 either white space or end of string
        /x', '$1<a href="$4$5" title="$3">$2</a>$6$7', $text);

    # arrange qtag delineators and replacements in an array
    $qtags = array(
        '\\*\\*' => 'b',
        '\\*' => 'strong',
        '\\?\\?' => 'cite',
        '-' => 'del',
        '\\+' => 'ins',
        '~' => 'sub',
        '@' => 'code');

    # loop through the array, replacing qtags with html
    foreach ($qtags as $f => $r) {
        $text = preg_replace('/(^|\\s|>)' . $f . '\\b(.+)\\b([[:punct:]]*)' . $f . '([[:punct:]]{0,2})(\\s|$)?/mU', '$1<' . $r . '>$2$3</' . $r . '>$4$5', $text);
    }

    # underscores do not work with \b word boundaries,
    # so we do those on their own
    $text = preg_replace('/(^|\\s)__(.*)__([[:punct:]]{0,2})(\\s|$)?/mU', '$1<i>$2</i>$3$4', $text);
    $text = preg_replace('/(^|\\s)_(.*)_([[:punct:]]{0,2})(\\s|$)?/mU', '$1<em>$2</em>$3$4', $text);
    $text = preg_replace('/\\^(.*)\\^/mU', '<sup>$1</sup>', $text);

    ### Find and replace typographic chars and special tags

    # small problem with double quotes at the end of a string
    $text = preg_replace('/"$/', "\" ", $text);

    # NB: all these will wreak havoc inside <html> tags
    $glyph_search = array(
        '/([^\\s[{(>])?\'(?(1)|(?=\\s|s\\b))/',                  # single closing
        '/\'/',                                                   # single opening
        '/([^\\s[{(])?"(?(1)|(?=\\s))/',                         # double closing
        '/"/',                                                    # double opening
        '/\\b( )?\\.{3}/',                                        # ellipsis
        '/\\b([A-Z][A-Z0-9]{2,})\\b(?:[(]([^)]*)[)])/',           # 3+ uppercase acronym
        '/(^|[^"][>\\s])([A-Z][A-Z0-9 ]{2,})([^<a-z0-9]|$)/',     # 3+ uppercase caps
        '/\\s?--\\s?/',                                           # em dash
        '/\\s-\\s/',                                              # en dash
        '/(\\d+) ?x ?(\\d+)/',                                    # dimension sign
        '/\\b ?[([]TM[])]/i',                                     # trademark
        '/\\b ?[([]R[])]/i',                                      # registered
        '/\\b ?[([]C[])]/i');                                     # copyright

    # numeric entities keep the output ASCII, like the entified body text
    $glyph_replace = array(
        '$1&#8217;$2',                          # single closing
        '&#8216;',                              # single opening
        '$1&#8221;',                            # double closing
        '&#8220;',                              # double opening
        '$1&#8230;',                            # ellipsis
        '<acronym title="$2">$1</acronym>',     # 3+ uppercase acronym
        '$1<span class="caps">$2</span>$3',     # 3+ uppercase caps
        '&#8212;',                              # em dash
        ' &#8211; ',                            # en dash
        '$1&#215;$2',                           # dimension sign
        '&#8482;',                              # trademark
        '&#174;',                               # registered
        '&#169;');                              # copyright

    # set toggle for turning off replacements between <code> or <pre>
    $codepre = false;

    # if there is no html, do a simple search and replace
    if (!preg_match("/<.*>/", $text)) {
        $text = preg_replace($glyph_search, $glyph_replace, $text);
    } else {
        # else split the text into an array at <.*>
        $chunks = preg_split("/(<.*>)/U", $text, -1, PREG_SPLIT_DELIM_CAPTURE);
        $glyph_out = array();

        foreach ($chunks as $chunk) {
            # matches are off if we are between <code>, <pre> etc.
            if (preg_match('/<(code|pre|kbd|notextile)>/i', $chunk)) {
                $codepre = true;
            }
            if (preg_match('/<\\/(code|pre|kbd|notextile)>/i', $chunk)) {
                $codepre = false;
            }

            if (!$codepre && !preg_match("/<.*>/", $chunk)) {
                $chunk = preg_replace($glyph_search, $glyph_replace, $chunk);
            }

            # convert htmlspecial if between <code>, then restore the
            # protecting tag itself
            if ($codepre) {
                $chunk = htmlspecialchars($chunk, ENT_NOQUOTES, "UTF-8");
                $chunk = str_replace("&lt;pre&gt;", "<pre>", $chunk);
                $chunk = str_replace("&lt;code&gt;", "<code>", $chunk);
                $chunk = str_replace("&lt;notextile&gt;", "<notextile>", $chunk);
                $chunk = str_replace("&lt;kbd&gt;", "<kbd>", $chunk);
            }

            # each chunk gets pushed to a new array
            $glyph_out[] = $chunk;
        }

        # join the processed chunks back into one string
        $text = implode('', $glyph_out);
    }

    ### Block level formatting

    # deal with forced breaks; this is going to be a problem between
    # <pre> tags, but we clean them later
    $text = preg_replace("/(\\S)(_*)([[:punct:]]*) *\n([^#*\\s])/", "\$1\$2\$3<br />\$4", $text);

    # might be a problem with lists
    $text = str_replace("l><br />", "l>\n", $text);

    # split the text into an array by newlines; the extra " " line makes
    # sure a list still open at the end gets closed
    $lines = explode("\n", $text);
    $lines[] = " ";

    $list = '';
    $pre = false;

    $block_find = array(
        '/^\\s?\\*\\s(.*)/',                          # bulleted list item
        '/^\\s?#\\s(.*)/',                            # numbered list item
        '/^bq\\. (.*)/',                              # block quote
        '/^h(\\d)\\(([[:alnum:]]+)\\)\\.\\s(.*)/',    # heading with class
        '/^h(\\d)\\. (.*)/',                          # plain heading
        '/^p\\(([[:alnum:]]+)\\)\\.\\s(.*)/',         # paragraph with class
        '/^p\\. (.*)/i',                              # plain paragraph
        '/^([^\\t ]+.*)/i');                          # anything else not indented

    $block_replace = array(
        "\t\t<li>\$1</li>",
        "\t\t\t<li>\$1</li>",
        "\t<blockquote>\$1</blockquote>",
        "\t<h\$1 class=\"\$2\">\$3</h\$1>\$4",
        "\t<h\$1>\$2</h\$1>\$3",
        "\t<p class=\"\$1\">\$2</p>\$3",
        "\t<p>\$1</p>",
        "\t<p>\$1</p>\$2");

    $block_out = array();

    # loop through lines
    foreach ($lines as $line) {
        # matches are off if we are between <pre> tags
        if (preg_match('/<pre>/i', $line)) {
            $pre = true;
        }

        if ($pre) {
            # kill any br tags that slipped in earlier
            $line = str_replace("<br />", "\n", $line);
        } else {
            # deal with block replacements first, then see if we are in a list
            $line = preg_replace($block_find, $block_replace, $line);
        }

        # matches back on after </pre>
        if (preg_match('/<\\/pre>/i', $line)) {
            $pre = false;
        }

        # two tabs means unordered list item, three tabs ordered list item
        if (preg_match('/^\\t\\t<li>/', $line)) {
            $kind = 'ul';
        } elseif (preg_match('/^\\t\\t\\t<li>/', $line)) {
            $kind = 'ol';
        } else {
            $kind = '';
        }

        if ($kind !== $list) {
            # close the list we are leaving and open the one we are entering
            # in the same step, so adjacent lists of different kinds nest
            # their first item correctly
            $prefix = '';
            if ($list !== '') {
                $prefix = "\t</" . $list . ">\n";
            }
            if ($kind === 'ul') {
                $prefix .= "\t<ul>\n";
            } elseif ($kind === 'ol') {
                $prefix .= "\t<ol>\n";
                # the first <ol> item has always been emitted with one tab
                # less; kept so existing output does not change
                $line = substr($line, 1);
            }
            $line = $prefix . $line;
            $list = $kind;
        }

        # push each line to a new array once processed
        $block_out[] = $line;
    }

    $text = implode("\n", $block_out);

    # clean up <notextile>
    $text = preg_replace('/<\\/?notextile>/', "", $text);

    # turn the temp char back to an ampersand entity
    $text = str_replace("x%x%", "&#38;", $text);

    # Newline linebreaks, just for markup tidiness
    $text = str_replace("<br />", "<br />\n", $text);

    return $text;
}