/**
 * Process phrases intelligently found within a HTML text (such as adding links).
 *
 * Each filter object in $link_array is searched for in $text and every match is
 * wrapped between the object's begin/end tags. Tags and the regions enclosed by
 * the "ignore" tags are taken out of $text before matching and put back at the
 * end, so phrases are never replaced inside them.
 *
 * The derived work_* properties are stored on each filter object the first time
 * it is processed, so $link_array is modified by this call.
 *
 * @staticvar array $usedphrases phrases already linked during this request
 *                  (only used when $CFG->filtermatchoneperpage is set)
 * @param string $text the text that we are filtering
 * @param array $link_array an array of filterobjects
 * @param array $ignoretagsopen an array of opening tags that we should ignore while filtering
 * @param array $ignoretagsclose an array of corresponding closing tags
 * @param bool $overridedefaultignore True to only use tags provided by arguments
 * @return string
 **/
function filter_phrases($text, &$link_array, $ignoretagsopen = NULL, $ignoretagsclose = NULL, $overridedefaultignore = false) {

    global $CFG;

    static $usedphrases = array();

    $ignoretags = array(); // To store all the enclosing tags to be completely ignored.
    $tags = array();       // To store all the simple tags to be ignored.

    if (!$overridedefaultignore) {
        // A list of open/close tags that we should not replace within.
        // Extended to include <script>, <textarea>, <select> and <a> tags.
        // Regular expression allows tags with or without attributes.
        $filterignoretagsopen = array('<head>', '<nolink>', '<span class="nolink">',
                '<script(\\s[^>]*?)?>', '<textarea(\\s[^>]*?)?>',
                '<select(\\s[^>]*?)?>', '<a(\\s[^>]*?)?>');
        $filterignoretagsclose = array('</head>', '</nolink>', '</span>',
                '</script>', '</textarea>', '</select>', '</a>');
    } else {
        // Set an empty default list.
        $filterignoretagsopen = array();
        $filterignoretagsclose = array();
    }

    // Add the user defined ignore tags to the default list.
    if (is_array($ignoretagsopen)) {
        foreach ($ignoretagsopen as $open) {
            $filterignoretagsopen[] = $open;
        }
        // The closing list keeps its NULL default when the caller only passes
        // opening tags; do not iterate over it in that case.
        if (is_array($ignoretagsclose)) {
            foreach ($ignoretagsclose as $close) {
                $filterignoretagsclose[] = $close;
            }
        }
    }

    // Invalid prefixes and suffixes for the fullmatch searches.
    // Every "word" character, but the underscore, is an invalid suffix or prefix
    // (nice to use this because it includes national characters (accents...) as word characters).
    $filterinvalidprefixes = '([^\\W_])';
    $filterinvalidsuffixes = '([^\\W_])';

    // Double up some magic chars to avoid "accidental matches".
    $text = preg_replace('/([#*%])/', '\\1\\1', $text);

    // Remove everything enclosed by the ignore tags from $text.
    filter_save_ignore_tags($text, $filterignoretagsopen, $filterignoretagsclose, $ignoretags);

    // Remove tags from $text.
    filter_save_tags($text, $tags);

    // When only one match per text/page is wanted, stop after the first
    // replacement; -1 is preg_replace()'s "no limit" value.
    if (!empty($CFG->filtermatchonepertext) || !empty($CFG->filtermatchoneperpage)) {
        $replacelimit = 1;
    } else {
        $replacelimit = -1;
    }

    // Time to cycle through each phrase to be linked.
    $size = sizeof($link_array);
    for ($n = 0; $n < $size; $n++) {
        $linkobject =& $link_array[$n];

        // Properties may be missing if the filterobject class has not been used
        // to construct the object; an object without a phrase is skipped.
        if (empty($linkobject->phrase)) {
            continue;
        }

        // Avoid integers < 1000 to be linked. See bug 1446.
        $intcurrent = intval($linkobject->phrase);
        if (!empty($intcurrent) && strval($intcurrent) == $linkobject->phrase && $intcurrent < 1000) {
            continue;
        }

        // All this work has to be done ONLY if it hasn't been done before.
        if (empty($linkobject->work_calculated)) {
            if (!isset($linkobject->hreftagbegin) or !isset($linkobject->hreftagend)) {
                // Default wrapper: a complete opening tag (with its closing '>').
                $linkobject->work_hreftagbegin = '<span class="highlight">';
                $linkobject->work_hreftagend   = '</span>';
            } else {
                $linkobject->work_hreftagbegin = $linkobject->hreftagbegin;
                $linkobject->work_hreftagend   = $linkobject->hreftagend;
            }

            // Double up chars to protect true duplicates -- the duplicates will
            // be cleared up before returning to the user.
            $linkobject->work_hreftagbegin = preg_replace('/([#*%])/', '\\1\\1', $linkobject->work_hreftagbegin);

            $linkobject->work_casesensitive = !empty($linkobject->casesensitive);
            $linkobject->work_fullmatch     = !empty($linkobject->fullmatch);

            // Strip tags out of the phrase.
            $linkobject->work_phrase = strip_tags($linkobject->phrase);

            // Double up chars that might cause a false match -- the duplicates will
            // be cleared up before returning to the user.
            $linkobject->work_phrase = preg_replace('/([#*%])/', '\\1\\1', $linkobject->work_phrase);

            // Set the replacement phrase properly.
            if (!empty($linkobject->replacementphrase)) {
                // We have specified a replacement phrase: strip tags.
                $linkobject->work_replacementphrase = strip_tags($linkobject->replacementphrase);
            } else {
                // The replacement is the original phrase as matched below.
                $linkobject->work_replacementphrase = '$1';
            }

            // Quote any regular expression characters and the delimiter in the work phrase to be searched.
            $linkobject->work_phrase = preg_quote($linkobject->work_phrase, '/');

            // Work calculated.
            $linkobject->work_calculated = true;
        }

        // If $CFG->filtermatchoneperpage, avoid previously (request) linked phrases.
        // Strict comparison so numeric-looking phrases ("1000" vs "1e3") stay distinct.
        if (!empty($CFG->filtermatchoneperpage)) {
            if (!empty($usedphrases) && in_array($linkobject->work_phrase, $usedphrases, true)) {
                continue;
            }
        }

        // Regular expression modifiers.
        $modifiers = $linkobject->work_casesensitive ? 's' : 'isu'; // works in unicode mode!

        // Do we need to do a fullmatch?
        // If yes then go through and temporarily replace any non full matching
        // occurrence with a '<*N*>' placeholder so the highlighting below skips it.
        $notfullmatches = array();
        if ($linkobject->work_fullmatch) {
            $regexp = '/' . $filterinvalidprefixes . '(' . $linkobject->work_phrase . ')|('
                    . $linkobject->work_phrase . ')' . $filterinvalidsuffixes . '/' . $modifiers;

            preg_match_all($regexp, $text, $list_of_notfullmatches);

            if (!empty($list_of_notfullmatches[0])) {
                foreach (array_unique($list_of_notfullmatches[0]) as $key => $value) {
                    $notfullmatches['<*' . $key . '*>'] = $value;
                }
                if (!empty($notfullmatches)) {
                    $text = str_replace($notfullmatches, array_keys($notfullmatches), $text);
                }
            }
        }

        // Finally we do our highlighting.
        $resulttext = preg_replace('/(' . $linkobject->work_phrase . ')/' . $modifiers,
                $linkobject->work_hreftagbegin . $linkobject->work_replacementphrase . $linkobject->work_hreftagend,
                $text, $replacelimit);

        // preg_replace() returns NULL on error; in that case keep $text untouched
        // instead of losing the whole content.
        // If the text has changed we have to look for links again.
        if (is_string($resulttext) && $resulttext !== $text) {
            // Set $text to $resulttext.
            $text = $resulttext;
            // Remove everything enclosed by the ignore tags from $text.
            filter_save_ignore_tags($text, $filterignoretagsopen, $filterignoretagsclose, $ignoretags);
            // Remove tags from $text.
            filter_save_tags($text, $tags);
            // If $CFG->filtermatchoneperpage, save linked phrases to request.
            if (!empty($CFG->filtermatchoneperpage)) {
                $usedphrases[] = $linkobject->work_phrase;
            }
        }

        // Replace the not full matches before cycling to next link object.
        if (!empty($notfullmatches)) {
            $text = str_replace(array_keys($notfullmatches), $notfullmatches, $text);
        }
    }

    // Rebuild the text with all the excluded areas.
    if (!empty($tags)) {
        $text = str_replace(array_keys($tags), $tags, $text);
    }

    if (!empty($ignoretags)) {
        // Reversed so "progressive" str_replace() will solve some nesting problems.
        $ignoretags = array_reverse($ignoretags);
        $text = str_replace(array_keys($ignoretags), $ignoretags, $text);
    }

    // Remove the protective doubleups.
    $text = preg_replace('/([#*%])(\\1)/', '\\1', $text);

    // Add missing javascript for popups.
    $text = filter_add_javascript($text);

    return $text;
}
/**
 * Given some normal text this function will break up any
 * long words to a given size by inserting the given character
 *
 * It's multibyte savvy and doesn't change anything inside html tags.
 *
 * A "word" is a run of characters that contains none of: space, tab,
 * newline, carriage return, '<' or '>'. Every piece produced by a cut is
 * at most $maxsize characters long.
 *
 * @param string $string the string to be modified
 * @param int $maxsize maximum number of consecutive word characters allowed before a cut is inserted
 * @param string $cutchar the string used to represent word breaks
 * @return string
 */
function break_up_long_words($string, $maxsize = 20, $cutchar = ' ') {

    /// Loading the textlib singleton instance. We are going to need it.
    $textlib = textlib_get_instance();

    /// First of all, save all the tags inside the text to skip them
    $tags = array();
    filter_save_tags($string, $tags);

    /// Characters that end the current word and reset the counter
    $wordbreaks = array(' ', "\t", "\n", "\r", '<', '>');

    /// Process the string adding the cut when necessary
    $output = '';
    $length = $textlib->strlen($string);
    $wordlength = 0;

    for ($i = 0; $i < $length; $i++) {
        $char = $textlib->substr($string, $i, 1);
        if (in_array($char, $wordbreaks, true)) {
            $wordlength = 0;
        } else {
            $wordlength++;
            if ($wordlength > $maxsize) {
                $output .= $cutchar;
                /// The character appended below is already the first one of the
                /// new piece, so it must be counted (otherwise every piece after
                /// the first would be $maxsize + 1 characters long)
                $wordlength = 1;
            }
        }
        $output .= $char;
    }

    /// Finally load the tags back again
    if (!empty($tags)) {
        $output = str_replace(array_keys($tags), $tags, $output);
    }

    return $output;
}