/**
 * Process phrases intelligently found within a HTML text (such as adding links).
 *
 * Each entry of $link_array is an object that is read for the properties
 * phrase, hreftagbegin, hreftagend, casesensitive, fullmatch and
 * replacementphrase. Any of them except phrase may be missing. Derived values
 * are cached on the same object in work_* properties (guarded by
 * work_calculated) so they are only computed once per object.
 *
 * When $CFG->filtermatchoneperpage is set, phrases that have already been
 * replaced are remembered in a static list and skipped on later calls.
 *
 * @staticvar array $usedphrases
 * @param string $text             the text that we are filtering
 * @param array $link_array       an array of filterobjects
 * @param array $ignoretagsopen   an array of opening tags that we should ignore while filtering
 * @param array $ignoretagsclose  an array of corresponding closing tags
 * @param bool $overridedefaultignore True to only use tags provided by arguments
 * @return string
 **/
function filter_phrases($text, &$link_array, $ignoretagsopen = NULL, $ignoretagsclose = NULL, $overridedefaultignore = false)
{
    global $CFG;
    static $usedphrases = array();
    $ignoretags = array();
    // To store all the enclosing tags to be completely ignored.
    $tags = array();
    // To store all the simple tags to be ignored.
    if (!$overridedefaultignore) {
        // A list of open/close tags that we should not replace within
        // Extended to include <script>, <textarea>, <select> and <a> tags
        // Regular expression allows tags with or without attributes
        $filterignoretagsopen = array('<head>', '<nolink>', '<span class="nolink">', '<script(\\s[^>]*?)?>', '<textarea(\\s[^>]*?)?>', '<select(\\s[^>]*?)?>', '<a(\\s[^>]*?)?>');
        $filterignoretagsclose = array('</head>', '</nolink>', '</span>', '</script>', '</textarea>', '</select>', '</a>');
    } else {
        // Set an empty default list.
        $filterignoretagsopen = array();
        $filterignoretagsclose = array();
    }
    // Add the user defined ignore tags to the default list.
    // Closing tags are only considered together with opening tags, and only
    // when they were actually supplied as an array.
    if (is_array($ignoretagsopen)) {
        foreach ($ignoretagsopen as $open) {
            $filterignoretagsopen[] = $open;
        }
        if (is_array($ignoretagsclose)) {
            foreach ($ignoretagsclose as $close) {
                $filterignoretagsclose[] = $close;
            }
        }
    }
    // Invalid prefixes and suffixes for the fullmatch searches
    // Every "word" character, but the underscore, is a invalid suffix or prefix.
    // (nice to use this because it includes national characters (accents...) as word characters.
    $filterinvalidprefixes = '([^\\W_])';
    $filterinvalidsuffixes = '([^\\W_])';
    // Double up some magic chars to avoid "accidental matches"
    $text = preg_replace('/([#*%])/', '\\1\\1', $text);
    // Remove everything enclosed by the ignore tags from $text
    filter_save_ignore_tags($text, $filterignoretagsopen, $filterignoretagsclose, $ignoretags);
    // Remove tags from $text
    filter_save_tags($text, $tags);
    // Only replace the first occurrence when either "match once" setting is on
    // (-1 is preg_replace()'s "no limit" value). Invariant across phrases.
    $replacelimit = (!empty($CFG->filtermatchonepertext) || !empty($CFG->filtermatchoneperpage)) ? 1 : -1;
    // Time to cycle through each phrase to be linked.
    // foreach copes with any array keys; objects are handles, so the work_*
    // properties written below still land on the caller's objects.
    foreach ($link_array as $linkobject) {
        // Properties may be missing if the filterobject class has not been used to construct the object
        if (empty($linkobject->phrase)) {
            continue;
        }
        // Avoid integers < 1000 to be linked. See bug 1446.
        $intcurrent = intval($linkobject->phrase);
        if (!empty($intcurrent) && strval($intcurrent) == $linkobject->phrase && $intcurrent < 1000) {
            continue;
        }
        // All this work has to be done ONLY if it hasn't been done before
        if (empty($linkobject->work_calculated)) {
            if (!isset($linkobject->hreftagbegin) or !isset($linkobject->hreftagend)) {
                // Default highlighting wrapper (the opening tag must be complete).
                $linkobject->work_hreftagbegin = '<span class="highlight">';
                $linkobject->work_hreftagend = '</span>';
            } else {
                $linkobject->work_hreftagbegin = $linkobject->hreftagbegin;
                $linkobject->work_hreftagend = $linkobject->hreftagend;
            }
            // Double up chars to protect true duplicates -- the duplicates will
            // be cleared up before returning to the user.
            $linkobject->work_hreftagbegin = preg_replace('/([#*%])/', '\\1\\1', $linkobject->work_hreftagbegin);
            $linkobject->work_casesensitive = !empty($linkobject->casesensitive);
            $linkobject->work_fullmatch = !empty($linkobject->fullmatch);
            // Strip tags out of the phrase
            $linkobject->work_phrase = strip_tags($linkobject->phrase);
            // Double up chars that might cause a false match -- the duplicates will
            // be cleared up before returning to the user.
            $linkobject->work_phrase = preg_replace('/([#*%])/', '\\1\\1', $linkobject->work_phrase);
            // Set the replacement phrase properly
            if (!empty($linkobject->replacementphrase)) {
                // We have specified a replacement phrase
                // Strip tags
                $linkobject->work_replacementphrase = strip_tags($linkobject->replacementphrase);
            } else {
                // The replacement is the original phrase as matched below
                $linkobject->work_replacementphrase = '$1';
            }
            // Quote any regular expression characters and the delimiter in the work phrase to be searched
            $linkobject->work_phrase = preg_quote($linkobject->work_phrase, '/');
            // Work calculated
            $linkobject->work_calculated = true;
        }
        // A phrase consisting only of markup is empty after strip_tags(); the
        // resulting pattern /()/ would match at every position, so skip it.
        if ($linkobject->work_phrase === '' || $linkobject->work_phrase === null) {
            continue;
        }
        // If $CFG->filtermatchoneperpage, avoid previously (request) linked phrases
        if (!empty($CFG->filtermatchoneperpage)) {
            if (!empty($usedphrases) && in_array($linkobject->work_phrase, $usedphrases, true)) {
                continue;
            }
        }
        // Regular expression modifiers
        $modifiers = $linkobject->work_casesensitive ? 's' : 'isu';
        // works in unicode mode!
        // Do we need to do a fullmatch?
        // If yes then go through and hide any non full matching entries behind
        // '<*N*>' placeholders so the highlighting below cannot touch them.
        $notfullmatches = array();
        if ($linkobject->work_fullmatch) {
            $regexp = '/' . $filterinvalidprefixes . '(' . $linkobject->work_phrase . ')|(' . $linkobject->work_phrase . ')' . $filterinvalidsuffixes . '/' . $modifiers;
            $list_of_notfullmatches = array();
            preg_match_all($regexp, $text, $list_of_notfullmatches);
            if (!empty($list_of_notfullmatches[0])) {
                foreach (array_unique($list_of_notfullmatches[0]) as $key => $value) {
                    $notfullmatches['<*' . $key . '*>'] = $value;
                }
                $text = str_replace($notfullmatches, array_keys($notfullmatches), $text);
            }
        }
        // Finally we do our highlighting
        $resulttext = preg_replace('/(' . $linkobject->work_phrase . ')/' . $modifiers, $linkobject->work_hreftagbegin . $linkobject->work_replacementphrase . $linkobject->work_hreftagend, $text, $replacelimit);
        // preg_replace() returns null on a PCRE error; in that case keep $text
        // untouched instead of wiping it.
        // If the text has changed we have to look for links again
        if ($resulttext !== null && $resulttext !== $text) {
            // Set $text to $resulttext
            $text = $resulttext;
            // Remove everything enclosed by the ignore tags from $text
            filter_save_ignore_tags($text, $filterignoretagsopen, $filterignoretagsclose, $ignoretags);
            // Remove tags from $text
            filter_save_tags($text, $tags);
            // If $CFG->filtermatchoneperpage, save linked phrases to request
            if (!empty($CFG->filtermatchoneperpage)) {
                $usedphrases[] = $linkobject->work_phrase;
            }
        }
        // Put the not full matches back before cycling to next link object
        if (!empty($notfullmatches)) {
            $text = str_replace(array_keys($notfullmatches), $notfullmatches, $text);
        }
    }
    // Rebuild the text with all the excluded areas
    if (!empty($tags)) {
        $text = str_replace(array_keys($tags), $tags, $text);
    }
    if (!empty($ignoretags)) {
        $ignoretags = array_reverse($ignoretags);
        // Reversed so "progressive" str_replace() will solve some nesting problems.
        $text = str_replace(array_keys($ignoretags), $ignoretags, $text);
    }
    // Remove the protective doubleups
    $text = preg_replace('/([#*%])(\\1)/', '\\1', $text);
    // Add missing javascript for popups
    $text = filter_add_javascript($text);
    return $text;
}
// Example #2
// 0
// File: weblib.php  Project: hatone/moodle
/**
 * Given some normal text this function will break up any
 * long words to a given size by inserting the given character
 *
 * Characters are read one at a time through the textlib instance, and tags
 * are set aside with filter_save_tags() before processing and restored
 * afterwards, so nothing inside html tags is changed.
 *
 * @param string $string the string to be modified
 * @param int $maxsize maximum number of characters allowed in a run without a word break
 * @param string $cutchar the string used to represent word breaks
 * @return string
 */
function break_up_long_words($string, $maxsize = 20, $cutchar = ' ')
{
    /// Loading the textlib singleton instance. We are going to need it.
    $textlib = textlib_get_instance();
    /// First of all, save all the tags inside the text to skip them
    $tags = array();
    filter_save_tags($string, $tags);
    /// Characters that end the current word and reset the counter
    $wordbreakers = array(' ', "\t", "\n", "\r", '<', '>');
    /// Process the string adding the cut when necessary
    $output = '';
    $length = $textlib->strlen($string);
    $wordlength = 0;
    for ($i = 0; $i < $length; $i++) {
        $char = $textlib->substr($string, $i, 1);
        if (in_array($char, $wordbreakers, true)) {
            $wordlength = 0;
        } else {
            $wordlength++;
            if ($wordlength > $maxsize) {
                $output .= $cutchar;
                /// The current character is appended right after the cut, so
                /// it is already the first character of the new chunk.
                /// Restarting at 0 would let later chunks grow to $maxsize + 1.
                $wordlength = 1;
            }
        }
        $output .= $char;
    }
    /// Finally load the tags back again
    if (!empty($tags)) {
        $output = str_replace(array_keys($tags), $tags, $output);
    }
    return $output;
}