/**
 * Tests the static entities_to_utf8 method.
 *
 * The input mixes hexadecimal numeric entities, decimal numeric entities
 * and named HTML entities; with the default arguments all of them are
 * expected to come back as the corresponding UTF-8 characters.
 *
 * @return void
 */
 public function test_entities_to_utf8()
 {
     // The input must stay entity-encoded: a raw double quote inside this
     // literal would terminate the string and break parsing of the file.
     $str = "&#x17d;lu&#x165;ou&#x10d;k&#xfd; kon&#237;&#269;ek&copy;&quot;&amp;&lt;&gt;&sect;&laquo;";
     $this->assertSame("Žluťoučký koníček©\"&<>§«", textlib::entities_to_utf8($str));
 }
示例#2
0
 /**
  * Decodes the HTML entities found in a string.
  *
  * Moodle hack: the work is delegated entirely to
  * textlib::entities_to_utf8(). The hand-rolled decoding that used to
  * follow the return statement could never execute and relied on the
  * preg "/e" modifier, which PHP 7 removed, so it has been dropped.
  *
  * @param string $string text that may contain HTML entities
  * @return string whatever textlib::entities_to_utf8() returns for $string
  */
 function _unhtmlentities($string)
 {
     // Moodle hack
     return textlib::entities_to_utf8($string);
 }
示例#3
0
 /**
  * Cleans up the unwanted entities that imported files frequently contain.
  *
  * A fixed set of quote and dash entities is first replaced by plain ASCII
  * characters; the result is then handed to textlib::entities_to_utf8().
  *
  * @param string $str string to correct
  * @return string the corrected string
  */
 public function cleaninput($str)
 {
     // Typographic quotes and dashes are flattened to ASCII look-alikes.
     $asciisubstitutes = array(
         "&#039;" => "'",
         "&#8217;" => "'",
         "&#8220;" => "\"",
         "&#8221;" => "\"",
         "&#8211;" => "-",
         "&#8212;" => "-",
     );

     // Use textlib entities_to_utf8 function to convert only numerical entities.
     return textlib::entities_to_utf8(strtr($str, $asciisubstitutes), false);
 }
示例#4
0
 /**
  * Checks that hexadecimal and decimal numeric entities are decoded
  * into the matching UTF-8 characters.
  */
 public function test_entities_to_utf8()
 {
     $encoded = "&#x17d;lu&#x165;ou&#x10d;k&#xfd; kon&#237;&#269;ek";
     $decoded = textlib::entities_to_utf8($encoded);

     $this->assertIdentical($decoded, "Žluťoučký koníček");
 }
示例#5
0
    /**
     * Builds a multichoice question object from the lines of one
     * <question> element.
     *
     * Both the 'multichoice' (single answer) and 'multianswerchoice'
     * (several answers) types are supported. Any other type triggers a
     * notification and yields a question without answers.
     *
     * Note: the text between tags is extracted with $this->stringbetween(),
     * which is defined outside this block; its exact return value for a
     * missing tag is assumed to be an empty/falsy value - TODO confirm.
     *
     * @param array $lines the lines of text that make up the question
     * @return object|false the question object, or false when no complete
     *                      <question ...>...</question> element is present
     */
    protected function readquestion($lines) {
        global $OUTPUT;

        $text = implode(' ', $lines);
        // Double quotes are required: a single-quoted '\t' is a literal
        // backslash followed by "t", not a tab character.
        $text = str_replace(array("\t", "\n", "\r"), '', $text);

        if (strpos($text, '<question type') === false || strpos($text, '</question>') === false) {
            return false;
        }

        // An unparsable type attribute leaves $type empty, which is reported
        // through the "unknown type" notification further down.
        $type = '';
        if (preg_match("/<question type=[\"\']([^\"\']+)[\"\']>/i", $text, $matches)) {
            $type = strtolower($matches[1]); // Multichoice or multianswerchoice.
        }
        $multianswer = ($type === 'multianswerchoice');

        $questiontext = textlib::entities_to_utf8($this->stringbetween($text, '<text>', '</text>'));
        $questionhint = textlib::entities_to_utf8($this->stringbetween($text, '<hint>', '</hint>'));
        $questionaward = $this->stringbetween($text, '<award>', '</award>');
        $optionlist = explode('<option', $this->stringbetween($text, '<answer>', '</answer>'));

        $n = 0;
        $optionscorrect = array();
        $optionstext = array();
        $optionsaward = array();
        $totalaward = 0;

        if ($type === 'multichoice') {
            foreach ($optionlist as $option) {
                if (trim($option) === '') {
                    continue;
                }
                $correct = $this->stringbetween($option, ' correct="', '">');
                $answer = $this->stringbetween($option, '">', '</option>');
                $optionscorrect[$n] = $correct;
                $optionstext[$n] = textlib::entities_to_utf8($answer);
                ++$n;
            }
        } else if ($multianswer) {
            foreach ($optionlist as $option) {
                if (trim($option) === '') {
                    continue;
                }

                // A missing attribute is treated as "not correct" / "no award"
                // instead of reading an undefined match offset.
                $correct = '';
                if (preg_match("/correct=\"([^\"]*)\"/i", $option, $correctmatch)) {
                    $correct = $correctmatch[1];
                }
                // Only numeric awards take part in the arithmetic below;
                // anything else counts as zero.
                $award = 0;
                if (preg_match("/award=\"([^\"]*)\"/i", $option, $awardmatch) && is_numeric($awardmatch[1])) {
                    $award = $awardmatch[1] + 0;
                }

                if ($correct == 'yes') {
                    $totalaward += $award;
                }

                $answer = $this->stringbetween($option, '">', '</option>');

                $optionscorrect[$n] = $correct;
                $optionstext[$n] = textlib::entities_to_utf8($answer);
                $optionsaward[$n] = $award;
                ++$n;
            }
        } else {
            echo $OUTPUT->notification(get_string('unknownorunhandledtype', 'question', $type));
        }

        $question = $this->defaultquestion();
        $question->qtype = 'multichoice';
        $question->name = $this->create_default_question_name($questiontext, get_string('questionname', 'question'));
        $this->add_blank_combined_feedback($question);

        $question->questiontext = $questiontext;
        $question->questiontextformat = FORMAT_HTML;
        $question->single = ($type === 'multichoice') ? 1 : 0;

        $question->fraction = array();
        $question->answer = array();
        $optioncount = count($optionstext);
        for ($n = 0; $n < $optioncount; ++$n) {
            // Skip options without text. The comparison is against the empty
            // string so that an answer of "0" is not silently discarded.
            if ((string) $optionstext[$n] === '') {
                continue;
            }

            if (!$multianswer) {
                // Single answer.
                $fraction = ($optionscorrect[$n] == 'yes') ? (int) $questionaward : 0;
            } else if ($optionscorrect[$n] == 'yes') {
                // Multiple answers: share of the total award, guarding against
                // a total of zero (no correct option or all awards zero).
                $fraction = ($totalaward != 0) ? $optionsaward[$n] / $totalaward : 0;
            } else {
                // Multiple answers: wrong options carry a penalty.
                $fraction = -$optionsaward[$n] / $optioncount;
            }

            $question->fraction[] = $fraction;
            $question->answer[] = array('text' => $optionstext[$n], 'format' => FORMAT_HTML);
            $question->feedback[] = array('text' => '', 'format' => FORMAT_HTML); // No feedback in this type.
        }

        return $question;
    }
示例#6
0
/**
 * Turns text stored in one of the supported formats into plain text
 * suitable for a plain e-mail.
 *
 * FORMAT_PLAIN text is returned untouched and FORMAT_HTML text is passed
 * through html_to_text(). Every other value (FORMAT_WIKI, FORMAT_MOODLE,
 * FORMAT_MARKDOWN or anything unrecognised) goes through wikify_links(),
 * has its tags stripped and its entities decoded to UTF-8.
 *
 * @uses FORMAT_MOODLE
 * @uses FORMAT_HTML
 * @uses FORMAT_PLAIN
 * @uses FORMAT_WIKI
 * @uses FORMAT_MARKDOWN
 * @param string $text The text to be formatted. This is raw text originally from user input.
 * @param int $format Identifier of the text format to be used
 *            [FORMAT_MOODLE, FORMAT_HTML, FORMAT_PLAIN, FORMAT_WIKI, FORMAT_MARKDOWN]
 * @return string
 */
function format_text_email($text, $format)
{
    // Loose comparisons are deliberate: they match what a switch statement does.
    if ($format == FORMAT_PLAIN) {
        return $text;
    }

    if ($format == FORMAT_HTML) {
        return html_to_text($text);
    }

    // FORMAT_WIKI (there should not be any of these any more!),
    // FORMAT_MOODLE, FORMAT_MARKDOWN and unknown formats share one path.
    $linked = wikify_links($text);
    $stripped = strip_tags($linked);

    return textlib::entities_to_utf8($stripped, true);
}
 /**
  * Round-trip check for obfuscate_text(): the obfuscated form must differ
  * from the input, and decoding its entities must give the input back.
  */
 function test_obfuscate_text()
 {
     $original = 'Žluťoučký koníček 32131';
     $scrambled = obfuscate_text($original);

     $this->assertNotSame($original, $scrambled);
     $this->assertSame($original, textlib::entities_to_utf8($scrambled, true));
 }
/**
 * Simplifies a string according to indexing rules.
 *
 * Steps, in order: decode entities, lowercase, optionally expand CJK runs,
 * either join or protect number sequences, drop dots/underscores/dashes,
 * turn every excluded character into a space and finally put protected
 * number sequences back.
 *
 * @param string $text the text to simplify
 * @param bool $overlap_cjk when true, CJK runs are passed to tokenise_expand_cjk
 * @param bool $join_numbers when true, digits separated only by punctuation
 *                           are joined; otherwise such sequences are kept intact
 * @return string the simplified text
 */
function tokenise_simplify($text, $overlap_cjk, $join_numbers)
{
    // Entities become UTF-8 characters first, then everything is lowercased.
    $text = textlib::strtolower(textlib::entities_to_utf8($text, true));

    // Simple CJK handling.
    if ($overlap_cjk) {
        $text = preg_replace_callback('/[' . PREG_CLASS_CJK . ']+/u', 'tokenise_expand_cjk', $text);
    }

    // Placeholder => original number sequence; only filled when not joining numbers.
    $placeholders = array();

    if ($join_numbers) {
        // To improve searching for numerical data such as dates, IP addresses
        // or version numbers, a group of numerical characters separated only
        // by punctuation is treated as one piece, so '20/03/1984' also
        // matches '20-03-1984'.
        // Readable regexp: ([number]+)[punctuation]+(?=[number])
        $joinpattern = '/([' . PREG_CLASS_NUMBERS . ']+)[' . PREG_CLASS_PUNCTUATION . ']+(?=[' . PREG_CLASS_NUMBERS . '])/u';
        $text = preg_replace($joinpattern, '\\1', $text);
    } else {
        // Swap every distinct numbers+punctuation sequence for a placeholder
        // so the clean-up below cannot alter it.
        $sequencepattern = '/[' . PREG_CLASS_NUMBERS . ']+[' . PREG_CLASS_PUNCTUATION . PREG_CLASS_NUMBERS . ']+/u';
        preg_match_all($sequencepattern, $text, $found);
        foreach (array_unique($found[0]) as $position => $sequence) {
            $ordinal = (string) (count($placeholders) + 1);
            $placeholders[START_DELIM . $ordinal . CENTER_DELIM . $position . END_DELIM] = $sequence;
        }
        if (!empty($placeholders)) {
            $text = str_replace($placeholders, array_keys($placeholders), $text);
        }
    }

    // The dot, underscore and dash are simply removed. This allows meaningful
    // search behaviour with acronyms and URLs.
    $text = preg_replace('/[._-]+/', '', $text);

    // Apart from the rules above, all punctuation, marks, spacers, etc.
    // count as a word boundary.
    $text = preg_replace('/[' . PREG_CLASS_SEARCH_EXCLUDE . ']+/u', ' ', $text);

    // When numbers were protected rather than joined, restore the originals.
    if (!$join_numbers && !empty($placeholders)) {
        $text = str_replace(array_keys($placeholders), $placeholders, $text);
    }

    return $text;
}