/**
 * Converts text held in one of the supported text formats into plain text
 * that can be placed in a plain-text email.
 *
 * @param string $text The text to be formatted. This is raw text originally from user input.
 * @param int $format Identifier of the text format to be used
 *            [FORMAT_MOODLE, FORMAT_HTML, FORMAT_PLAIN, FORMAT_WIKI, FORMAT_MARKDOWN]
 * @return string
 */
function format_text_email($text, $format) {
    if ($format == FORMAT_PLAIN) {
        // Plain text is returned untouched.
        return $text;
    }

    // FORMAT_WIKI is compared before FORMAT_HTML, keeping the PLAIN, WIKI, HTML
    // comparison order. There should not be any wiki text any more.
    $legacywiki = ($format == FORMAT_WIKI);
    if (!$legacywiki && $format == FORMAT_HTML) {
        return html_to_text($text);
    }

    // FORMAT_WIKI, FORMAT_MOODLE, FORMAT_MARKDOWN and any other value:
    // run wikify_links(), strip the tags, then decode entities via core_text.
    $linked = wikify_links($text);
    $untagged = strip_tags($linked);

    return core_text::entities_to_utf8($untagged, true);
}
/**
 * Checks that obfuscate_text() changes the text and that
 * core_text::entities_to_utf8() turns the result back into the original.
 */
public function test_obfuscate_text() {
    $plain = 'Žluťoučký koníček 32131';

    // The obfuscated form must not be identical to the input.
    $scrambled = obfuscate_text($plain);
    $this->assertNotSame($plain, $scrambled);

    // Decoding the obfuscated form must give back exactly the input.
    $this->assertSame($plain, core_text::entities_to_utf8($scrambled, true));
}
/**
 * Builds the normalised certificate file name, without a file extension,
 * from the course short name and the certificate name.
 *
 * @param stdClass $certificate certificate record; its name is used
 * @param stdClass $cm course module; its id selects the module context
 * @param stdClass $course course record; its id and shortname are used
 * @return string file name without extension
 */
function certificate_get_certificate_filename($certificate, $cm, $course) {
    // Both name parts are formatted in their own context.
    $shortname = format_string(
        $course->shortname,
        true,
        array('context' => context_course::instance($course->id))
    );
    $certname = format_string(
        $certificate->name,
        true,
        array('context' => context_module::instance($cm->id))
    );

    // Decode entities first, then drop tags and any trailing dots.
    $rawname = strip_tags(core_text::entities_to_utf8($shortname . '_' . $certname));
    $trimmed = rtrim($rawname, '.');

    // Ampersand is not a valid filename char, so it is swapped for an underscore.
    $candidate = clean_filename(str_replace('&', '_', $trimmed));

    // This is weird, but we need some file name.
    return empty($candidate) ? 'certificate' : $candidate;
}
/**
 * A lot of imported files contain unwanted entities.
 * This method tries to clean up all known problems.
 *
 * @param string $str string to correct
 * @return string the corrected string
 */
public function cleaninput($str) {
    // Substitutions applied with strtr() before the entity conversion.
    // NOTE(review): the first pair maps an apostrophe to itself as written here;
    // confirm whether an encoded form was intended as the key.
    $replacements = array(
        "'" => "'",
        "’" => "'",
        "“" => "\"",
        "”" => "\"",
        "–" => "-",
        "—" => "-",
    );

    // Passing false makes core_text::entities_to_utf8() convert only numerical entities.
    return core_text::entities_to_utf8(strtr($str, $replacements), false);
}
/**
 * Tests the static entities_to_utf8 method.
 */
public function test_entities_to_utf8() {
    // Entity-encoded input mixing hexadecimal, decimal and named entities, so the
    // conversion is really exercised. It is single-quoted because the text contains
    // nothing to interpolate and must not contain a bare double quote.
    $str = '&#x17d;lu&#x165;ou&#x10d;k&#xfd; kon&iacute;&#269;ek&copy;&quot;&amp;&lt;&gt;&sect;&laquo;';

    $this->assertSame("Žluťoučký koníček©\"&<>§«", core_text::entities_to_utf8($str));
}
/**
 * Parses one question from the lines of an import file and converts it into
 * a multichoice question object.
 *
 * Two source types are handled: 'multichoice' (single answer, one award for the
 * whole question) and 'multianswerchoice' (several answers, one award per option).
 * Any other type is reported through $OUTPUT->notification() and produces a
 * question without answers.
 *
 * @param array $lines lines of text making up one question
 * @return object|false question object based on $this->defaultquestion(), or false
 *         when the text has no '<question type' or no '</question>' marker
 */
protected function readquestion($lines) {
    global $OUTPUT;

    $text = implode(' ', $lines);
    // NOTE(review): these single-quoted patterns match a literal backslash followed
    // by t, n or r, not the control characters; confirm that this is intended.
    $text = str_replace(array('\\t', '\\n', '\\r'), array('', '', ''), $text);

    $startpos = strpos($text, '<question type');
    $endpos = strpos($text, '</question>');
    if ($startpos === false || $endpos === false) {
        return false;
    }

    // When the type attribute cannot be parsed the type stays empty, so it is
    // reported as unhandled below instead of reading a missing match group.
    $type = '';
    if (preg_match("/<question type=[\"\\']([^\"\\']+)[\"\\']>/i", $text, $matches)) {
        $type = strtolower($matches[1]); // Multichoice or multianswerchoice.
    }

    $questiontext = core_text::entities_to_utf8($this->stringbetween($text, '<text>', '</text>'));
    // The hint is extracted but not used further in this method.
    $questionhint = core_text::entities_to_utf8($this->stringbetween($text, '<hint>', '</hint>'));
    $questionaward = $this->stringbetween($text, '<award>', '</award>');
    $optionlist = $this->stringbetween($text, '<answer>', '</answer>');
    $optionlist = explode('<option', $optionlist);

    $n = 0;
    $optionscorrect = array();
    $optionstext = array();
    $optionsaward = array();
    $totalaward = 0;
    $multianswer = false;

    if ($type == 'multichoice') {
        foreach ($optionlist as $option) {
            if (trim($option) === '') {
                continue;
            }

            $correct = $this->stringbetween($option, ' correct="', '">');
            $answer = $this->stringbetween($option, '">', '</option>');
            $optionscorrect[$n] = $correct;
            $optionstext[$n] = core_text::entities_to_utf8($answer);
            ++$n;
        }
    } else if ($type == 'multianswerchoice') {
        $multianswer = true;

        foreach ($optionlist as $option) {
            if (trim($option) === '') {
                continue;
            }

            // A missing attribute counts as "not correct" / "no award" rather than
            // being read from an undefined match group.
            $correct = preg_match("/correct=\"([^\"]*)\"/i", $option, $correctmatch) ? $correctmatch[1] : '';
            $award = preg_match("/award=\"([^\"]*)\"/i", $option, $awardmatch) ? $awardmatch[1] : 0;
            if ($correct == 'yes') {
                $totalaward += $award;
            }

            $answer = $this->stringbetween($option, '">', '</option>');
            $optionscorrect[$n] = $correct;
            $optionstext[$n] = core_text::entities_to_utf8($answer);
            $optionsaward[$n] = $award;
            ++$n;
        }
    } else {
        echo $OUTPUT->notification(get_string('unknownorunhandledtype', 'question', $type));
    }

    $question = $this->defaultquestion();
    $question->qtype = 'multichoice';
    $question->name = $this->create_default_question_name($questiontext, get_string('questionname', 'question'));
    $this->add_blank_combined_feedback($question);

    $question->questiontext = $questiontext;
    $question->questiontextformat = FORMAT_HTML;
    $question->single = $type == 'multichoice' ? 1 : 0;

    $question->fraction = array();
    $question->answer = array();

    $optioncount = count($optionstext);
    for ($n = 0; $n < $optioncount; ++$n) {
        // Options without text are skipped; the text "0" is a real answer and is kept.
        if ((string) $optionstext[$n] === '') {
            continue;
        }

        if (!$multianswer) {
            // Single answer: the correct option earns the question award.
            $fraction = ($optionscorrect[$n] == 'yes') ? (int) $questionaward : 0;
        } else if ($optionscorrect[$n] == 'yes') {
            // Multiple answers: share of the total award of all correct options.
            // With a zero total there is nothing to share, so avoid dividing by zero.
            $fraction = ($totalaward != 0) ? $optionsaward[$n] / $totalaward : 0;
        } else {
            // Multiple answers: wrong options are penalised relative to the option count.
            $fraction = -$optionsaward[$n] / $optioncount;
        }

        $question->fraction[] = $fraction;
        $question->answer[] = array('text' => $optionstext[$n], 'format' => FORMAT_HTML);
        $question->feedback[] = array('text' => '', 'format' => FORMAT_HTML); // No feedback in this type.
    }

    return $question;
}
/**
 * Simplifies a string according to indexing rules.
 *
 * @param string $text text to simplify
 * @param bool $overlap_cjk when true, runs of characters matching PREG_CLASS_CJK
 *        are passed through the tokenise_expand_cjk callback
 * @param bool $join_numbers when true, punctuation between groups of numbers is
 *        removed so the groups join; otherwise number and punctuation sequences
 *        are swapped for placeholders and put back at the end
 * @return string the simplified text
 */
function tokenise_simplify($text, $overlap_cjk, $join_numbers) {
    // Decode entities to UTF-8, then lowercase.
    $text = core_text::strtolower(core_text::entities_to_utf8($text, true));

    // Simple CJK handling.
    if ($overlap_cjk) {
        $cjkpattern = '/[' . PREG_CLASS_CJK . ']+/u';
        $text = preg_replace_callback($cjkpattern, 'tokenise_expand_cjk', $text);
    }

    // Placeholder => original sequence; only filled when numbers are not joined.
    $stashed = array();

    if ($join_numbers) {
        // To improve searching for numerical data such as dates, IP addresses or
        // version numbers, a group of numerical characters separated only by
        // punctuation is treated as one piece. Searching for e.g. '20/03/1984'
        // therefore also returns results with '20-03-1984' in them.
        // Readable regexp: ([number]+)[punctuation]+(?=[number]).
        $joinpattern = '/([' . PREG_CLASS_NUMBERS . ']+)[' . PREG_CLASS_PUNCTUATION . ']+(?=['
            . PREG_CLASS_NUMBERS . '])/u';
        $text = preg_replace($joinpattern, '\\1', $text);
    } else {
        // Keep all the detected numbers+punctuation in a safe place so they can
        // be restored once the remaining clean-up has run.
        $seqpattern = '/[' . PREG_CLASS_NUMBERS . ']+[' . PREG_CLASS_PUNCTUATION . PREG_CLASS_NUMBERS . ']+/u';
        preg_match_all($seqpattern, $text, $hits);

        foreach (array_unique($hits[0]) as $position => $sequence) {
            $ordinal = (string) (count($stashed) + 1);
            $stashed[START_DELIM . $ordinal . CENTER_DELIM . $position . END_DELIM] = $sequence;
        }

        if (!empty($stashed)) {
            $text = str_replace($stashed, array_keys($stashed), $text);
        }
    }

    // The dot, underscore and dash are simply removed. This allows meaningful
    // search behaviour with acronyms and URLs.
    $text = preg_replace('/[._-]+/', '', $text);

    // With the exception of the rules above, all punctuation, marks, spacers,
    // etc. are considered to be a word boundary.
    $text = preg_replace('/[' . PREG_CLASS_SEARCH_EXCLUDE . ']+/u', ' ', $text);

    // When numbers were not joined, put the original sequences back.
    if (!$join_numbers && !empty($stashed)) {
        $text = str_replace(array_keys($stashed), $stashed, $text);
    }

    return $text;
}