/**
 * Reduce a piece of wiki markup to plain text.
 *
 * Steps, in order: HTML tags are stripped, characters used for wiki
 * formatting are dropped, [[...]] and {{...}} spans are removed, URLs are
 * removed, and the leftover punctuation is deleted. If a Sanitizer class
 * offering decodeCharReferencesAndNormalize() is available, the result is
 * passed through it as the final step.
 *
 * @param string $text Raw text, possibly containing HTML and wiki markup.
 * @return string The cleaned-up text.
 */
public function normalize($text) {
    // Drop HTML tags such as <br /> and <nowiki>, then the characters that
    // carry meaning in wiki formatting.
    $wikiMarkup = array("'", "\"", '=', '--', '*', '|');
    $clean = str_replace($wikiMarkup, '', strip_tags($text));

    // Applied one after another: first [[...]] / {{...}} spans, then links.
    $patterns = array(
        "/\\[\\[.*?\\]\\]|{{.*?}}/",
        "/\\b(https?|ftp|file):\\/\\/[-A-Z0-9+&@#\\/%?=~_|\$!:,.;]*[A-Z0-9+&@#\\/%=~_|\$]/i",
    );
    foreach ($patterns as $regex) {
        $clean = preg_replace($regex, '', $clean);
    }

    // Whatever bracket, brace and punctuation characters are still left.
    $leftovers = array('[', ']', '{', '}', ':', '/', ';', '?', '-', '$', "\\");
    $clean = str_replace($leftovers, '', $clean);

    // The Sanitizer step is optional: it only runs when the class and the
    // method both exist in the current environment.
    $canDecode = class_exists('Sanitizer')
        && method_exists('Sanitizer', 'decodeCharReferencesAndNormalize');

    return $canDecode
        ? Sanitizer::decodeCharReferencesAndNormalize($clean)
        : $clean;
}
/**
 * Build a Title from link-style text, signalling invalid titles with an
 * exception instead of a null return (compare Title::newFromText()).
 *
 * The MalformedTitleException subclasses describe why a title was rejected.
 * Titles requested with the main namespace as default are stored in, and
 * served from, the shared title cache.
 *
 * @see Title::newFromText
 *
 * @since 1.25
 * @param string $text Title text to check
 * @param int $defaultNamespace Namespace to assume when the text has no prefix
 * @throws MWException If $text is an object
 * @throws MalformedTitleException If the title is invalid (presumably raised
 *   by secureAndSplit(); confirm against that method)
 * @return Title
 */
public static function newFromTextThrow($text, $defaultNamespace = NS_MAIN) {
    if (is_object($text)) {
        throw new MWException('Title::newFromTextThrow given an object');
    }

    $titleCache = self::getTitleCache();

    // Only lookups that default to the main namespace go through the cache.
    $cacheable = ($defaultNamespace == NS_MAIN);

    // Pages tend to link to the same target many times, and parsing a title
    // is comparatively costly, so reuse an already-built Title when we can.
    // The cached objects are treated as immutable values.
    if ($cacheable && $titleCache->has($text)) {
        return $titleCache->get($text);
    }

    // Decode character references and normalize the text (bug 14952).
    $normalized = Sanitizer::decodeCharReferencesAndNormalize($text);

    $title = new Title();
    $title->mDbkeyform = strtr($normalized, ' ', '_');
    $title->mDefaultNamespace = intval($defaultNamespace);
    $title->secureAndSplit();

    if ($cacheable) {
        $titleCache->set($text, $title);
    }

    return $title;
}
/**
 * Build a Title from text as it would appear inside a link. HTML entities
 * in the text are decoded first.
 *
 * @param $text String the link text; spaces, prefixes, and a leading ':'
 *   marking the main namespace are accepted.
 * @param $defaultNamespace Int the namespace used when the text carries no
 *   prefix. To force a namespace even if $text might start with a namespace
 *   prefix, use makeTitle() or makeTitleSafe() instead.
 * @throws MWException if $text is an object.
 * @return Title, or null on an error.
 */
public static function newFromText($text, $defaultNamespace = NS_MAIN) {
    // Number of titles added to the cache since it was last emptied.
    static $cachedCount = 0;

    if (is_object($text)) {
        throw new MWException('Title::newFromText given an object');
    }

    // Only lookups that default to the main namespace go through the cache.
    $cacheable = ($defaultNamespace == NS_MAIN);

    // Pages tend to link to the same target many times, and parsing a title
    // is comparatively costly, so reuse an already-built Title when we can.
    // The cached objects are treated as immutable values.
    if ($cacheable && isset(Title::$titleCache[$text])) {
        return Title::$titleCache[$text];
    }

    // Decode character references and normalize the text (bug 14952).
    $normalized = Sanitizer::decodeCharReferencesAndNormalize($text);

    $title = new Title();
    $title->mDbkeyform = str_replace(' ', '_', $normalized);
    $title->mDefaultNamespace = $defaultNamespace;

    if (!$title->secureAndSplit()) {
        return null;
    }

    if ($cacheable) {
        // Start over once CACHE_MAX titles have been stored, so that mass
        // operations do not keep growing the cache.
        if ($cachedCount >= self::CACHE_MAX) {
            Title::$titleCache = array();
            $cachedCount = 0;
        }
        $cachedCount++;
        Title::$titleCache[$text] =& $title;
    }

    return $title;
}
/**
 * Build a Title from text as it would appear inside a link. HTML entities
 * in the text are decoded first.
 *
 * @param string $text The link text; spaces, prefixes, and a leading ':'
 *   marking the main namespace are accepted.
 * @param int $defaultNamespace The namespace used when the text carries no
 *   prefix. To force a namespace even if $text might start with a namespace
 *   prefix, use makeTitle() or makeTitleSafe() instead.
 * @throws InvalidArgumentException If $text is an object.
 * @return Title|null Title or null on an error.
 */
public static function newFromText($text, $defaultNamespace = NS_MAIN) {
    if (is_object($text)) {
        throw new InvalidArgumentException('$text must be a string.');
    }
    if (!is_string($text)) {
        // Other non-string values are still processed, but a warning is issued.
        wfWarn(__METHOD__ . ': $text must be a string. This will throw an InvalidArgumentException in future.');
    }

    $titleCache = self::getTitleCache();

    // Only lookups that default to the main namespace go through the cache.
    $cacheable = ($defaultNamespace == NS_MAIN);

    // Pages tend to link to the same target many times, and parsing a title
    // is comparatively costly, so reuse an already-built Title when we can.
    // The cached objects are treated as immutable values.
    if ($cacheable && $titleCache->has($text)) {
        return $titleCache->get($text);
    }

    // Decode character references and normalize the text (bug 14952).
    $normalized = Sanitizer::decodeCharReferencesAndNormalize($text);

    $title = new Title();
    $title->mDbkeyform = str_replace(' ', '_', $normalized);
    $title->mDefaultNamespace = intval($defaultNamespace);

    // A title that fails validation is reported as null and never cached.
    if (!$title->secureAndSplit()) {
        return null;
    }

    if ($cacheable) {
        $titleCache->set($text, $title);
    }

    return $title;
}