public function normalize($text)
 {
     // Reduce wiki/HTML markup to plain text. The order of the passes below
     // matters: each one works on the output of the previous one.

     // Drop HTML-style tags such as <br /> and <nowiki>.
     $clean = strip_tags($text);

     // Characters (and the '--' sequence) that carry meaning in wiki formatting.
     $clean = str_replace(array("'", "\"", '=', '--', '*', '|'), '', $clean);

     // Regex passes, applied one after another in this order.
     $regexes = array(
         // [[...]] and {{...}} constructs; '.' does not cross newlines here,
         // so only constructs that open and close on the same line match.
         "/\\[\\[.*?\\]\\]|{{.*?}}/",
         // http(s), ftp and file URLs (case-insensitive).
         "/\\b(https?|ftp|file):\\/\\/[-A-Z0-9+&@#\\/%?=~_|\$!:,.;]*[A-Z0-9+&@#\\/%=~_|\$]/i",
     );
     foreach ($regexes as $regex) {
         $clean = preg_replace($regex, '', $clean);
     }

     // Whatever bracket, brace and punctuation characters are still left.
     $clean = str_replace(array('[', ']', '{', '}', ':', '/', ';', '?', '-', '$', "\\"), '', $clean);

     // Entity decoding is optional: it only runs when a Sanitizer class
     // offering decodeCharReferencesAndNormalize() is available.
     $canDecode = class_exists('Sanitizer')
         && method_exists('Sanitizer', 'decodeCharReferencesAndNormalize');
     if (!$canDecode) {
         return $clean;
     }

     return Sanitizer::decodeCharReferencesAndNormalize($clean);
 }
Exemplo n.º 2
0
 /**
  * Like Title::newFromText(), but throws MalformedTitleException when the title is invalid,
  * rather than returning null.
  *
  * The exception subclasses encode detailed information about why the title is invalid.
  *
  * @see Title::newFromText
  *
  * @since 1.25
  * @param string $text Title text to check
  * @param int $defaultNamespace
  * @throws MalformedTitleException If the title is invalid
  * @return Title
  */
 public static function newFromTextThrow($text, $defaultNamespace = NS_MAIN)
 {
     // Objects are rejected outright; any other input is passed along as-is.
     if (is_object($text)) {
         throw new MWException('Title::newFromTextThrow given an object');
     }

     $titleCache = self::getTitleCache();

     // Pages tend to link to the same target many times, and normalizing and
     // parsing a title is comparatively expensive, so results are cached.
     // Only requests whose default namespace is the main namespace use the
     // cache. Cached titles are treated as value objects that are not
     // expected to change afterwards.
     $useCache = $defaultNamespace == NS_MAIN;
     if ($useCache && $titleCache->has($text)) {
         return $titleCache->get($text);
     }

     # Turn things like &eacute; &#257; or &#x3017; into normalized text (bug 14952)
     $normalized = Sanitizer::decodeCharReferencesAndNormalize($text);

     $title = new Title();
     // The DB key form uses underscores where the text has spaces.
     $title->mDbkeyform = strtr($normalized, ' ', '_');
     $title->mDefaultNamespace = intval($defaultNamespace);
     // NOTE(review): per the docblock, invalid titles surface as a
     // MalformedTitleException — presumably raised in here; confirm.
     $title->secureAndSplit();

     if ($useCache) {
         $titleCache->set($text, $title);
     }

     return $title;
 }
Exemplo n.º 3
0
 /**
  * Create a new Title from text, such as what one would find in a link.
  * Decodes any HTML entities in the text.
  *
  * @param string $text The link text; spaces, prefixes, and an
  *   initial ':' indicating the main namespace are accepted.
  * @param int $defaultNamespace The namespace to use if none is specified
  *   by a prefix.  If you want to force a specific namespace even if
  *   $text might begin with a namespace prefix, use makeTitle() or
  *   makeTitleSafe().
  * @throws MWException If $text is an object
  * @return Title|null Title, or null on an error.
  */
 public static function newFromText($text, $defaultNamespace = NS_MAIN)
 {
     // Objects are never valid title text; fail loudly instead of guessing.
     if (is_object($text)) {
         throw new MWException('Title::newFromText given an object');
     }
     /**
      * Wiki pages often contain multiple links to the same page.
      * Title normalization and parsing can become expensive on
      * pages with many links, so we can save a little time by
      * caching them.
      *
      * In theory these are value objects and won't get changed...
      */
     if ($defaultNamespace == NS_MAIN && isset(Title::$titleCache[$text])) {
         return Title::$titleCache[$text];
     }
     # Convert things like &eacute; &#257; or &#x3017; into normalized (bug 14952) text
     $filteredText = Sanitizer::decodeCharReferencesAndNormalize($text);
     $t = new Title();
     // The DB key form uses underscores where the text has spaces.
     $t->mDbkeyform = str_replace(' ', '_', $filteredText);
     $t->mDefaultNamespace = $defaultNamespace;
     // Counts the entries this method has stored since the last cache reset;
     // it persists across calls.
     static $cachedcount = 0;
     // A falsy result from secureAndSplit() means the text is not a usable
     // title; nothing is cached in that case.
     if (!$t->secureAndSplit()) {
         return null;
     }
     // Only titles resolved against the main namespace are cached, matching
     // the lookup condition above.
     if ($defaultNamespace == NS_MAIN) {
         if ($cachedcount >= self::CACHE_MAX) {
             # Avoid memory leaks on mass operations...
             Title::$titleCache = array();
             $cachedcount = 0;
         }
         $cachedcount++;
         // Plain assignment: objects are passed around as handles, so the
         // cache slot does not need to be bound by reference to the local.
         Title::$titleCache[$text] = $t;
     }
     return $t;
 }
Exemplo n.º 4
0
 /**
  * Create a new Title from text, such as what one would find in a link.
  * Decodes any HTML entities in the text.
  *
  * @param string $text The link text; spaces, prefixes, and an
  *   initial ':' indicating the main namespace are accepted.
  * @param int $defaultNamespace The namespace to use if none is specified
  *   by a prefix.  If you want to force a specific namespace even if
  *   $text might begin with a namespace prefix, use makeTitle() or
  *   makeTitleSafe().
  * @throws InvalidArgumentException
  * @return Title|null Title or null on an error.
  */
 public static function newFromText($text, $defaultNamespace = NS_MAIN)
 {
     // Objects are a hard error. Other non-string input only triggers a
     // warning for now and is still processed.
     if (is_object($text)) {
         throw new InvalidArgumentException('$text must be a string.');
     }
     if (!is_string($text)) {
         wfWarn(__METHOD__ . ': $text must be a string. This will throw an InvalidArgumentException in future.');
     }

     $titleCache = self::getTitleCache();

     // Pages tend to link to the same target many times, and normalizing and
     // parsing a title is comparatively expensive, so results are cached.
     // Only requests whose default namespace is the main namespace use the
     // cache. Cached titles are treated as value objects that are not
     // expected to change afterwards.
     $cacheable = $defaultNamespace == NS_MAIN;
     if ($cacheable && $titleCache->has($text)) {
         return $titleCache->get($text);
     }

     # Turn things like &eacute; &#257; or &#x3017; into normalized text (bug 14952)
     $normalized = Sanitizer::decodeCharReferencesAndNormalize($text);

     $title = new Title();
     // The DB key form uses underscores where the text has spaces.
     $title->mDbkeyform = str_replace(' ', '_', $normalized);
     $title->mDefaultNamespace = intval($defaultNamespace);

     // A falsy result means the text could not be turned into a title;
     // such results are reported as null and never cached.
     if (!$title->secureAndSplit()) {
         return null;
     }

     if ($cacheable) {
         $titleCache->set($text, $title);
     }

     return $title;
 }