/**
 * Turns arbitrary text into something usable as a page title.
 *
 * Wikitext is stripped via self::stripWikitext(), HTML entities are decoded,
 * and every match of the "invalid title" regex is replaced by an underscore.
 *
 * The regex comes from the first available source:
 *  1. MediaWikiTitleCodec::getTitleInvalidRegex()
 *  2. Title::getTitleInvalidRegex()
 *  3. a locally assembled pattern based on Title::legalChars(), built once
 *     and kept in a static variable.
 *
 * @param string $text
 *
 * @return string
 */
public static function makeTitleValid($text) {
    static $invalidTitleRegex;

    $decoded = html_entity_decode(self::stripWikitext($text), ENT_QUOTES, 'UTF-8');

    if (is_callable('MediaWikiTitleCodec::getTitleInvalidRegex')) {
        $invalidTitleRegex = MediaWikiTitleCodec::getTitleInvalidRegex();
    } elseif (is_callable(array('Title', 'getTitleInvalidRegex'))) {
        // Pre-1.25 compat
        $invalidTitleRegex = Title::getTitleInvalidRegex();
    } elseif (!$invalidTitleRegex) {
        // Back-compat: neither accessor exists, so assemble the pattern here.
        // Alternatives, in order: any character outside Title::legalChars(),
        // a %XX escape, a named entity, a decimal entity, a hex entity.
        $invalidTitleRegex = '/'
            . '[^' . Title::legalChars() . ']'
            . '|%[0-9A-Fa-f]{2}'
            . '|&[A-Za-z0-9\\x80-\\xff]+;'
            . '|&#[0-9]+;'
            . '|&#x[0-9A-Fa-f]+;'
            . '/S';
    }

    return preg_replace($invalidTitleRegex, '_', $decoded);
}
/**
 * Tells whether a language code string is well-formed, regardless of
 * whether such a language exists. Codes that are only used for
 * customisation via the MediaWiki namespace are accepted as well.
 *
 * Results are memoised per code in a static array.
 *
 * @param string $code
 *
 * @return bool
 */
public static function isValidCode($code) {
    static $validity = array();

    if (!isset($validity[$code])) {
        // People think language codes are html safe, so enforce it.
        // Ideally we should only allow a-zA-Z0-9-
        // but, .+ and other chars are often used for {{int:}} hacks
        // see bugs 37564, 37587, 36938
        $hasOnlySafeChars = strcspn($code, ":/\\&<>'\"") === strlen($code);

        // The title regex is only consulted when the cheap character scan passed.
        $validity[$code] = $hasOnlySafeChars
            && !preg_match(Title::getTitleInvalidRegex(), $code);
    }

    return $validity[$code];
}
/**
 * Tells whether a language code string is well-formed, regardless of
 * whether such a language exists. Codes that are only used for
 * customisation via the MediaWiki namespace are accepted as well.
 *
 * @param string $code
 *
 * @return bool
 */
public static function isValidCode($code) {
    // Any of : / \ & < > ' " makes the code invalid outright.
    if (strcspn($code, ":/\\&<>'\"") !== strlen($code)) {
        return false;
    }

    // Otherwise the code is valid exactly when the title regex finds nothing.
    return !preg_match(Title::getTitleInvalidRegex(), $code);
}
/**
 * Sanitize text for use as filename and article title.
 *
 * Every entry of self::$ILLEGAL_TITLE_CHARS and every match of
 * Title::getTitleInvalidRegex() is replaced with $replaceChar. Repeated,
 * leading and trailing occurrences of $replaceChar are then removed, the
 * result is cut to self::$IMAGE_NAME_MAX_LENGTH and trimmed.
 *
 * @param string $titleText title to sanitize
 * @param string $replaceChar character to replace illegal characters with;
 *   when empty, illegal characters are simply removed
 * @return string sanitized title
 */
public static function sanitizeTitle($titleText, $replaceChar = ' ') {
    wfProfileIn(__METHOD__);

    foreach (self::$ILLEGAL_TITLE_CHARS as $illegalChar) {
        $titleText = str_replace($illegalChar, $replaceChar, $titleText);
    }

    $titleText = preg_replace(Title::getTitleInvalidRegex(), $replaceChar, $titleText);

    // Remove repeated replacement characters. explode() rejects an empty
    // separator, and with an empty replacement there is nothing to collapse.
    if ((string)$replaceChar !== '') {
        $segments = array_filter(
            explode($replaceChar, $titleText),
            // Only drop empty segments: array_filter() without a callback
            // would also discard a segment that is exactly "0".
            function ($segment) {
                return $segment !== '';
            }
        );
        $titleText = implode($replaceChar, $segments);
    }

    // DB column Image.img_name has size 255
    // NOTE(review): substr() counts bytes, so a multi-byte character that
    // straddles the limit is split - confirm whether that can occur here.
    $titleText = substr($titleText, 0, self::$IMAGE_NAME_MAX_LENGTH);

    wfProfileOut(__METHOD__);

    return trim($titleText);
}
/**
 * Validates the title and description submitted for a forum board.
 *
 * On entry $this->status is set to 'error' and $this->errorfield /
 * $this->errormsg are cleared; this method never changes status again.
 * On the first failed check, errorfield and errormsg are filled in and
 * false is returned.
 *
 * Checks, in order: illegal title characters / unparsable title,
 * title length, description length.
 *
 * @param string $boardTitle
 * @param string $boardDescription
 *
 * @return bool true when every check passed
 */
private function validateBoardData($boardTitle, $boardDescription) {
    $this->status = 'error';
    $this->errorfield = '';
    $this->errormsg = '';

    // Trim spaces (CONN-167)
    $boardTitle = WikiaSanitizer::unicodeTrim($boardTitle);
    $boardDescription = WikiaSanitizer::unicodeTrim($boardDescription);

    // Reject illegal characters; Title::newFromText() is only tried when
    // the regex found nothing.
    $hasIllegalChars = preg_match(Title::getTitleInvalidRegex(), $boardTitle);
    if ($hasIllegalChars || is_null(Title::newFromText($boardTitle))) {
        $this->errorfield = 'boardTitle';
        $this->errormsg = wfMessage('forum-board-title-validation-invalid')->escaped();

        return false;
    }

    $forum = new Forum();

    // Each rule: form field, value, length kind for Forum::validateLength(),
    // message key reported on failure.
    $lengthRules = array(
        array('boardTitle', $boardTitle, 'title', 'forum-board-title-validation-length'),
        array('boardDescription', $boardDescription, 'desc', 'forum-board-description-validation-length'),
    );

    foreach ($lengthRules as $rule) {
        list($field, $value, $lengthKind, $messageKey) = $rule;

        if ($forum->validateLength($value, $lengthKind) !== Forum::LEN_OK) {
            $this->errorfield = $field;
            $this->errormsg = wfMessage($messageKey)->escaped();

            return false;
        }
    }

    return true;
}
/**
 * Builds a title in the NS_VIDEO namespace from a raw video name.
 *
 * Matches of Title::getTitleInvalidRegex() and slashes are replaced by
 * spaces, runs of spaces are collapsed to one, and the name is cut to
 * self::MAX_TITLE_LENGTH before being handed to Title::makeTitleSafe().
 *
 * @param string $name raw video name
 *
 * @return mixed whatever Title::makeTitleSafe() returns (presumably a Title,
 *   or null when the name is unusable - verify against Title)
 */
protected function sanitizeTitle($name) {
    wfProfileIn(__METHOD__);

    // sanitize title
    $name = preg_replace(Title::getTitleInvalidRegex(), ' ', $name);

    // get rid of slashes. these are technically allowed in article
    // titles, but they refer to subpages, which videos don't have
    $name = str_replace('/', ' ', $name);

    // Collapse every run of spaces left behind by the replacements above.
    // The previous str_replace() call replaced a space with a space and
    // therefore changed nothing.
    $name = preg_replace('/ {2,}/', ' ', $name);

    // DB column Image.img_name has size 255
    $name = substr($name, 0, self::MAX_TITLE_LENGTH);

    $title = Title::makeTitleSafe(NS_VIDEO, $name);

    wfProfileOut(__METHOD__);

    return $title;
}
/**
 * Normalizes and splits a title string.
 *
 * This function strips bidi override characters, normalizes whitespace to
 * underscores, splits off the interwiki and namespace prefixes and the
 * fragment, rejects titles containing illegal characters or sequences,
 * and canonicalizes the remaining database key.
 *
 * @todo this method is only exposed as a temporary measure to ease refactoring.
 * It was copied with minimal changes from Title::secureAndSplit().
 *
 * @todo This method should be split up and an appropriate interface
 * defined for use by the Title class.
 *
 * @param string $text
 * @param int $defaultNamespace Namespace used when $text carries no
 *   namespace prefix and no initial colon.
 *
 * @throws MalformedTitleException If $text is not a valid title string.
 * @return array A map with the fields 'interwiki', 'local_interwiki',
 *   'fragment', 'namespace', 'user_case_dbkey', and 'dbkey'.
 */
public function splitTitleString($text, $defaultNamespace = NS_MAIN) {
    $dbkey = str_replace(' ', '_', $text);

    # Initialisation
    $parts = array(
        'interwiki' => '',
        'local_interwiki' => false,
        'fragment' => '',
        'namespace' => $defaultNamespace,
        'dbkey' => $dbkey,
        'user_case_dbkey' => $dbkey
    );

    # Strip Unicode bidi override characters.
    # Sometimes they slip into cut-n-pasted page titles, where the
    # override chars get included in list displays.
    $dbkey = preg_replace('/\\xE2\\x80[\\x8E\\x8F\\xAA-\\xAE]/S', '', $dbkey);

    # Clean up whitespace
    # Note: use of the /u option on preg_replace here will cause
    # input with invalid UTF-8 sequences to be nullified out in PHP 5.2.x,
    # conveniently disabling them.
    $dbkey = preg_replace('/[ _\\xA0\\x{1680}\\x{180E}\\x{2000}-\\x{200A}\\x{2028}\\x{2029}\\x{202F}\\x{205F}\\x{3000}]+/u', '_', $dbkey);
    $dbkey = trim($dbkey, '_');

    if (strpos($dbkey, UTF8_REPLACEMENT) !== false) {
        # Contained illegal UTF-8 sequences or forbidden Unicode chars.
        throw new MalformedTitleException('Bad UTF-8 sequences found in title: ' . $text);
    }

    $parts['dbkey'] = $dbkey;

    # Initial colon indicates main namespace rather than specified default
    # but should not create invalid {ns,title} pairs such as {0,Project:Foo}
    if ($dbkey !== '' && ':' == $dbkey[0]) {
        $parts['namespace'] = NS_MAIN;
        $dbkey = substr($dbkey, 1); # remove the colon but continue processing
        $dbkey = trim($dbkey, '_'); # remove any subsequent whitespace
    }

    if ($dbkey == '') {
        throw new MalformedTitleException('Empty title: ' . $text);
    }

    # Namespace or interwiki prefix
    # $m[1] is the shortest text before a colon, $m[2] everything after it;
    # underscores directly around the colon are dropped.
    $prefixRegexp = "/^(.+?)_*:_*(.*)\$/S";

    # The loop body normally runs once (see the trailing "break"); it only
    # repeats when a local interwiki prefix was removed and the remainder
    # has to be split again.
    do {
        $m = array();
        if (preg_match($prefixRegexp, $dbkey, $m)) {
            $p = $m[1];
            if (($ns = $this->language->getNsIndex($p)) !== false) {
                # Ordinary namespace
                $dbkey = $m[2];
                $parts['namespace'] = $ns;
                # For Talk:X pages, check if X has a "namespace" prefix
                if ($ns == NS_TALK && preg_match($prefixRegexp, $dbkey, $x)) {
                    if ($this->language->getNsIndex($x[1])) {
                        # Disallow Talk:File:x type titles...
                        throw new MalformedTitleException('Bad namespace prefix: ' . $text);
                    } elseif (Interwiki::isValidInterwiki($x[1])) {
                        //TODO: get rid of global state!
                        # Disallow Talk:Interwiki:x type titles...
                        throw new MalformedTitleException('Interwiki prefix found in title: ' . $text);
                    }
                }
            } elseif (Interwiki::isValidInterwiki($p)) {
                # Interwiki link
                $dbkey = $m[2];
                $parts['interwiki'] = $this->language->lc($p);

                # Redundant interwiki prefix to the local wiki
                foreach ($this->localInterwikis as $localIW) {
                    if (0 == strcasecmp($parts['interwiki'], $localIW)) {
                        if ($dbkey == '') {
                            # Empty self-links should point to the Main Page, to ensure
                            # compatibility with cross-wiki transclusions and the like.
                            $mainPage = Title::newMainPage();
                            return array(
                                'interwiki' => $mainPage->getInterwiki(),
                                'local_interwiki' => true,
                                'fragment' => $mainPage->getFragment(),
                                'namespace' => $mainPage->getNamespace(),
                                'dbkey' => $mainPage->getDBkey(),
                                'user_case_dbkey' => $mainPage->getUserCaseDBKey()
                            );
                        }
                        $parts['interwiki'] = '';
                        # local interwikis should behave like initial-colon links
                        $parts['local_interwiki'] = true;

                        # Do another namespace split...
                        # ("continue 2" targets the enclosing do...while, not the foreach)
                        continue 2;
                    }
                }

                # If there's an initial colon after the interwiki, that also
                # resets the default namespace
                if ($dbkey !== '' && $dbkey[0] == ':') {
                    $parts['namespace'] = NS_MAIN;
                    $dbkey = substr($dbkey, 1);
                }
            }
            # If there's no recognized interwiki or namespace,
            # then let the colon expression be part of the title.
        }
        break;
    } while (true);

    # Split off the fragment: everything from the first "#" onwards.
    $fragment = strstr($dbkey, '#');
    if (false !== $fragment) {
        $parts['fragment'] = str_replace('_', ' ', substr($fragment, 1));
        $dbkey = substr($dbkey, 0, strlen($dbkey) - strlen($fragment));
        # remove whitespace again: prevents "Foo_bar_#"
        # becoming "Foo_bar_"
        $dbkey = preg_replace('/_*$/', '', $dbkey);
    }

    # Reject illegal characters.
    $rxTc = Title::getTitleInvalidRegex();
    if (preg_match($rxTc, $dbkey)) {
        throw new MalformedTitleException('Illegal characters found in title: ' . $text);
    }

    # Pages with "/./" or "/../" appearing in the URLs will often be un-
    # reachable due to the way web browsers deal with 'relative' URLs.
    # Also, they conflict with subpage syntax. Forbid them explicitly.
    # The leading strpos() for "." skips the detailed checks for keys
    # that contain no dot at all.
    if (strpos($dbkey, '.') !== false && ($dbkey === '.' || $dbkey === '..' || strpos($dbkey, './') === 0 || strpos($dbkey, '../') === 0 || strpos($dbkey, '/./') !== false || strpos($dbkey, '/../') !== false || substr($dbkey, -2) == '/.' || substr($dbkey, -3) == '/..')) {
        throw new MalformedTitleException('Bad title: ' . $text);
    }

    # Magic tilde sequences? Nu-uh!
    if (strpos($dbkey, '~~~') !== false) {
        throw new MalformedTitleException('Bad title: ' . $text);
    }

    # Limit the size of titles to 255 bytes. This is typically the size of the
    # underlying database field. We make an exception for special pages, which
    # don't need to be stored in the database, and may edge over 255 bytes due
    # to subpage syntax for long titles, e.g. [[Special:Block/Long name]]
    # (&& binds tighter than ||, so the 512-byte cap applies to every namespace.)
    if ($parts['namespace'] != NS_SPECIAL && strlen($dbkey) > 255 || strlen($dbkey) > 512) {
        throw new MalformedTitleException('Title too long: ' . substr($dbkey, 0, 255) . '...');
    }

    # Normally, all wiki links are forced to have an initial capital letter so [[foo]]
    # and [[Foo]] point to the same place. Don't force it for interwikis, since the
    # other site might be case-sensitive.
    $parts['user_case_dbkey'] = $dbkey;
    if ($parts['interwiki'] === '') {
        $dbkey = Title::capitalize($dbkey, $parts['namespace']);
    }

    # Can't make a link to a namespace alone... "empty" local links can only be
    # self-links with a fragment identifier.
    if ($dbkey == '' && $parts['interwiki'] === '') {
        if ($parts['namespace'] != NS_MAIN) {
            throw new MalformedTitleException('Empty title: ' . $text);
        }
    }

    // Allow IPv6 usernames to start with '::' by canonicalizing IPv6 titles.
    // IP names are not allowed for accounts, and can only be referring to
    // edits from the IP. Given '::' abbreviations and caps/lowercaps,
    // there are numerous ways to present the same IP. Having sp:contribs scan
    // them all is silly and having some show the edits and others not is
    // inconsistent. Same for talk/userpages. Keep them normalized instead.
    if ($parts['namespace'] == NS_USER || $parts['namespace'] == NS_USER_TALK) {
        $dbkey = IP::sanitizeIP($dbkey);
    }

    // Any remaining initial :s are illegal.
    if ($dbkey !== '' && ':' == $dbkey[0]) {
        throw new MalformedTitleException('Title must not start with a colon: ' . $text);
    }

    # Fill fields
    $parts['dbkey'] = $dbkey;

    return $parts;
}
/**
 * Expands a double-brace node into either a parser function call, a
 * template call, or - when the title is invalid - the re-expanded
 * original brace text.
 *
 * @param array $bits Uses the 'title' and 'parts' entries of the node.
 * @param object $deps Passed through to every expand() call; receives
 *   addTemplate() for a literal template title.
 *
 * @return mixed The expression produced by newParserFunctionCall(),
 *   expand() or newExpression(), depending on the path taken.
 */
function expandTemplate($bits, $deps) {
    global $wgContLang;

    $argNodes = $bits['parts'];
    $titleExpr = $this->expand($bits['title'], $deps);

    $funcName = false;
    $literalTitle = false;
    $titleIsInvalid = false;

    if ($titleExpr->op === 'literal') {
        // Whole title is one literal: split "name:arg" at the first colon.
        $literalTitle = $titleExpr->args[0];
        $colonPos = strpos($literalTitle, ':');
        if ($colonPos !== false) {
            $funcName = substr($literalTitle, 0, $colonPos);
            $firstArgExpr = $this->newLiteral(substr($literalTitle, $colonPos + 1));
        }
    } elseif ($titleExpr->op === 'concat' && $titleExpr->args[0]->op === 'literal') {
        // Title is a concatenation that starts with a literal; only that
        // leading literal is inspected for a function name.
        $leadString = $titleExpr->args[0]->args[0];
        $colonPos = strpos($leadString, ':');
        if ($colonPos !== false) {
            $funcName = ltrim(substr($leadString, 0, $colonPos));

            // The first function argument is the title expression minus
            // the "name:" prefix.
            $firstArgExpr = clone $titleExpr;
            if ($colonPos == strlen($leadString) - 1) {
                // Nothing follows the colon: drop the leading literal.
                array_shift($firstArgExpr->args);
            } else {
                $firstArgExpr->args[0] = $this->newLiteral(substr($leadString, $colonPos + 1));
            }
        }

        $titleIsInvalid = (bool)preg_match(Title::getTitleInvalidRegex(), ltrim($leadString));
    }

    if ($funcName !== false) {
        // Index 1 is looked up with the name as written, index 0 with the
        // name lower-cased by the content language.
        $funcWordId = false;
        if (isset($this->parser->mFunctionSynonyms[1][$funcName])) {
            $funcWordId = $this->parser->mFunctionSynonyms[1][$funcName];
        } else {
            $loweredName = $wgContLang->lc($funcName);
            if (isset($this->parser->mFunctionSynonyms[0][$loweredName])) {
                $funcWordId = $this->parser->mFunctionSynonyms[0][$loweredName];
            }
        }

        if ($funcWordId !== false) {
            $funcLuaId = $this->parserFunctionToIdentifier($funcWordId);

            $callArgs = array($firstArgExpr);
            for ($index = 0; $index < $argNodes->getLength(); $index++) {
                $callArgs[] = $this->expand($argNodes->item($index), $deps);
            }

            return $this->newParserFunctionCall($funcLuaId, $callArgs, $deps);
        }
    }

    if ($literalTitle !== false) {
        $title = Title::newFromText($literalTitle, NS_TEMPLATE);
        if ($title) {
            // Register the template in $deps for later expansion
            $deps->addTemplate($title->getPrefixedDBkey());
            $templateIdent = $this->titleToIdentifier($title);
        }
    } else {
        // Non-literal title: a placeholder title is used unless the
        // leading literal already failed the title regex.
        $title = $titleIsInvalid ? false : Title::newFromText('dynamic');
    }

    if (!$title) {
        // Invalid title: rebuild the original brace text and expand that.
        $frame = $this->preprocessor->newFrame();
        $origNode = $frame->virtualBracketedImplode('{{', '|', '}}', $bits['title'], $argNodes);

        return $this->expand($origNode, $deps);
    }

    // Create a call to the template function
    $tplFrame = $this->preprocessor->newFrame()->newChild($argNodes, $title);

    $templateArgs = array();
    foreach ($tplFrame->numberedArgs as $position => $argNode) {
        // Numbered arguments are wrapped in newTrim(); named ones are not.
        $templateArgs[$position] = $this->newTrim($this->expand($argNode, $deps), $deps);
    }
    foreach ($tplFrame->namedArgs as $argName => $argNode) {
        $templateArgs[$argName] = $this->expand($argNode, $deps);
    }

    if ($literalTitle === false) {
        // Do a dynamic call
        $callArgs = array(
            $this->newLiteral('mw_dynamic_template'),
            $this->newTrim($titleExpr, $deps),
            $this->newHash($templateArgs),
        );
    } else {
        // Do a regular call
        $callArgs = array(
            $this->newLiteral($templateIdent),
            $this->newHash($templateArgs),
        );
    }

    return $this->newExpression('call', $callArgs);
}