/**
 * Secure and split - main initialisation function for this object
 *
 * Assumes that mDbkeyform has been set, and is urldecoded
 * and uses underscores, but not otherwise munged. This function
 * removes illegal characters, splits off the interwiki and
 * namespace prefixes, sets the other forms, and canonicalizes
 * everything.
 *
 * Resets mInterwiki and mFragment, then fills mNamespace, mInterwiki,
 * mUserCaseDBKey, mDbkeyform, mUrlform and mTextform. mArticleID is
 * forced to 0 for interwiki and special-page titles. When false is
 * returned, some of these members may already have been overwritten.
 *
 * @return Bool true on success, false if the title is not acceptable
 */
private function secureAndSplit() {
    global $wgContLang, $wgLocalInterwiki;

    # Initialisation
    $this->mInterwiki = $this->mFragment = '';
    $this->mNamespace = $this->mDefaultNamespace; # Usually NS_MAIN

    $dbkey = $this->mDbkeyform;

    # Strip Unicode bidi override characters.
    # Sometimes they slip into cut-n-pasted page titles, where the
    # override chars get included in list displays.
    $dbkey = preg_replace('/\\xE2\\x80[\\x8E\\x8F\\xAA-\\xAE]/S', '', $dbkey);

    # Clean up whitespace
    # Note: use of the /u option on preg_replace here will cause
    # input with invalid UTF-8 sequences to be nullified out in PHP 5.2.x,
    # conveniently disabling them.
    $dbkey = preg_replace('/[ _\\xA0\\x{1680}\\x{180E}\\x{2000}-\\x{200A}\\x{2028}\\x{2029}\\x{202F}\\x{205F}\\x{3000}]+/u', '_', $dbkey);
    $dbkey = trim($dbkey, '_');

    if ($dbkey == '') {
        return false;
    }

    if (false !== strpos($dbkey, UTF8_REPLACEMENT)) {
        # Contained illegal UTF-8 sequences or forbidden Unicode chars.
        return false;
    }

    $this->mDbkeyform = $dbkey;

    # Initial colon indicates main namespace rather than specified default
    # but should not create invalid {ns,title} pairs such as {0,Project:Foo}
    if (':' == $dbkey[0]) {
        $this->mNamespace = NS_MAIN;
        $dbkey = substr($dbkey, 1); # remove the colon but continue processing
        $dbkey = trim($dbkey, '_'); # remove any subsequent whitespace

        # A bare ":" (optionally followed by whitespace) leaves nothing
        # behind. Without this check it would slip through as an empty
        # main-namespace title with no fragment.
        if ($dbkey == '') {
            return false;
        }
    }

    # Namespace or interwiki prefix
    $firstPass = true;
    $prefixRegexp = "/^(.+?)_*:_*(.*)\$/S";
    do {
        $m = array();
        if (preg_match($prefixRegexp, $dbkey, $m)) {
            $p = $m[1];
            if (($ns = $wgContLang->getNsIndex($p)) !== false) {
                # Ordinary namespace
                $dbkey = $m[2];
                $this->mNamespace = $ns;
                # For Talk:X pages, check if X has a "namespace" prefix
                if ($ns == NS_TALK && preg_match($prefixRegexp, $dbkey, $x)) {
                    if ($wgContLang->getNsIndex($x[1])) {
                        # Disallow Talk:File:x type titles...
                        return false;
                    } elseif (Interwiki::isValidInterwiki($x[1])) {
                        # Disallow Talk:Interwiki:x type titles...
                        return false;
                    }
                }
            } elseif (Interwiki::isValidInterwiki($p)) {
                if (!$firstPass) {
                    # Can't make a local interwiki link to an interwiki link.
                    # That's just crazy!
                    return false;
                }

                # Interwiki link
                $dbkey = $m[2];
                $this->mInterwiki = $wgContLang->lc($p);

                # Redundant interwiki prefix to the local wiki
                if ($wgLocalInterwiki !== false
                    && 0 == strcasecmp($this->mInterwiki, $wgLocalInterwiki)
                ) {
                    if ($dbkey == '') {
                        # Can't have an empty self-link
                        return false;
                    }
                    $this->mInterwiki = '';
                    $firstPass = false;
                    # Do another namespace split...
                    continue;
                }

                # If there's an initial colon after the interwiki, that also
                # resets the default namespace
                if ($dbkey !== '' && $dbkey[0] == ':') {
                    $this->mNamespace = NS_MAIN;
                    $dbkey = substr($dbkey, 1);
                }
            }
            # If there's no recognized interwiki or namespace,
            # then let the colon expression be part of the title.
        }
        break;
    } while (true);

    # We already know that some pages won't be in the database!
    if ($this->mInterwiki != '' || NS_SPECIAL == $this->mNamespace) {
        $this->mArticleID = 0;
    }

    $fragment = strstr($dbkey, '#');
    if (false !== $fragment) {
        $this->setFragment($fragment);
        $dbkey = substr($dbkey, 0, strlen($dbkey) - strlen($fragment));
        # remove whitespace again: prevents "Foo_bar_#"
        # becoming "Foo_bar_"
        $dbkey = preg_replace('/_*$/', '', $dbkey);
    }

    # Reject illegal characters.
    $rxTc = self::getTitleInvalidRegex();
    if (preg_match($rxTc, $dbkey)) {
        return false;
    }

    # Pages with "/./" or "/../" appearing in the URLs will often be un-
    # reachable due to the way web browsers deal with 'relative' URLs.
    # Also, they conflict with subpage syntax. Forbid them explicitly.
    if (strpos($dbkey, '.') !== false
        && ($dbkey === '.' || $dbkey === '..'
            || strpos($dbkey, './') === 0
            || strpos($dbkey, '../') === 0
            || strpos($dbkey, '/./') !== false
            || strpos($dbkey, '/../') !== false
            || substr($dbkey, -2) == '/.'
            || substr($dbkey, -3) == '/..')
    ) {
        return false;
    }

    # Magic tilde sequences? Nu-uh!
    if (strpos($dbkey, '~~~') !== false) {
        return false;
    }

    # Limit the size of titles to 255 bytes. This is typically the size of the
    # underlying database field. We make an exception for special pages, which
    # don't need to be stored in the database, and may edge over 255 bytes due
    # to subpage syntax for long titles, e.g. [[Special:Block/Long name]]
    if ($this->mNamespace != NS_SPECIAL && strlen($dbkey) > 255
        || strlen($dbkey) > 512
    ) {
        return false;
    }

    # Normally, all wiki links are forced to have an initial capital letter so [[foo]]
    # and [[Foo]] point to the same place. Don't force it for interwikis, since the
    # other site might be case-sensitive.
    $this->mUserCaseDBKey = $dbkey;
    if ($this->mInterwiki == '') {
        $dbkey = self::capitalize($dbkey, $this->mNamespace);
    }

    # Can't make a link to a namespace alone... "empty" local links can only be
    # self-links with a fragment identifier.
    if ($dbkey == '' && $this->mInterwiki == '' && $this->mNamespace != NS_MAIN) {
        return false;
    }

    // Allow IPv6 usernames to start with '::' by canonicalizing IPv6 titles.
    // IP names are not allowed for accounts, and can only be referring to
    // edits from the IP. Given '::' abbreviations and caps/lowercaps,
    // there are numerous ways to present the same IP. Having sp:contribs scan
    // them all is silly and having some show the edits and others not is
    // inconsistent. Same for talk/userpages. Keep them normalized instead.
    $dbkey = $this->mNamespace == NS_USER || $this->mNamespace == NS_USER_TALK
        ? IP::sanitizeIP($dbkey)
        : $dbkey;

    // Any remaining initial :s are illegal.
    if ($dbkey !== '' && ':' == $dbkey[0]) {
        return false;
    }

    # Fill fields
    $this->mDbkeyform = $dbkey;
    $this->mUrlform = wfUrlencode($dbkey);
    $this->mTextform = str_replace('_', ' ', $dbkey);

    return true;
}
/**
 * Normalizes a title string and breaks it into its components.
 *
 * Strips bidi override characters, collapses whitespace, peels off any
 * interwiki and namespace prefixes, separates the fragment, validates
 * what is left and canonicalizes it.
 *
 * @todo this method is only exposed as a temporary measure to ease refactoring.
 * It was copied with minimal changes from Title::secureAndSplit().
 *
 * @todo This method should be split up and an appropriate interface
 * defined for use by the Title class.
 *
 * @param string $text
 * @param int $defaultNamespace
 *
 * @throws MalformedTitleException If $text is not a valid title string.
 * @return array A map with the fields 'interwiki', 'local_interwiki',
 *  'fragment', 'namespace', 'dbkey', and 'user_case_dbkey'.
 */
public function splitTitleString($text, $defaultNamespace = NS_MAIN) {
    $key = str_replace(' ', '_', $text);

    # Result skeleton; the fields are refined as parsing proceeds
    $result = array(
        'interwiki' => '',
        'local_interwiki' => false,
        'fragment' => '',
        'namespace' => $defaultNamespace,
        'dbkey' => $key,
        'user_case_dbkey' => $key,
    );

    # Drop Unicode bidi override characters, which tend to sneak in when
    # titles are copied out of list displays.
    $key = preg_replace('/\\xE2\\x80[\\x8E\\x8F\\xAA-\\xAE]/S', '', $key);

    # Collapse every run of whitespace-like characters into one underscore.
    # Note: with the /u option, input holding invalid UTF-8 sequences gets
    # nullified out in PHP 5.2.x, conveniently disabling it.
    $key = preg_replace('/[ _\\xA0\\x{1680}\\x{180E}\\x{2000}-\\x{200A}\\x{2028}\\x{2029}\\x{202F}\\x{205F}\\x{3000}]+/u', '_', $key);
    $key = trim($key, '_');

    # Illegal UTF-8 sequences or forbidden Unicode chars show up as the
    # replacement character.
    $hasReplacementChar = strpos($key, UtfNormal\Constants::UTF8_REPLACEMENT) !== false;
    if ($hasReplacementChar) {
        throw new MalformedTitleException('title-invalid-utf8', $text);
    }

    $result['dbkey'] = $key;

    # A leading colon selects the main namespace instead of the given
    # default, but must not produce invalid {ns,title} pairs such as
    # {0,Project:Foo}
    if ($key !== '' && $key[0] == ':') {
        $result['namespace'] = NS_MAIN;
        # strip the colon, then any whitespace that followed it
        $key = trim(substr($key, 1), '_');
    }

    if ($key == '') {
        throw new MalformedTitleException('title-invalid-empty', $text);
    }

    # Namespace or interwiki prefix
    $prefixPattern = "/^(.+?)_*:_*(.*)\$/S";
    while (true) {
        $match = array();
        if (!preg_match($prefixPattern, $key, $match)) {
            # No colon expression at all
            break;
        }

        $prefix = $match[1];
        $nsIndex = $this->language->getNsIndex($prefix);

        if ($nsIndex !== false) {
            # Ordinary namespace
            $key = $match[2];
            $result['namespace'] = $nsIndex;

            # For Talk:X pages, X must not itself carry a namespace or
            # interwiki prefix (Talk:File:x, Talk:Interwiki:x, ...)
            if ($nsIndex == NS_TALK && preg_match($prefixPattern, $key, $inner)) {
                // TODO: get rid of global state!
                if ($this->language->getNsIndex($inner[1])
                    || Interwiki::isValidInterwiki($inner[1])
                ) {
                    throw new MalformedTitleException('title-invalid-talk-namespace', $text);
                }
            }
            break;
        }

        if (!Interwiki::isValidInterwiki($prefix)) {
            # Neither a namespace nor an interwiki: the colon expression
            # simply stays part of the title.
            break;
        }

        # Interwiki link
        $key = $match[2];
        $result['interwiki'] = $this->language->lc($prefix);

        # Redundant interwiki prefix to the local wiki
        foreach ($this->localInterwikis as $localPrefix) {
            if (strcasecmp($result['interwiki'], $localPrefix) == 0) {
                if ($key == '') {
                    # Empty self-links should point to the Main Page, to ensure
                    # compatibility with cross-wiki transclusions and the like.
                    $mainPage = Title::newMainPage();
                    return array(
                        'interwiki' => $mainPage->getInterwiki(),
                        'local_interwiki' => true,
                        'fragment' => $mainPage->getFragment(),
                        'namespace' => $mainPage->getNamespace(),
                        'dbkey' => $mainPage->getDBkey(),
                        'user_case_dbkey' => $mainPage->getUserCaseDBKey(),
                    );
                }

                $result['interwiki'] = '';
                # local interwikis should behave like initial-colon links
                $result['local_interwiki'] = true;

                # Run the prefix split once more on the remainder
                continue 2;
            }
        }

        # An initial colon after the interwiki also resets the default
        # namespace
        if ($key !== '' && $key[0] == ':') {
            $result['namespace'] = NS_MAIN;
            $key = substr($key, 1);
        }
        break;
    }

    # Separate the fragment, if there is one
    $hashPos = strpos($key, '#');
    if ($hashPos !== false) {
        $result['fragment'] = str_replace('_', ' ', substr($key, $hashPos + 1));
        $key = substr($key, 0, $hashPos);
        # remove whitespace again: prevents "Foo_bar_#"
        # becoming "Foo_bar_"
        $key = preg_replace('/_*$/', '', $key);
    }

    # Reject illegal characters.
    $invalidRx = self::getTitleInvalidRegex();
    $hits = array();
    if (preg_match($invalidRx, $key, $hits)) {
        throw new MalformedTitleException('title-invalid-characters', $text, array($hits[0]));
    }

    # Pages with "/./" or "/../" appearing in the URLs will often be un-
    # reachable due to the way web browsers deal with 'relative' URLs.
    # Also, they conflict with subpage syntax. Forbid them explicitly.
    if (strpos($key, '.') !== false) {
        $looksRelative = $key === '.'
            || $key === '..'
            || strpos($key, './') === 0
            || strpos($key, '../') === 0
            || strpos($key, '/./') !== false
            || strpos($key, '/../') !== false
            || substr($key, -2) == '/.'
            || substr($key, -3) == '/..';
        if ($looksRelative) {
            throw new MalformedTitleException('title-invalid-relative', $text);
        }
    }

    # Magic tilde sequences? Nu-uh!
    if (strpos($key, '~~~') !== false) {
        throw new MalformedTitleException('title-invalid-magic-tilde', $text);
    }

    # Limit the size of titles to 255 bytes. This is typically the size of the
    # underlying database field. We make an exception for special pages, which
    # don't need to be stored in the database, and may edge over 255 bytes due
    # to subpage syntax for long titles, e.g. [[Special:Block/Long name]]
    if ($result['namespace'] != NS_SPECIAL) {
        $maxLength = 255;
    } else {
        $maxLength = 512;
    }
    if (strlen($key) > $maxLength) {
        throw new MalformedTitleException(
            'title-invalid-too-long',
            $text,
            array(Message::numParam($maxLength))
        );
    }

    # Normally, all wiki links are forced to have an initial capital letter so [[foo]]
    # and [[Foo]] point to the same place. Don't force it for interwikis, since the
    # other site might be case-sensitive.
    $result['user_case_dbkey'] = $key;
    if ($result['interwiki'] === '') {
        $key = Title::capitalize($key, $result['namespace']);
    }

    # Can't make a link to a namespace alone... "empty" local links can only be
    # self-links with a fragment identifier.
    if ($key == '' && $result['interwiki'] === '' && $result['namespace'] != NS_MAIN) {
        throw new MalformedTitleException('title-invalid-empty', $text);
    }

    // Allow IPv6 usernames to start with '::' by canonicalizing IPv6 titles.
    // IP names are not allowed for accounts, and can only be referring to
    // edits from the IP. Given '::' abbreviations and caps/lowercaps,
    // there are numerous ways to present the same IP. Having sp:contribs scan
    // them all is silly and having some show the edits and others not is
    // inconsistent. Same for talk/userpages. Keep them normalized instead.
    $isUserSpace = $result['namespace'] == NS_USER || $result['namespace'] == NS_USER_TALK;
    if ($isUserSpace) {
        $key = IP::sanitizeIP($key);
    }

    // Any remaining initial :s are illegal.
    if ($key !== '' && $key[0] == ':') {
        throw new MalformedTitleException('title-invalid-leading-colon', $text);
    }

    # Fill fields
    $result['dbkey'] = $key;

    return $result;
}
/**
 * Build the target URL for an object reference of the form
 * "<interwiki prefix>|<title>".
 *
 * The title is substituted for the '$1' placeholder in the URL that the
 * interwiki entry provides.
 *
 * @param string $objRef
 * @return bool|string The URL, or false when the reference has no '|'
 *  separator or its prefix is not a valid interwiki
 */
private function getButtonHrefByObjectReference($objRef) {
    $segments = explode('|', $objRef);

    // Need at least a prefix and a title
    if (count($segments) < 2) {
        return false;
    }

    // Only the first two segments are used; anything further is ignored
    $prefix = $segments[0];
    $pageName = $segments[1];

    if (!Interwiki::isValidInterwiki($prefix)) {
        return false;
    }

    $urlPattern = Interwiki::fetch($prefix)->getURL();

    return str_replace('$1', $pageName, $urlPattern);
}
/**
 * Interwiki data supplied through the interwiki cache (one local and one
 * non-local entry) must be exposed by getAllPrefixes(),
 * isValidInterwiki() and fetch().
 */
public function testArrayStorage() {
    $germanRow = [
        'iw_prefix' => 'de',
        'iw_url' => 'http://de.wikipedia.org/wiki/',
        'iw_local' => 1,
    ];
    $otherRow = [
        'iw_prefix' => 'zz',
        'iw_url' => 'http://zzwiki.org/wiki/',
        'iw_local' => 0,
    ];

    // Install the fixture as the interwiki cache
    $this->setWgInterwikiCache(
        $this->populateHash('en', [$germanRow], [$otherRow])
    );

    $expectedPrefixes = [$germanRow, $otherRow];
    $this->assertEquals($expectedPrefixes, Interwiki::getAllPrefixes(), 'getAllPrefixes()');

    $this->assertTrue(Interwiki::isValidInterwiki('de'), 'known prefix is valid');
    $this->assertTrue(Interwiki::isValidInterwiki('zz'), 'known prefix is valid');

    // Entry flagged as local
    $german = Interwiki::fetch('de');
    $this->assertInstanceOf('Interwiki', $german);
    $this->assertSame('http://de.wikipedia.org/wiki/', $german->getURL(), 'getURL');
    $this->assertSame(true, $german->isLocal(), 'isLocal');

    // Entry flagged as non-local
    $other = Interwiki::fetch('zz');
    $this->assertInstanceOf('Interwiki', $other);
    $this->assertSame('http://zzwiki.org/wiki/', $other->getURL(), 'getURL');
    $this->assertSame(false, $other->isLocal(), 'isLocal');
}
/**
 * Secure and split - main initialisation function for this object
 *
 * Assumes that mDbkeyform has been set, and is urldecoded
 * and uses underscores, but not otherwise munged. This function
 * removes illegal characters, splits off the interwiki and
 * namespace prefixes, sets the other forms, and canonicalizes
 * everything.
 *
 * Resets mInterwiki and mFragment, then fills mNamespace, mInterwiki,
 * mUserCaseDBKey, mDbkeyform, mUrlform and mTextform. mArticleID is
 * forced to 0 for interwiki and special-page titles. When false is
 * returned, some of these members may already have been overwritten.
 *
 * @return bool true on success, false if the title is not acceptable
 */
private function secureAndSplit() {
    global $wgContLang, $wgLocalInterwiki, $wgCapitalLinks;

    # Initialisation
    # The invalid-character regex is built once and reused across calls.
    static $rxTc = false;
    if (!$rxTc) {
        # Matching titles will be held as illegal.
        $rxTc = '/' .
            # Any character not allowed is forbidden...
            '[^' . Title::legalChars() . ']' .
            # URL percent encoding sequences
            '|%[0-9A-Fa-f]{2}' .
            # XML/HTML named character references
            '|&[A-Za-z0-9\\x80-\\xff]+;' .
            # ... and decimal / hexadecimal numeric references
            '|&#[0-9]+;' .
            '|&#x[0-9A-Fa-f]+;' .
            '/S';
    }

    $this->mInterwiki = $this->mFragment = '';
    $this->mNamespace = $this->mDefaultNamespace; # Usually NS_MAIN

    $dbkey = $this->mDbkeyform;

    # Strip Unicode bidi override characters.
    # Sometimes they slip into cut-n-pasted page titles, where the
    # override chars get included in list displays.
    $dbkey = preg_replace('/\\xE2\\x80[\\x8E\\x8F\\xAA-\\xAE]/S', '', $dbkey);

    # Clean up whitespace
    $dbkey = preg_replace('/[ _]+/', '_', $dbkey);
    $dbkey = trim($dbkey, '_');

    if ('' == $dbkey) {
        return false;
    }

    if (false !== strpos($dbkey, UTF8_REPLACEMENT)) {
        # Contained illegal UTF-8 sequences or forbidden Unicode chars.
        return false;
    }

    $this->mDbkeyform = $dbkey;

    # Initial colon indicates main namespace rather than specified default
    # but should not create invalid {ns,title} pairs such as {0,Project:Foo}
    if (':' == $dbkey[0]) {
        $this->mNamespace = NS_MAIN;
        $dbkey = substr($dbkey, 1); # remove the colon but continue processing
        $dbkey = trim($dbkey, '_'); # remove any subsequent whitespace

        # A bare ":" (optionally followed by whitespace) leaves nothing
        # behind. Without this check it would slip through as an empty
        # main-namespace title with no fragment.
        if ('' == $dbkey) {
            return false;
        }
    }

    # Namespace or interwiki prefix
    $firstPass = true;
    do {
        $m = array();
        if (preg_match("/^(.+?)_*:_*(.*)\$/S", $dbkey, $m)) {
            $p = $m[1];
            if ($ns = $wgContLang->getNsIndex($p)) {
                # Ordinary namespace
                $dbkey = $m[2];
                $this->mNamespace = $ns;
            } elseif (Interwiki::isValidInterwiki($p)) {
                if (!$firstPass) {
                    # Can't make a local interwiki link to an interwiki link.
                    # That's just crazy!
                    return false;
                }

                # Interwiki link
                $dbkey = $m[2];
                $this->mInterwiki = $wgContLang->lc($p);

                # Redundant interwiki prefix to the local wiki
                if (0 == strcasecmp($this->mInterwiki, $wgLocalInterwiki)) {
                    if ($dbkey == '') {
                        # Can't have an empty self-link
                        return false;
                    }
                    $this->mInterwiki = '';
                    $firstPass = false;
                    # Do another namespace split...
                    continue;
                }

                # If there's an initial colon after the interwiki, that also
                # resets the default namespace
                if ($dbkey !== '' && $dbkey[0] == ':') {
                    $this->mNamespace = NS_MAIN;
                    $dbkey = substr($dbkey, 1);
                }
            }
            # If there's no recognized interwiki or namespace,
            # then let the colon expression be part of the title.
        }
        break;
    } while (true);

    # We already know that some pages won't be in the database!
    if ('' != $this->mInterwiki || NS_SPECIAL == $this->mNamespace) {
        $this->mArticleID = 0;
    }

    $fragment = strstr($dbkey, '#');
    if (false !== $fragment) {
        $this->setFragment($fragment);
        $dbkey = substr($dbkey, 0, strlen($dbkey) - strlen($fragment));
        # remove whitespace again: prevents "Foo_bar_#"
        # becoming "Foo_bar_"
        $dbkey = preg_replace('/_*$/', '', $dbkey);
    }

    # Reject illegal characters.
    if (preg_match($rxTc, $dbkey)) {
        return false;
    }

    /**
     * Pages with "/./" or "/../" appearing in the URLs will often be un-
     * reachable due to the way web browsers deal with 'relative' URLs.
     * Also, they conflict with subpage syntax. Forbid them explicitly.
     */
    if (strpos($dbkey, '.') !== false
        && ($dbkey === '.' || $dbkey === '..'
            || strpos($dbkey, './') === 0
            || strpos($dbkey, '../') === 0
            || strpos($dbkey, '/./') !== false
            || strpos($dbkey, '/../') !== false
            || substr($dbkey, -2) == '/.'
            || substr($dbkey, -3) == '/..')
    ) {
        return false;
    }

    /**
     * Magic tilde sequences? Nu-uh!
     */
    if (strpos($dbkey, '~~~') !== false) {
        return false;
    }

    /**
     * Limit the size of titles to 255 bytes.
     * This is typically the size of the underlying database field.
     * We make an exception for special pages, which don't need to be stored
     * in the database, and may edge over 255 bytes due to subpage syntax
     * for long titles, e.g. [[Special:Block/Long name]]
     */
    if ($this->mNamespace != NS_SPECIAL && strlen($dbkey) > 255
        || strlen($dbkey) > 512
    ) {
        return false;
    }

    /**
     * Normally, all wiki links are forced to have
     * an initial capital letter so [[foo]] and [[Foo]]
     * point to the same place.
     *
     * Don't force it for interwikis, since the other
     * site might be case-sensitive.
     */
    $this->mUserCaseDBKey = $dbkey;
    if ($wgCapitalLinks && $this->mInterwiki == '') {
        $dbkey = $wgContLang->ucfirst($dbkey);
    }

    /**
     * Can't make a link to a namespace alone...
     * "empty" local links can only be self-links
     * with a fragment identifier.
     */
    if ($dbkey == '' && $this->mInterwiki == '' && $this->mNamespace != NS_MAIN) {
        return false;
    }

    // Allow IPv6 usernames to start with '::' by canonicalizing IPv6 titles.
    // IP names are not allowed for accounts, and can only be referring to
    // edits from the IP. Given '::' abbreviations and caps/lowercaps,
    // there are numerous ways to present the same IP. Having sp:contribs scan
    // them all is silly and having some show the edits and others not is
    // inconsistent. Same for talk/userpages. Keep them normalized instead.
    $dbkey = $this->mNamespace == NS_USER || $this->mNamespace == NS_USER_TALK
        ? IP::sanitizeIP($dbkey)
        : $dbkey;

    // Any remaining initial :s are illegal.
    if ($dbkey !== '' && ':' == $dbkey[0]) {
        return false;
    }

    # Fill fields
    $this->mDbkeyform = $dbkey;
    $this->mUrlform = wfUrlencode($dbkey);
    $this->mTextform = str_replace('_', ' ', $dbkey);

    return true;
}