예제 #1
0
 /**
  * Secure and split - main initialisation function for this object
  *
  * Assumes that mDbkeyform has been set, and is urldecoded
  * and uses underscores, but not otherwise munged.  This function
  * removes illegal characters, splits off the interwiki and
  * namespace prefixes, sets the other forms, and canonicalizes
  * everything.
  *
  * On success mInterwiki, mNamespace, mUserCaseDBKey, mDbkeyform,
  * mUrlform and mTextform have been filled in, and any "#fragment"
  * has been handed to setFragment(). On failure the method returns
  * early, so some of these members may already have been overwritten.
  *
  * A title that consists only of a leading colon (optionally followed
  * by whitespace/underscores) is rejected: once the colon is removed
  * there is no title text left.
  *
  * @return Bool true on success, false if the title is invalid
  */
 private function secureAndSplit()
 {
     global $wgContLang, $wgLocalInterwiki;
     # Initialisation
     $this->mInterwiki = $this->mFragment = '';
     # Usually NS_MAIN
     $this->mNamespace = $this->mDefaultNamespace;
     $dbkey = $this->mDbkeyform;
     # Strip Unicode bidi override characters.
     # Sometimes they slip into cut-n-pasted page titles, where the
     # override chars get included in list displays.
     $dbkey = preg_replace('/\\xE2\\x80[\\x8E\\x8F\\xAA-\\xAE]/S', '', $dbkey);
     # Clean up whitespace
     # Note: use of the /u option on preg_replace here will cause
     # input with invalid UTF-8 sequences to be nullified out in PHP 5.2.x,
     # conveniently disabling them.
     $dbkey = preg_replace('/[ _\\xA0\\x{1680}\\x{180E}\\x{2000}-\\x{200A}\\x{2028}\\x{2029}\\x{202F}\\x{205F}\\x{3000}]+/u', '_', $dbkey);
     $dbkey = trim($dbkey, '_');
     if ($dbkey == '') {
         return false;
     }
     if (false !== strpos($dbkey, UTF8_REPLACEMENT)) {
         # Contained illegal UTF-8 sequences or forbidden Unicode chars.
         return false;
     }
     $this->mDbkeyform = $dbkey;
     # Initial colon indicates main namespace rather than specified default
     # but should not create invalid {ns,title} pairs such as {0,Project:Foo}
     if (':' == $dbkey[0]) {
         $this->mNamespace = NS_MAIN;
         # remove the colon but continue processing
         $dbkey = substr($dbkey, 1);
         # remove any subsequent whitespace
         $dbkey = trim($dbkey, '_');
         if ($dbkey == '') {
             # Nothing but the colon was supplied. Without this check the
             # empty remainder would slip through as a main-namespace
             # title, because the "namespace alone" check further down
             # only rejects empty titles outside NS_MAIN.
             return false;
         }
     }
     # Namespace or interwiki prefix
     # $firstPass becomes false once a local interwiki prefix has been
     # stripped; a further interwiki prefix is then refused.
     $firstPass = true;
     $prefixRegexp = "/^(.+?)_*:_*(.*)\$/S";
     # The loop body runs once, or twice when a local interwiki prefix is
     # stripped: "continue" re-enters it (the condition is always true),
     # every other path leaves through "break" or "return".
     do {
         $m = array();
         if (preg_match($prefixRegexp, $dbkey, $m)) {
             $p = $m[1];
             if (($ns = $wgContLang->getNsIndex($p)) !== false) {
                 # Ordinary namespace
                 $dbkey = $m[2];
                 $this->mNamespace = $ns;
                 # For Talk:X pages, check if X has a "namespace" prefix
                 if ($ns == NS_TALK && preg_match($prefixRegexp, $dbkey, $x)) {
                     if ($wgContLang->getNsIndex($x[1])) {
                         # Disallow Talk:File:x type titles...
                         return false;
                     } elseif (Interwiki::isValidInterwiki($x[1])) {
                         # Disallow Talk:Interwiki:x type titles...
                         return false;
                     }
                 }
             } elseif (Interwiki::isValidInterwiki($p)) {
                 if (!$firstPass) {
                     # Can't make a local interwiki link to an interwiki link.
                     # That's just crazy!
                     return false;
                 }
                 # Interwiki link
                 $dbkey = $m[2];
                 $this->mInterwiki = $wgContLang->lc($p);
                 # Redundant interwiki prefix to the local wiki
                 if ($wgLocalInterwiki !== false && 0 == strcasecmp($this->mInterwiki, $wgLocalInterwiki)) {
                     if ($dbkey == '') {
                         # Can't have an empty self-link
                         return false;
                     }
                     $this->mInterwiki = '';
                     $firstPass = false;
                     # Do another namespace split...
                     continue;
                 }
                 # If there's an initial colon after the interwiki, that also
                 # resets the default namespace
                 if ($dbkey !== '' && $dbkey[0] == ':') {
                     $this->mNamespace = NS_MAIN;
                     $dbkey = substr($dbkey, 1);
                 }
             }
             # If there's no recognized interwiki or namespace,
             # then let the colon expression be part of the title.
         }
         break;
     } while (true);
     # We already know that some pages won't be in the database!
     if ($this->mInterwiki != '' || NS_SPECIAL == $this->mNamespace) {
         $this->mArticleID = 0;
     }
     # Split off the fragment: everything from the first '#' onwards.
     $fragment = strstr($dbkey, '#');
     if (false !== $fragment) {
         $this->setFragment($fragment);
         $dbkey = substr($dbkey, 0, strlen($dbkey) - strlen($fragment));
         # remove whitespace again: prevents "Foo_bar_#"
         # becoming "Foo_bar_"
         $dbkey = preg_replace('/_*$/', '', $dbkey);
     }
     # Reject illegal characters.
     $rxTc = self::getTitleInvalidRegex();
     if (preg_match($rxTc, $dbkey)) {
         return false;
     }
     # Pages with "/./" or "/../" appearing in the URLs will often be un-
     # reachable due to the way web browsers deal with 'relative' URLs.
     # Also, they conflict with subpage syntax.  Forbid them explicitly.
     if (strpos($dbkey, '.') !== false && ($dbkey === '.' || $dbkey === '..' || strpos($dbkey, './') === 0 || strpos($dbkey, '../') === 0 || strpos($dbkey, '/./') !== false || strpos($dbkey, '/../') !== false || substr($dbkey, -2) == '/.' || substr($dbkey, -3) == '/..')) {
         return false;
     }
     # Magic tilde sequences? Nu-uh!
     if (strpos($dbkey, '~~~') !== false) {
         return false;
     }
     # Limit the size of titles to 255 bytes. This is typically the size of the
     # underlying database field. We make an exception for special pages, which
     # don't need to be stored in the database, and may edge over 255 bytes due
     # to subpage syntax for long titles, e.g. [[Special:Block/Long name]]
     # (&& binds tighter than ||: 255 bytes outside NS_SPECIAL, 512 bytes always.)
     if ($this->mNamespace != NS_SPECIAL && strlen($dbkey) > 255 || strlen($dbkey) > 512) {
         return false;
     }
     # Normally, all wiki links are forced to have an initial capital letter so [[foo]]
     # and [[Foo]] point to the same place.  Don't force it for interwikis, since the
     # other site might be case-sensitive.
     $this->mUserCaseDBKey = $dbkey;
     if ($this->mInterwiki == '') {
         $dbkey = self::capitalize($dbkey, $this->mNamespace);
     }
     # Can't make a link to a namespace alone... "empty" local links can only be
     # self-links with a fragment identifier.
     if ($dbkey == '' && $this->mInterwiki == '' && $this->mNamespace != NS_MAIN) {
         return false;
     }
     // Allow IPv6 usernames to start with '::' by canonicalizing IPv6 titles.
     // IP names are not allowed for accounts, and can only be referring to
     // edits from the IP. Given '::' abbreviations and caps/lowercaps,
     // there are numerous ways to present the same IP. Having sp:contribs scan
     // them all is silly and having some show the edits and others not is
     // inconsistent. Same for talk/userpages. Keep them normalized instead.
     $dbkey = $this->mNamespace == NS_USER || $this->mNamespace == NS_USER_TALK ? IP::sanitizeIP($dbkey) : $dbkey;
     // Any remaining initial :s are illegal.
     if ($dbkey !== '' && ':' == $dbkey[0]) {
         return false;
     }
     # Fill fields
     $this->mDbkeyform = $dbkey;
     $this->mUrlform = wfUrlencode($dbkey);
     $this->mTextform = str_replace('_', ' ', $dbkey);
     return true;
 }
예제 #2
0
 /**
  * Normalizes and splits a title string.
  *
  * This function removes illegal characters, splits off the interwiki and
  * namespace prefixes, sets the other forms, and canonicalizes
  * everything.
  *
  * Spaces in $text are first converted to underscores. When the text is
  * nothing but a prefix that matches one of $this->localInterwikis, the
  * parts of Title::newMainPage() are returned instead (with
  * 'local_interwiki' set to true).
  *
  * @todo this method is only exposed as a temporary measure to ease refactoring.
  * It was copied with minimal changes from Title::secureAndSplit().
  *
  * @todo This method should be split up and an appropriate interface
  * defined for use by the Title class.
  *
  * @param string $text
  * @param int $defaultNamespace Namespace used when $text carries no
  *        namespace prefix and no leading colon.
  *
  * @throws MalformedTitleException If $text is not a valid title string.
  * @return array A map with the fields 'interwiki', 'local_interwiki',
  *         'fragment', 'namespace', 'user_case_dbkey', and 'dbkey'.
  */
 public function splitTitleString($text, $defaultNamespace = NS_MAIN)
 {
     $dbkey = str_replace(' ', '_', $text);
     # Initialisation
     $parts = array('interwiki' => '', 'local_interwiki' => false, 'fragment' => '', 'namespace' => $defaultNamespace, 'dbkey' => $dbkey, 'user_case_dbkey' => $dbkey);
     # Strip Unicode bidi override characters.
     # Sometimes they slip into cut-n-pasted page titles, where the
     # override chars get included in list displays.
     $dbkey = preg_replace('/\\xE2\\x80[\\x8E\\x8F\\xAA-\\xAE]/S', '', $dbkey);
     # Clean up whitespace
     # Note: use of the /u option on preg_replace here will cause
     # input with invalid UTF-8 sequences to be nullified out in PHP 5.2.x,
     # conveniently disabling them.
     $dbkey = preg_replace('/[ _\\xA0\\x{1680}\\x{180E}\\x{2000}-\\x{200A}\\x{2028}\\x{2029}\\x{202F}\\x{205F}\\x{3000}]+/u', '_', $dbkey);
     $dbkey = trim($dbkey, '_');
     if (strpos($dbkey, UtfNormal\Constants::UTF8_REPLACEMENT) !== false) {
         # Contained illegal UTF-8 sequences or forbidden Unicode chars.
         throw new MalformedTitleException('title-invalid-utf8', $text);
     }
     $parts['dbkey'] = $dbkey;
     # Initial colon indicates main namespace rather than specified default
     # but should not create invalid {ns,title} pairs such as {0,Project:Foo}
     if ($dbkey !== '' && ':' == $dbkey[0]) {
         $parts['namespace'] = NS_MAIN;
         # remove the colon but continue processing
         $dbkey = substr($dbkey, 1);
         # remove any subsequent whitespace
         $dbkey = trim($dbkey, '_');
     }
     # The emptiness check comes after the colon has been stripped, so a
     # title consisting only of ":" is rejected as well.
     if ($dbkey == '') {
         throw new MalformedTitleException('title-invalid-empty', $text);
     }
     # Namespace or interwiki prefix
     $prefixRegexp = "/^(.+?)_*:_*(.*)\$/S";
     # The loop body is repeated only via the "continue 2" below (after a
     # local interwiki prefix has been stripped); every other path leaves
     # through "break", "return" or an exception.
     do {
         $m = array();
         if (preg_match($prefixRegexp, $dbkey, $m)) {
             $p = $m[1];
             if (($ns = $this->language->getNsIndex($p)) !== false) {
                 # Ordinary namespace
                 $dbkey = $m[2];
                 $parts['namespace'] = $ns;
                 # For Talk:X pages, check if X has a "namespace" prefix
                 if ($ns == NS_TALK && preg_match($prefixRegexp, $dbkey, $x)) {
                     if ($this->language->getNsIndex($x[1])) {
                         # Disallow Talk:File:x type titles...
                         throw new MalformedTitleException('title-invalid-talk-namespace', $text);
                     } elseif (Interwiki::isValidInterwiki($x[1])) {
                         // TODO: get rid of global state!
                         # Disallow Talk:Interwiki:x type titles...
                         throw new MalformedTitleException('title-invalid-talk-namespace', $text);
                     }
                 }
             } elseif (Interwiki::isValidInterwiki($p)) {
                 # Interwiki link
                 $dbkey = $m[2];
                 $parts['interwiki'] = $this->language->lc($p);
                 # Redundant interwiki prefix to the local wiki
                 foreach ($this->localInterwikis as $localIW) {
                     if (0 == strcasecmp($parts['interwiki'], $localIW)) {
                         if ($dbkey == '') {
                             # Empty self-links should point to the Main Page, to ensure
                             # compatibility with cross-wiki transclusions and the like.
                             $mainPage = Title::newMainPage();
                             return array('interwiki' => $mainPage->getInterwiki(), 'local_interwiki' => true, 'fragment' => $mainPage->getFragment(), 'namespace' => $mainPage->getNamespace(), 'dbkey' => $mainPage->getDBkey(), 'user_case_dbkey' => $mainPage->getUserCaseDBKey());
                         }
                         $parts['interwiki'] = '';
                         # local interwikis should behave like initial-colon links
                         $parts['local_interwiki'] = true;
                         # Do another namespace split...
                         # ("continue 2" targets the enclosing do/while, not
                         # this foreach.)
                         continue 2;
                     }
                 }
                 # If there's an initial colon after the interwiki, that also
                 # resets the default namespace
                 if ($dbkey !== '' && $dbkey[0] == ':') {
                     $parts['namespace'] = NS_MAIN;
                     $dbkey = substr($dbkey, 1);
                 }
             }
             # If there's no recognized interwiki or namespace,
             # then let the colon expression be part of the title.
         }
         break;
     } while (true);
     # Split off the fragment: everything from the first '#' onwards. The
     # stored fragment drops the '#' and uses spaces instead of underscores.
     $fragment = strstr($dbkey, '#');
     if (false !== $fragment) {
         $parts['fragment'] = str_replace('_', ' ', substr($fragment, 1));
         $dbkey = substr($dbkey, 0, strlen($dbkey) - strlen($fragment));
         # remove whitespace again: prevents "Foo_bar_#"
         # becoming "Foo_bar_"
         $dbkey = preg_replace('/_*$/', '', $dbkey);
     }
     # Reject illegal characters.
     $rxTc = self::getTitleInvalidRegex();
     $matches = array();
     if (preg_match($rxTc, $dbkey, $matches)) {
         # The offending match is passed along as an exception parameter.
         throw new MalformedTitleException('title-invalid-characters', $text, array($matches[0]));
     }
     # Pages with "/./" or "/../" appearing in the URLs will often be un-
     # reachable due to the way web browsers deal with 'relative' URLs.
     # Also, they conflict with subpage syntax.  Forbid them explicitly.
     if (strpos($dbkey, '.') !== false && ($dbkey === '.' || $dbkey === '..' || strpos($dbkey, './') === 0 || strpos($dbkey, '../') === 0 || strpos($dbkey, '/./') !== false || strpos($dbkey, '/../') !== false || substr($dbkey, -2) == '/.' || substr($dbkey, -3) == '/..')) {
         throw new MalformedTitleException('title-invalid-relative', $text);
     }
     # Magic tilde sequences? Nu-uh!
     if (strpos($dbkey, '~~~') !== false) {
         throw new MalformedTitleException('title-invalid-magic-tilde', $text);
     }
     # Limit the size of titles to 255 bytes. This is typically the size of the
     # underlying database field. We make an exception for special pages, which
     # don't need to be stored in the database, and may edge over 255 bytes due
     # to subpage syntax for long titles, e.g. [[Special:Block/Long name]]
     $maxLength = $parts['namespace'] != NS_SPECIAL ? 255 : 512;
     if (strlen($dbkey) > $maxLength) {
         throw new MalformedTitleException('title-invalid-too-long', $text, array(Message::numParam($maxLength)));
     }
     # Normally, all wiki links are forced to have an initial capital letter so [[foo]]
     # and [[Foo]] point to the same place.  Don't force it for interwikis, since the
     # other site might be case-sensitive.
     $parts['user_case_dbkey'] = $dbkey;
     if ($parts['interwiki'] === '') {
         $dbkey = Title::capitalize($dbkey, $parts['namespace']);
     }
     # Can't make a link to a namespace alone... "empty" local links can only be
     # self-links with a fragment identifier.
     if ($dbkey == '' && $parts['interwiki'] === '') {
         if ($parts['namespace'] != NS_MAIN) {
             throw new MalformedTitleException('title-invalid-empty', $text);
         }
     }
     // Allow IPv6 usernames to start with '::' by canonicalizing IPv6 titles.
     // IP names are not allowed for accounts, and can only be referring to
     // edits from the IP. Given '::' abbreviations and caps/lowercaps,
     // there are numerous ways to present the same IP. Having sp:contribs scan
     // them all is silly and having some show the edits and others not is
     // inconsistent. Same for talk/userpages. Keep them normalized instead.
     if ($parts['namespace'] == NS_USER || $parts['namespace'] == NS_USER_TALK) {
         $dbkey = IP::sanitizeIP($dbkey);
     }
     // Any remaining initial :s are illegal.
     if ($dbkey !== '' && ':' == $dbkey[0]) {
         throw new MalformedTitleException('title-invalid-leading-colon', $text);
     }
     # Fill fields
     $parts['dbkey'] = $dbkey;
     return $parts;
 }
 /**
  * Build a URL from an object reference of the form "prefix|title".
  *
  * The reference is split on '|'. The first piece is checked as an
  * interwiki prefix and the second piece is substituted for the '$1'
  * placeholder in that interwiki's URL. Pieces after the second one are
  * ignored.
  *
  * @param string $objRef
  * @return bool|string The resulting URL, or false when the reference has
  *         no '|' separator or the prefix is not a valid interwiki.
  */
 private function getButtonHrefByObjectReference($objRef)
 {
     $pieces = explode('|', $objRef);
     // A reference without a separator cannot name both a wiki and a title.
     if (count($pieces) < 2) {
         return false;
     }
     $prefix = $pieces[0];
     $pageName = $pieces[1];
     if (!Interwiki::isValidInterwiki($prefix)) {
         return false;
     }
     $urlPattern = Interwiki::fetch($prefix)->getURL();
     return str_replace('$1', $pageName, $urlPattern);
 }
예제 #4
0
 /**
  * Feeds two interwiki rows ('de' flagged iw_local = 1, 'zz' flagged
  * iw_local = 0) through populateHash() / setWgInterwikiCache() and checks
  * that Interwiki::getAllPrefixes(), Interwiki::isValidInterwiki() and
  * Interwiki::fetch() report them back.
  */
 public function testArrayStorage()
 {
     $dewiki = ['iw_prefix' => 'de', 'iw_url' => 'http://de.wikipedia.org/wiki/', 'iw_local' => 1];
     $zzwiki = ['iw_prefix' => 'zz', 'iw_url' => 'http://zzwiki.org/wiki/', 'iw_local' => 0];
     $this->setWgInterwikiCache($this->populateHash('en', [$dewiki], [$zzwiki]));

     $this->assertEquals([$dewiki, $zzwiki], Interwiki::getAllPrefixes(), 'getAllPrefixes()');

     // Both prefixes must be recognised before either one is fetched.
     foreach (['de', 'zz'] as $prefix) {
         $this->assertTrue(Interwiki::isValidInterwiki($prefix), 'known prefix is valid');
     }

     // prefix => [expected URL, expected isLocal() result]
     $expectations = [
         'de' => ['http://de.wikipedia.org/wiki/', true],
         'zz' => ['http://zzwiki.org/wiki/', false],
     ];
     foreach ($expectations as $prefix => $expected) {
         list($expectedUrl, $expectedLocal) = $expected;
         $entry = Interwiki::fetch($prefix);
         $this->assertInstanceOf('Interwiki', $entry);
         $this->assertSame($expectedUrl, $entry->getURL(), 'getURL');
         $this->assertSame($expectedLocal, $entry->isLocal(), 'isLocal');
     }
 }
예제 #5
0
 /**
  * Secure and split - main initialisation function for this object
  *
  * Assumes that mDbkeyform has been set, and is urldecoded
  * and uses underscores, but not otherwise munged.  This function
  * removes illegal characters, splits off the interwiki and
  * namespace prefixes, sets the other forms, and canonicalizes
  * everything.
  *
  * On success mInterwiki, mNamespace, mUserCaseDBKey, mDbkeyform,
  * mUrlform and mTextform have been filled in, and any "#fragment"
  * has been handed to setFragment(). On failure the method returns
  * early, so some of these members may already have been overwritten.
  *
  * A title that consists only of a leading colon (optionally followed
  * by spaces/underscores) is rejected: once the colon is removed
  * there is no title text left.
  *
  * @return \type{\bool} true on success, false if the title is invalid
  */
 private function secureAndSplit()
 {
     global $wgContLang, $wgLocalInterwiki, $wgCapitalLinks;
     # Initialisation
     # The invalid-title regex is built once and kept in a static local.
     static $rxTc = false;
     if (!$rxTc) {
         # Matching titles will be held as illegal.
         $rxTc = '/' . '[^' . Title::legalChars() . ']' . '|%[0-9A-Fa-f]{2}' . '|&[A-Za-z0-9\\x80-\\xff]+;' . '|&#[0-9]+;' . '|&#x[0-9A-Fa-f]+;' . '/S';
     }
     $this->mInterwiki = $this->mFragment = '';
     # Usually NS_MAIN
     $this->mNamespace = $this->mDefaultNamespace;
     $dbkey = $this->mDbkeyform;
     # Strip Unicode bidi override characters.
     # Sometimes they slip into cut-n-pasted page titles, where the
     # override chars get included in list displays.
     $dbkey = preg_replace('/\\xE2\\x80[\\x8E\\x8F\\xAA-\\xAE]/S', '', $dbkey);
     # Clean up whitespace: collapse runs of spaces/underscores and trim.
     $dbkey = preg_replace('/[ _]+/', '_', $dbkey);
     $dbkey = trim($dbkey, '_');
     if ('' == $dbkey) {
         return false;
     }
     if (false !== strpos($dbkey, UTF8_REPLACEMENT)) {
         # Contained illegal UTF-8 sequences or forbidden Unicode chars.
         return false;
     }
     $this->mDbkeyform = $dbkey;
     # Initial colon indicates main namespace rather than specified default
     # but should not create invalid {ns,title} pairs such as {0,Project:Foo}
     if (':' == $dbkey[0]) {
         $this->mNamespace = NS_MAIN;
         # remove the colon but continue processing
         $dbkey = substr($dbkey, 1);
         # remove any subsequent whitespace
         $dbkey = trim($dbkey, '_');
         if ('' == $dbkey) {
             # Nothing but the colon was supplied. Without this check the
             # empty remainder would slip through as a main-namespace
             # title, because the "namespace alone" check further down
             # only rejects empty titles outside NS_MAIN.
             return false;
         }
     }
     # Namespace or interwiki prefix
     # $firstPass becomes false once a local interwiki prefix has been
     # stripped; a further interwiki prefix is then refused.
     $firstPass = true;
     # The loop body runs once, or twice when a local interwiki prefix is
     # stripped: "continue" re-enters it (the condition is always true),
     # every other path leaves through "break" or "return".
     do {
         $m = array();
         if (preg_match("/^(.+?)_*:_*(.*)\$/S", $dbkey, $m)) {
             $p = $m[1];
             if ($ns = $wgContLang->getNsIndex($p)) {
                 # Ordinary namespace
                 $dbkey = $m[2];
                 $this->mNamespace = $ns;
             } elseif (Interwiki::isValidInterwiki($p)) {
                 if (!$firstPass) {
                     # Can't make a local interwiki link to an interwiki link.
                     # That's just crazy!
                     return false;
                 }
                 # Interwiki link
                 $dbkey = $m[2];
                 $this->mInterwiki = $wgContLang->lc($p);
                 # Redundant interwiki prefix to the local wiki
                 if (0 == strcasecmp($this->mInterwiki, $wgLocalInterwiki)) {
                     if ($dbkey == '') {
                         # Can't have an empty self-link
                         return false;
                     }
                     $this->mInterwiki = '';
                     $firstPass = false;
                     # Do another namespace split...
                     continue;
                 }
                 # If there's an initial colon after the interwiki, that also
                 # resets the default namespace
                 if ($dbkey !== '' && $dbkey[0] == ':') {
                     $this->mNamespace = NS_MAIN;
                     $dbkey = substr($dbkey, 1);
                 }
             }
             # If there's no recognized interwiki or namespace,
             # then let the colon expression be part of the title.
         }
         break;
     } while (true);
     # We already know that some pages won't be in the database!
     if ('' != $this->mInterwiki || NS_SPECIAL == $this->mNamespace) {
         $this->mArticleID = 0;
     }
     # Split off the fragment: everything from the first '#' onwards.
     $fragment = strstr($dbkey, '#');
     if (false !== $fragment) {
         $this->setFragment($fragment);
         $dbkey = substr($dbkey, 0, strlen($dbkey) - strlen($fragment));
         # remove whitespace again: prevents "Foo_bar_#"
         # becoming "Foo_bar_"
         $dbkey = preg_replace('/_*$/', '', $dbkey);
     }
     # Reject illegal characters.
     if (preg_match($rxTc, $dbkey)) {
         return false;
     }
     /**
      * Pages with "/./" or "/../" appearing in the URLs will often be un-
      * reachable due to the way web browsers deal with 'relative' URLs.
      * Also, they conflict with subpage syntax.  Forbid them explicitly.
      */
     if (strpos($dbkey, '.') !== false && ($dbkey === '.' || $dbkey === '..' || strpos($dbkey, './') === 0 || strpos($dbkey, '../') === 0 || strpos($dbkey, '/./') !== false || strpos($dbkey, '/../') !== false || substr($dbkey, -2) == '/.' || substr($dbkey, -3) == '/..')) {
         return false;
     }
     /**
      * Magic tilde sequences? Nu-uh!
      */
     if (strpos($dbkey, '~~~') !== false) {
         return false;
     }
     /**
      * Limit the size of titles to 255 bytes.
      * This is typically the size of the underlying database field.
      * We make an exception for special pages, which don't need to be stored
      * in the database, and may edge over 255 bytes due to subpage syntax
      * for long titles, e.g. [[Special:Block/Long name]]
      *
      * (&& binds tighter than ||: 255 bytes outside NS_SPECIAL, 512 bytes
      * always.)
      */
     if ($this->mNamespace != NS_SPECIAL && strlen($dbkey) > 255 || strlen($dbkey) > 512) {
         return false;
     }
     /**
      * Normally, all wiki links are forced to have
      * an initial capital letter so [[foo]] and [[Foo]]
      * point to the same place.
      *
      * Don't force it for interwikis, since the other
      * site might be case-sensitive.
      */
     $this->mUserCaseDBKey = $dbkey;
     if ($wgCapitalLinks && $this->mInterwiki == '') {
         $dbkey = $wgContLang->ucfirst($dbkey);
     }
     /**
      * Can't make a link to a namespace alone...
      * "empty" local links can only be self-links
      * with a fragment identifier.
      */
     if ($dbkey == '' && $this->mInterwiki == '' && $this->mNamespace != NS_MAIN) {
         return false;
     }
     // Allow IPv6 usernames to start with '::' by canonicalizing IPv6 titles.
     // IP names are not allowed for accounts, and can only be referring to
     // edits from the IP. Given '::' abbreviations and caps/lowercaps,
     // there are numerous ways to present the same IP. Having sp:contribs scan
     // them all is silly and having some show the edits and others not is
     // inconsistent. Same for talk/userpages. Keep them normalized instead.
     $dbkey = $this->mNamespace == NS_USER || $this->mNamespace == NS_USER_TALK ? IP::sanitizeIP($dbkey) : $dbkey;
     // Any remaining initial :s are illegal.
     if ($dbkey !== '' && ':' == $dbkey[0]) {
         return false;
     }
     # Fill fields
     $this->mDbkeyform = $dbkey;
     $this->mUrlform = wfUrlencode($dbkey);
     $this->mTextform = str_replace('_', ' ', $dbkey);
     return true;
 }