/**
 * Replace a captured RTE data index with the stored data rendered as attributes.
 *
 * Looks up the 'data' entry whose index is in $var[1]. When that entry
 * references stored wikitext (via 'wikitextIdx'), the wikitext is fetched,
 * entity-decoded, unmarked and attached as $data['wikitext']. The wikitext is
 * then inspected for leftover markers; each detected situation appends an
 * edge-case code to RTE::$edgeCases.
 *
 * @param array $var match array; $var[1] is read as the numeric data index
 *                   (presumably a preg_replace_callback() match - confirm against caller)
 * @return mixed whatever self::convertDataToAttributes() returns for the data
 */
public static function replaceIdxByData($var) {
    $data = RTEData::get('data', intval($var[1]));

    if (isset($data['type']) && isset($data['wikitextIdx'])) {
        $wikitext = RTEData::get('wikitext', $data['wikitextIdx']);

        // macbre: correctly handle and unmark entities inside links wikitext (RT #38844)
        $wikitext = htmlspecialchars_decode($wikitext);
        $wikitext = RTEParser::unmarkEntities($wikitext);

        $data['wikitext'] = $wikitext;
        unset($data['wikitextIdx']);

        if (strpos($wikitext, '_rte_wikitextidx') !== false) {
            // a wikitext marker is still embedded in the stored wikitext
            RTE::$edgeCases[] = 'COMPLEX.01';
        } elseif (strpos($wikitext, '_rte_dataidx') !== false) {
            // a data marker is still embedded in the stored wikitext
            RTE::$edgeCases[] = 'COMPLEX.02';
        } elseif ($data['type'] == 'double-brackets') {
            // flagged unless the last "{{" is at offset 0 or the first "{{" is the final two bytes
            if (strrpos($wikitext, '{{') !== 0 && strpos($wikitext, '{{') !== strlen($wikitext) - 2) {
                RTE::$edgeCases[] = 'COMPLEX.03';
            }
        } else {
            // NOTE(review): the needle was an empty string in the reviewed source, which
            // makes strpos() warn and return false (PHP < 8) or always return 0 (PHP >= 8).
            // It looks like a control character lost in transit; "\x7f" (the byte that
            // MediaWiki strip markers begin with) is assumed here - confirm against upstream.
            if (strpos($wikitext, "\x7f") !== false) {
                RTE::$edgeCases[] = 'COMPLEX.07';
            }
        }
    }

    return self::convertDataToAttributes($data);
}
/**
 * Handle links: turn a link node back into wikitext.
 *
 * The RTE data attached to the node decides the link flavour ('internal',
 * 'external', 'external-raw'); nodes without RTE data are treated as 'pasted'
 * links built from their href attribute. Any other type yields an empty
 * string before the table-cell fix-up is applied.
 *
 * @see http://www.mediawiki.org/wiki/Help:Links
 *
 * @param mixed  $node        link node; getAttribute('href') is called on it
 *                            (presumably a DOMElement - confirm against caller)
 * @param string $textContent text content of the link
 * @return string wikitext for the link, passed through self::fixForTableCell()
 */
private function handleLink($node, $textContent) {
    wfProfileIn(__METHOD__);

    // get RTE data
    $data = self::getRTEData($node);

    // handle pasted links
    if (empty($data)) {
        $data = array('type' => 'pasted', 'link' => $node->getAttribute('href'));
    }

    // generate wikitext
    $out = '';

    // unmark HTML entities and decode not marked HTML entities (RT #38844)
    $textContentOriginal = RTEParser::unmarkEntities($textContent, true);

    switch ($data['type']) {
        case 'internal':
            // following wikitext optimization will be performed:
            //
            // [[foo|foo]] -> [[foo]]
            // [[foo|foos]] -> [[foo]]s

            // Compare link targets as strings only. Loose comparison treats numeric
            // strings as numbers, so [[007|7]] would wrongly collapse to [[007]].
            $data['link'] = (string)$data['link'];
            $originalText = (string)$textContentOriginal;

            // false = "no trail decided yet"; '' = "explicitly no trail and no description"
            $trail = false;

            // start link wikitext
            $out = "[[";

            // handle [[:Category:foo]]
            if (isset($data['noforce']) && $data['noforce'] == false) {
                $out .= ':';
            } elseif (self::isNamespacedLink($node, $data)) {
                $out .= ':';
            }

            // [[<current_page_name>/foo|/foo]] -> [[/foo]]
            global $wgTitle;
            $pageName = $wgTitle->getPrefixedText();

            if ($data['link'] === $pageName . $originalText) {
                $data['link'] = (string)$textContent;
            }

            // support [[/foo/]] (RT #56095) and [[Page/foo|foo]] (RT #143377)
            // both are giving the same entries in $data - detect them using original wikitext
            if ($data['link'] === "{$pageName}/{$originalText}") {
                // a missing 'wikitext' entry is treated like wikitext without a pipe
                $wikitext = isset($data['wikitext']) ? (string)$data['wikitext'] : '';

                // keep [[/foo/]] links (RT #56095)
                if (strpos($wikitext, '|') === false) {
                    $data['link'] = "/{$textContent}/";

                    // don't check for possible trails and after-pipe descriptions
                    $trail = '';
                }
            }

            $out .= $data['link'];

            // check for possible trail
            // [[foo|foos]] -> [[foo]]s
            $linkLength = strlen($data['link']);
            if (strlen($originalText) > $linkLength
                && substr($originalText, 0, $linkLength) === $data['link']
            ) {
                $possibleTrail = substr($originalText, $linkLength);

                // check against trail valid characters regexp
                // if there are matches, and there are no trailing characters
                // fbId::45461 - [[Tower|Towers of Wizardry]] should not convert to [[Tower]]s of Wizardry
                preg_match(self::getTrailRegex(), $possibleTrail, $matches);
                if ($matches && empty($matches[2])) {
                    $trail = $matches[1];
                }
            }

            // link description after pipe
            if ($trail === false && $data['link'] !== $originalText) {
                $out .= "|{$textContent}";
            }

            // close link wikitext + trail
            $out .= "]]{$trail}";

            // protect HTML entities (RT #38844)
            $out = RTEParser::markEntities($out);
            break;

        case 'external':
            // optimize external links
            // [http://wp.pl http://wp.pl] -> http://wp.pl
            if ((string)$textContent === (string)$data['link']) {
                $out = $data['link'];
                break;
            }

            // handle autonumbered links
            $autonumber = false;
            if (isset($data['linktype']) && $data['linktype'] == 'autonumber') {
                // validate text content - it must be exactly "[x]" (surrounding whitespace
                // tolerated); an unanchored match would silently drop a description
                // such as "Source [1]"
                if (preg_match('%^\s*\[\d+\]\s*$%D', $textContent)) {
                    // yes, this is autonumbered external link
                    $autonumber = true;
                }
            }

            $out = "[{$data['link']}";
            if (!$autonumber) {
                // add link description
                $out .= " {$textContent}";
            }
            $out .= ']';
            break;

        case 'external-raw':
            // validate textContent (should be valid URL)
            $regex = '%' . self::getUrlProtocols() . '%';
            if (preg_match($regex, $textContent)) {
                // let's return it as raw link
                $out = $textContent;
            } else {
                // URL text content has changed -> use external link like [http://wp.pl link]
                $out = "[{$data['link']} {$textContent}]";
            }
            break;

        case 'pasted':
            // validate link (should be valid URL)
            $regex = '%' . self::getUrlProtocols() . '%';
            if (preg_match($regex, $data['link'])) {
                // optimize wikisyntax
                if ((string)$data['link'] === (string)$textContent) {
                    $out = $data['link'];
                } else {
                    $out = "[{$data['link']} {$textContent}]";
                }
            } else {
                // just return link content
                $out = $textContent;
            }
            break;
    }

    // RT #34043
    $out = self::fixForTableCell($node, $out);

    wfProfileOut(__METHOD__);
    return $out;
}
/**
 * Process [[ ]] wikilinks (RIL)
 *
 * Splits the text on "[[" and handles each candidate link in turn. Depending
 * on the target, a link is replaced by an image, registered as a category or
 * language link, turned into a self link or media link, or replaced by a link
 * holder. Candidates that do not form a valid link are copied to the output
 * unchanged. When $wgRTEParserEnabled is set, RTE markers/placeholders are
 * emitted instead of some of the regular handling.
 *
 * @param string &$s wikitext to process; rewritten in place
 * @return LinkHolderArray holders created for the links found in $s
 * @throws MWException when $this->mTitle is null
 *
 * @private
 */
function replaceInternalLinks2(&$s) {
    wfProfileIn(__METHOD__);

    # RTE (Rich Text Editor) - begin
    # @author: Inez Korczyński
    global $wgRTEParserEnabled;
    # RTE (Rich Text Editor) - end

    wfProfileIn(__METHOD__ . '-setup');
    static $tc = FALSE, $e1, $e1_img;
    # the % is needed to support urlencoded titles as well
    if (!$tc) {
        $tc = Title::legalChars() . '#%';
        # Match a link having the form [[namespace:link|alternate]]trail
        $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
        # Match cases where there is no "]]", which might still be images
        $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
    }

    $holders = new LinkHolderArray($this);

    # split the entire text string on occurences of [[
    $a = StringUtils::explode('[[', ' ' . $s);
    # get the first element (all text up to first [[), and remove the space we added
    $s = $a->current();
    $a->next();
    $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
    $s = substr($s, 1);

    $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
    $e2 = null;
    if ($useLinkPrefixExtension) {
        # Match the end of a line for a word that's not followed by whitespace,
        # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
        $e2 = wfMsgForContent('linkprefix');
    }

    if (is_null($this->mTitle)) {
        # close both profiling sections before bailing out
        wfProfileOut(__METHOD__ . '-setup');
        wfProfileOut(__METHOD__);
        throw new MWException(__METHOD__ . ": \$this->mTitle is null\n");
    }
    $nottalk = !$this->mTitle->isTalkPage();

    if ($useLinkPrefixExtension) {
        $m = array();
        if (preg_match($e2, $s, $m)) {
            $first_prefix = $m[2];
        } else {
            $first_prefix = false;
        }
    } else {
        $prefix = '';
    }

    if ($this->getConverterLanguage()->hasVariants()) {
        $selflink = $this->getConverterLanguage()->autoConvertToAllVariants($this->mTitle->getPrefixedText());
    } else {
        $selflink = array($this->mTitle->getPrefixedText());
    }
    $useSubpages = $this->areSubpagesAllowed();
    wfProfileOut(__METHOD__ . '-setup');

    # Loop for each link
    for (; $line !== false && $line !== null; $a->next(), $line = $a->current()) {
        # Check for excessive memory usage
        if ($holders->isBig()) {
            # Too big
            # Do the existence check, replace the link holders and clear the array
            $holders->replace($s);
            $holders->clear();
        }

        # RTE (Rich Text Editor) - begin
        # @author: Inez Korczyński
        if (!empty($wgRTEParserEnabled)) {
            $RTE_wikitextIdx = RTEMarker::getDataIdx(RTEMarker::INTERNAL_WIKITEXT, $line);

            // decode entities inside links wikimarkup (RT #38844)
            if ($pos = strpos($line, ']]')) {
                // unmark entities inside link
                $link = substr($line, 0, $pos);
                $link = RTEParser::unmarkEntities($link);

                // leave the rest of the line untouched
                $line = $link . substr($line, $pos);
            }
        }
        # RTE - end

        if ($useLinkPrefixExtension) {
            wfProfileIn(__METHOD__ . '-prefixhandling');
            if (preg_match($e2, $s, $m)) {
                $prefix = $m[2];
                $s = $m[1];
            } else {
                $prefix = '';
            }
            # first link
            if ($first_prefix) {
                $prefix = $first_prefix;
                $first_prefix = false;
            }
            wfProfileOut(__METHOD__ . '-prefixhandling');
        }

        $might_be_img = false;

        wfProfileIn(__METHOD__ . "-e1");
        if (preg_match($e1, $line, $m)) { # page with normal text or alt
            $text = $m[2];
            # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
            # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks things,
            # the real problem is with the $e1 regex
            # See bug 1300.
            #
            # Still some problems for cases where the ] is meant to be outside punctuation,
            # and no image is in sight. See bug 2095.
            #
            if ($text !== '' && substr($m[3], 0, 1) === ']' && strpos($text, '[') !== false) {
                $text .= ']'; # so that replaceExternalLinks($text) works later
                $m[3] = substr($m[3], 1);
            }
            # fix up urlencoded title texts
            if (strpos($m[1], '%') !== false) {
                # Should anchors '#' also be rejected?
                # Decoding may turn %3C / %3E into raw angle brackets; re-escape them as
                # entities so a url-encoded title cannot smuggle markup into the output.
                $m[1] = str_replace(array('<', '>'), array('&lt;', '&gt;'), rawurldecode($m[1]));
            }
            $trail = $m[3];
        } elseif (preg_match($e1_img, $line, $m)) { # Invalid, but might be an image with a link in its caption
            $might_be_img = true;
            $text = $m[2];
            if (strpos($m[1], '%') !== false) {
                $m[1] = rawurldecode($m[1]);
            }
            $trail = "";
        } else { # Invalid form; output directly
            $s .= $prefix . '[[' . $line;
            wfProfileOut(__METHOD__ . "-e1");
            continue;
        }
        wfProfileOut(__METHOD__ . "-e1");
        wfProfileIn(__METHOD__ . "-misc");

        # Don't allow internal links to pages containing
        # PROTO: where PROTO is a valid URL protocol; these
        # should be external links.
        if (preg_match('/^(?:' . wfUrlProtocols() . ')/', $m[1])) {
            $s .= $prefix . '[[' . $line;
            wfProfileOut(__METHOD__ . "-misc");
            continue;
        }

        # Make subpage if necessary
        if ($useSubpages) {
            $link = $this->maybeDoSubpageLink($m[1], $text);
        } else {
            $link = $m[1];
        }

        $noforce = substr($m[1], 0, 1) !== ':';
        if (!$noforce) {
            # Strip off leading ':'
            $link = substr($link, 1);
        }

        wfProfileOut(__METHOD__ . "-misc");
        wfProfileIn(__METHOD__ . "-title");
        $nt = Title::newFromText($this->mStripState->unstripNoWiki($link));
        if ($nt === null) {
            $s .= $prefix . '[[' . $line;
            wfProfileOut(__METHOD__ . "-title");
            continue;
        }

        $ns = $nt->getNamespace();
        $iw = $nt->getInterWiki();
        wfProfileOut(__METHOD__ . "-title");

        if ($might_be_img) { # if this is actually an invalid link
            wfProfileIn(__METHOD__ . "-might_be_img");
            if ($ns == NS_FILE && $noforce) { # but might be an image
                $found = false;
                while (true) {
                    # look at the next 'line' to see if we can close it there
                    $a->next();
                    $next_line = $a->current();
                    if ($next_line === false || $next_line === null) {
                        break;
                    }
                    $m = explode(']]', $next_line, 3);
                    if (count($m) == 3) {
                        # the first ]] closes the inner link, the second the image
                        $found = true;
                        $text .= "[[{$m[0]}]]{$m[1]}";
                        $trail = $m[2];
                        break;
                    } elseif (count($m) == 2) {
                        # if there's exactly one ]] that's fine, we'll keep looking
                        $text .= "[[{$m[0]}]]{$m[1]}";
                    } else {
                        # if $next_line is invalid too, we need look no further
                        $text .= '[[' . $next_line;
                        break;
                    }
                }
                if (!$found) {
                    # we couldn't find the end of this imageLink, so output it raw
                    # but don't ignore what might be perfectly normal links in the text we've examined
                    $holders->merge($this->replaceInternalLinks2($text));
                    $s .= "{$prefix}[[{$link}|{$text}";
                    # note: no $trail, because without an end, there *is* no trail
                    wfProfileOut(__METHOD__ . "-might_be_img");
                    continue;
                }
            } else { # it's not an image, so output it raw
                $s .= "{$prefix}[[{$link}|{$text}";
                # note: no $trail, because without an end, there *is* no trail
                wfProfileOut(__METHOD__ . "-might_be_img");
                continue;
            }
            wfProfileOut(__METHOD__ . "-might_be_img");
        }

        $wasblank = $text == '';
        if ($wasblank) {
            $text = $link;
        } else {
            # Bug 4598 madness. Handle the quotes only if they come from the alternate part
            # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
            # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
            #    -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
            $text = $this->doQuotes($text);
        }

        # Link not escaped by : , create the various objects
        if ($noforce) {
            global $wgContLang;

            # Interwikis
            if (empty($wgRTEParserEnabled)) { # wikia
                wfProfileIn(__METHOD__ . "-interwiki");
                if ($iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName($iw)) {
                    $this->mOutput->addLanguageLink($nt->getFullText());
                    $s = rtrim($s . $prefix);
                    $s .= trim($trail, "\n") == '' ? '' : $prefix . $trail;
                    wfProfileOut(__METHOD__ . "-interwiki");
                    continue;
                }
                wfProfileOut(__METHOD__ . "-interwiki");
            } # wikia

            if ($ns == NS_FILE) {
                wfProfileIn(__METHOD__ . "-image");
                if (!wfIsBadImage($nt->getDBkey(), $this->mTitle)) {
                    if ($wasblank) {
                        # if no parameters were passed, $text
                        # becomes something like "File:Foo.png",
                        # which we don't want to pass on to the
                        # image generator
                        $text = '';
                    } else {
                        # recursively parse links inside the image caption
                        # actually, this will parse them in any other parameters, too,
                        # but it might be hard to fix that, and it doesn't matter ATM
                        $text = $this->replaceExternalLinks($text);
                        $holders->merge($this->replaceInternalLinks2($text));
                    }

                    # RTE (Rich Text Editor) - begin
                    # @author: Inez Korczyński
                    if (!empty($wgRTEParserEnabled)) {
                        $text = RTEMarker::generate(RTEMarker::IMAGE_DATA, $RTE_wikitextIdx) . $text;
                    }
                    # RTE - end

                    # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
                    /** wikia
                    $s .= $prefix . $this->armorLinks( $this->makeImage( $nt, $text, $holders ) ) . $trail;
                    wikia **/
                    # cater for new placeholder-in-template namespace - Bartek
                    # TODO: Get the hell out with this code from here this can be done in hook handler fired in makeImage function - Inez
                    if ("Template Placeholder" != $nt->getText()) {
                        $s .= $prefix . $this->armorLinks($this->makeImage($nt, $text, $holders)) . $trail;
                    } else {
                        $s .= $prefix . $this->armorLinks(ImagePlaceholder_makeDullImage($nt, $text, $holders)) . $trail;
                    }
                } else {
                    $s .= $prefix . $trail;
                }
                wfProfileOut(__METHOD__ . "-image");
                continue;
            }

            if ($ns == NS_CATEGORY) {
                wfProfileIn(__METHOD__ . "-category");

                # RTE (Rich Text Editor) - begin
                # @author: Inez Korczyński
                # Category handling
                if (!empty($wgRTEParserEnabled)) {
                    $dataIdx = RTEData::put('placeholder', array('type' => 'category', 'wikitextIdx' => $RTE_wikitextIdx));
                    $s .= $prefix . RTEMarker::generate(RTEMarker::PLACEHOLDER, $dataIdx) . $trail;
                } else {
                    $s = rtrim($s . "\n"); # bug 87

                    if ($wasblank) {
                        $sortkey = $this->getDefaultSort();
                    } else {
                        $sortkey = $text;
                    }
                    $sortkey = Sanitizer::decodeCharReferences($sortkey);
                    $sortkey = str_replace("\n", '', $sortkey);
                    $sortkey = $this->getConverterLanguage()->convertCategoryKey($sortkey);
                    $this->mOutput->addCategory($nt->getDBkey(), $sortkey);

                    /**
                     * Strip the whitespace Category links produce, see bug 87
                     * @todo We might want to use trim($tmp, "\n") here.
                     */
                    $s .= trim($prefix . $trail, "\n") == '' ? '' : $prefix . $trail;
                }
                wfProfileOut(__METHOD__ . "-category");
                continue;
            }

            # Wikia change begin
            # @author macbre
            $hookRet = wfRunHooks('ParserReplaceInternalLinks2NoForce', array(&$s, $nt, $prefix, $trail, isset($RTE_wikitextIdx) ? $RTE_wikitextIdx : null));
            if ($hookRet === false) {
                continue;
            }
            # Wikia change end
        }

        # RTE (Rich Text Editor) - begin
        # @author: Inez Korczyński
        # No special handling for self-linking in RTE mode

        # Self-link checking
        if (empty($wgRTEParserEnabled) && $nt->getFragment() === '' && $ns != NS_SPECIAL) {
            if (in_array($nt->getPrefixedText(), $selflink, true)) {
                $s .= $prefix . Linker::makeSelfLinkObj($nt, $text, '', $trail);
                continue;
            }
        }
        # RTE - end

        # NS_MEDIA is a pseudo-namespace for linking directly to a file
        # @todo FIXME: Should do batch file existence checks, see comment below
        if ($ns == NS_MEDIA) {
            # RTE (Rich Text Editor) - begin
            # @author: macbre
            # BugId:1694 - handle [[Media:xxx]] as placeholders
            if (!empty($wgRTEParserEnabled)) {
                $dataIdx = RTEData::put('placeholder', array('type' => 'media', 'wikitextIdx' => $RTE_wikitextIdx));
                $s .= $prefix . RTEMarker::generate(RTEMarker::PLACEHOLDER, $dataIdx) . $trail;
                continue;
            }
            # RTE - end

            wfProfileIn(__METHOD__ . "-media");
            # Give extensions a chance to select the file revision for us
            $options = array();
            $descQuery = false;
            wfRunHooks('BeforeParserFetchFileAndTitle', array($this, $nt, &$options, &$descQuery));
            # Fetch and register the file (file title may be different via hooks)
            list($file, $nt) = $this->fetchFileAndTitle($nt, $options);
            # Cloak with NOPARSE to avoid replacement in replaceExternalLinks
            $s .= $prefix . $this->armorLinks(Linker::makeMediaLinkFile($nt, $file, $text)) . $trail;
            wfProfileOut(__METHOD__ . "-media");
            continue;
        }

        wfProfileIn(__METHOD__ . "-always_known");

        # RTE (Rich Text Editor) - begin
        # @author: Inez Korczyński
        if (!empty($wgRTEParserEnabled)) {
            $text = RTEMarker::generate(RTEMarker::INTERNAL_DATA, RTEData::put('data', array('type' => 'internal', 'wikitextIdx' => $RTE_wikitextIdx, 'text' => $text, 'link' => $link, 'wasblank' => $wasblank, 'noforce' => $noforce))) . $text;
        }
        # RTE - end

        # Some titles, such as valid special pages or files in foreign repos, should
        # be shown as bluelinks even though they're not included in the page table
        #
        # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
        # batch file existence checks for NS_FILE and NS_MEDIA
        if ($iw == '' && $nt->isAlwaysKnown()) {
            $this->mOutput->addLink($nt);
            $s .= $this->makeKnownLinkHolder($nt, $text, array(), $trail, $prefix);
        } else {
            # Links will be added to the output link list after checking
            $s .= $holders->makeHolder($nt, $text, array(), $trail, $prefix);
        }
        wfProfileOut(__METHOD__ . "-always_known");
    }

    wfProfileOut(__METHOD__);
    return $holders;
}