Example #1
0
 /**
  * Examine one image row and, when its stored name is not in canonical form,
  * hand it to the matching repair routine.
  *
  * @param $row Object: result row; only img_name is read here
  * @return mixed The value of progress(1) when the row was acted upon; nothing
  *               is returned when the name is already consistent
  */
 function processPage($row)
 {
     global $wgContLang;
     $original = $row->img_name;
     if ($original == '') {
         // A row without a name cannot be repaired, so it is removed.
         $this->killRow($original);
         return $this->progress(1);
     }
     // Repair pipeline, innermost call first: undo percent-encoding, let the
     // content language check the encoding, then run the UTF-8 normaliser.
     $candidate = UtfNormal::cleanUp($wgContLang->checkTitleEncoding(rawurldecode($original)));
     $title = Title::makeTitleSafe(NS_IMAGE, $candidate);
     if ($title === null) {
         // Even the repaired name is not a usable title: rename to a safe one.
         $this->log("page {$original} ({$candidate}) is illegal.");
         $this->pokeFile($original, $this->buildSafeTitle($candidate));
         return $this->progress(1);
     }
     $dbKey = $title->getDbKey();
     if ($dbKey !== $original) {
         // The title is valid but its key differs from what is stored.
         $this->log("page {$original} ({$dbKey}) doesn't match self.");
         $this->pokeFile($original, $dbKey);
         return $this->progress(1);
     }
     $this->progress(0);
 }
 /**
  * Same as self::element(), except that attribute values and the text
  * content are first passed through UtfNormal::cleanUp() so that invalid
  * UTF-8 never reaches the generated XML.
  *
  * Empty/falsy attributes or contents are handed to self::element() untouched.
  *
  * @param $element String: element name
  * @param $attribs Array: Name=>value pairs. Values will be escaped.
  * @param $contents String: NULL to make an open tag only; '' for a contentless closed tag (default)
  * @return string
  */
 public static function elementClean($element, $attribs = array(), $contents = '')
 {
     $cleanAttribs = $attribs ? array_map(array('UtfNormal', 'cleanUp'), $attribs) : $attribs;
     $cleanContents = $contents ? UtfNormal::cleanUp($contents) : $contents;
     return self::element($element, $cleanAttribs, $cleanContents);
 }
Example #3
0
 /**
  * Same as self::element(), except that attribute values and the text
  * content are first passed through UtfNormal::cleanUp() so that invalid
  * UTF-8 never reaches the generated XML. The content normalisation is
  * wrapped in its own "-norm" profiling section.
  *
  * @param $element String: element name
  * @param $attribs Array: Name=>value pairs. Values will be escaped.
  * @param $contents String: NULL to make an open tag only; '' for a contentless closed tag (default)
  * @return string
  */
 public static function elementClean($element, $attribs = array(), $contents = '')
 {
     if ($attribs) {
         $normalizer = array('UtfNormal', 'cleanUp');
         $attribs = array_map($normalizer, $attribs);
     }
     if (!$contents) {
         // Nothing to normalise (NULL, '' or another falsy value): pass it through as-is.
         return self::element($element, $attribs, $contents);
     }
     $section = __METHOD__ . '-norm';
     wfProfileIn($section);
     $contents = UtfNormal::cleanUp($contents);
     wfProfileOut($section);
     return self::element($element, $attribs, $contents);
 }
Example #4
0
 /**
  * Validate one watchlist row and remove it when it is not usable.
  *
  * A row is kept only if it has a non-zero wl_user and its title, after a
  * round trip through UtfNormal::cleanUp() and Title::newFromText(), still
  * equals the title built directly from the row.
  *
  * @param $row Object: result row; wl_user, wl_namespace and wl_title are read
  * @return mixed The value of progress(1) when the row was removed; nothing otherwise
  */
 function processPage($row)
 {
     $stored = Title::makeTitle($row->wl_namespace, $row->wl_title);
     $reparsed = Title::newFromText(UtfNormal::cleanUp($stored->getPrefixedText()));
     $isValid = $row->wl_user != 0 && !is_null($reparsed) && $reparsed->equals($stored);
     if ($isValid) {
         $this->progress(0);
         return;
     }
     $this->log("invalid watch by {$row->wl_user} for ({$row->wl_namespace}, \"{$row->wl_title}\")");
     $this->removeWatch($row);
     return $this->progress(1);
 }
Example #5
0
 /**
  * Build the HTML body of a feed item describing one change to a page.
  *
  * The result always starts with a paragraph holding the action text and the
  * formatted comment. For pages in a non-negative namespace that anonymous
  * users may read, a diff (when $oldid is set) or the new page text is
  * appended. Diffs longer than $wgFeedDiffCutoff, or any diff when the
  * cutoff is zero or negative, are replaced by a link to the diff page.
  *
  * @param $title Title object
  * @param $oldid Integer: old revision's id; falsy for a new page
  * @param $newid Integer: new revision's id
  * @param $timestamp Integer: new revision's timestamp
  * @param $comment String: new revision's comment
  * @param $actiontext String: text of the action; in case of log event
  * @return String
  */
 public static function formatDiffRow($title, $oldid, $newid, $timestamp, $comment, $actiontext = '')
 {
     global $wgFeedDiffCutoff, $wgContLang, $wgUser;
     wfProfileIn(__FUNCTION__);
     $skin = $wgUser->getSkin();
     # log entries
     $completeText = '<p>' . implode(' ', array_filter(array($actiontext, $skin->formatComment($comment)))) . "</p>\n";
     // NOTE: Check permissions for anonymous users, not current user.
     //       No "privileged" version should end up in the cache.
     //       Most feed readers will not log in anyway.
     $anon = new User();
     $accErrors = $title->getUserPermissionsErrors('read', $anon, true);
     if ($title->getNamespace() >= 0 && !$accErrors) {
         if ($oldid) {
             wfProfileIn(__FUNCTION__ . "-dodiff");
             // Always define $diffText: when the cutoff disables inline diffs
             // the diff engine is never run, and the length check below must
             // not read an undefined variable.
             $diffText = '';
             // Don't bother generating the diff if we won't be able to show it
             if ($wgFeedDiffCutoff > 0) {
                 $de = new DifferenceEngine($title, $oldid, $newid);
                 $diffText = $de->getDiff(wfMsg('previousrevision'), wfMsg('revisionasof', $wgContLang->timeanddate($timestamp)));
             }
             if ($wgFeedDiffCutoff <= 0 || strlen($diffText) > $wgFeedDiffCutoff) {
                 // Omit large diffs
                 $diffLink = $title->escapeFullUrl('diff=' . $newid . '&oldid=' . $oldid);
                 $diffText = '<a href="' . $diffLink . '">' . htmlspecialchars(wfMsgForContent('showdiff')) . '</a>';
             } elseif ($diffText === false) {
                 // Error in diff engine, probably a missing revision
                 $diffText = "<p>Can't load revision {$newid}</p>";
             } else {
                 // Diff output fine, clean up any illegal UTF-8
                 $diffText = UtfNormal::cleanUp($diffText);
                 $diffText = self::applyDiffStyle($diffText);
             }
             wfProfileOut(__FUNCTION__ . "-dodiff");
         } else {
             // No old revision: present the escaped text of the new revision,
             // or an empty body if that revision cannot be loaded.
             $rev = Revision::newFromId($newid);
             if (is_null($rev)) {
                 $newtext = '';
             } else {
                 $newtext = $rev->getText();
             }
             $diffText = '<p><b>' . wfMsg('newpage') . '</b></p>' . '<div>' . nl2br(htmlspecialchars($newtext)) . '</div>';
         }
         $completeText .= $diffText;
     }
     wfProfileOut(__FUNCTION__);
     return $completeText;
 }
 /**
  * Returns the normalized form of the given page title, using the normalization rules of the given site.
  * If the given title is a redirect, the redirect will be resolved and the redirect target is returned.
  *
  * @note  : This actually makes an API request to the remote site, so beware that this function is slow and depends
  *          on an external service.
  *
  * @note  : If MW_PHPUNIT_TEST is defined, the call to the external site is skipped, and the title
  *          is normalized using the local normalization rules as implemented by the Title class.
  *
  * @see Site::normalizePageName
  *
  * @since 1.21
  *
  * @param string $pageName
  *
  * @return string|bool The normalized title, or false when the title is invalid or missing,
  *                     or when the remote request or its response could not be used.
  * @throws MWException If $pageName is not a string
  */
 public function normalizePageName($pageName)
 {
     // Check if we have strings as arguments.
     if (!is_string($pageName)) {
         throw new MWException('$pageName must be a string');
     }
     if (defined('MW_PHPUNIT_TEST')) {
         // If the code is under test, don't call out to other sites, just normalize locally.
         // Note: this may cause results to be inconsistent with the actual normalization used by the respective remote site!
         $t = Title::newFromText($pageName);
         if ($t === null) {
             // Title::newFromText() yields null for text that is not a valid title;
             // report that the same way the remote "invalid" marker is reported.
             return false;
         }
         return $t->getPrefixedText();
     }
     // Make sure the string is normalized into NFC (due to the bug 40017)
     // but do nothing to the whitespaces, that should work appropriately.
     // @see https://bugzilla.wikimedia.org/show_bug.cgi?id=40017
     $pageName = UtfNormal::cleanUp($pageName);
     // Build the args for the specific call
     $args = array('action' => 'query', 'prop' => 'info', 'redirects' => true, 'converttitles' => true, 'format' => 'json', 'titles' => $pageName);
     $url = $this->getFileUrl('api.php') . '?' . wfArrayToCgi($args);
     // Go on call the external site
     //@todo: we need a good way to specify a timeout here.
     $ret = Http::get($url);
     if ($ret === false) {
         wfDebugLog("MediaWikiSite", "call to external site failed: {$url}");
         return false;
     }
     $data = FormatJson::decode($ret, true);
     if (!is_array($data)) {
         wfDebugLog("MediaWikiSite", "call to <{$url}> returned bad json: " . $ret);
         return false;
     }
     // Pick the record for the requested page out of the decoded response.
     $page = static::extractPageRecord($data, $pageName);
     if (isset($page['missing'])) {
         wfDebugLog("MediaWikiSite", "call to <{$url}> returned a marker for a missing page title! " . $ret);
         return false;
     }
     if (isset($page['invalid'])) {
         wfDebugLog("MediaWikiSite", "call to <{$url}> returned a marker for an invalid page title! " . $ret);
         return false;
     }
     if (!isset($page['title'])) {
         wfDebugLog("MediaWikiSite", "call to <{$url}> did not return a page title! " . $ret);
         return false;
     }
     return $page['title'];
 }
 /**
  * Check that a page row's title survives a round trip through
  * UtfNormal::cleanUp() and Title::newFromText(), and move the page when
  * it does not.
  *
  * @param $row Object: result row; page_id, page_namespace and page_title are read
  * @return mixed The value of progress(1) when the page was moved; nothing otherwise
  */
 function processPage($row)
 {
     $stored = Title::makeTitle($row->page_namespace, $row->page_title);
     $prefixed = $stored->getPrefixedText();
     $reparsed = Title::newFromText(UtfNormal::cleanUp($prefixed));
     if ($reparsed === null) {
         // The cleaned-up text cannot be parsed as a title at all.
         $this->log("page {$row->page_id} ({$prefixed}) is illegal.");
         $this->moveIllegalPage($row);
         return $this->progress(1);
     }
     if ($reparsed->equals($stored)) {
         $this->progress(0);
         return;
     }
     // Parsable, but it resolves to a different title than the stored one.
     $this->log("page {$row->page_id} ({$prefixed}) doesn't match self.");
     $this->moveInconsistentPage($row, $reparsed);
     return $this->progress(1);
 }
 /**
  * Assemble a random input string from randomly chosen entries of $this->hairs.
  *
  * The number of pieces is drawn from [$this->minLength, $max].
  *
  * @param $max Integer|bool: upper bound for the number of pieces; false (default)
  *             means use $this->maxLength
  * @return string The concatenated pieces after UtfNormal::cleanUp()
  */
 function makeInputText($max = false)
 {
     $upperBound = $max === false ? $this->maxLength : $max;
     $remaining = mt_rand($this->minLength, $upperBound);
     $lastHair = count($this->hairs) - 1;
     $text = '';
     while ($remaining-- > 0) {
         $text .= $this->hairs[mt_rand(0, $lastHair)];
     }
     // Send through the UTF-8 normaliser.
     // This resolves a few differences between the old preprocessor and the
     // XML-based one, which doesn't like illegals and converts line endings.
     // It's done by the MW UI, so it's a reasonably legitimate thing to do.
     return UtfNormal::cleanUp($text);
 }
Example #9
0
 /**
  * Check that a page row's title survives a round trip through
  * UtfNormal::cleanUp() and Title::newFromText(), and move the page when
  * it does not. Rows in NS_FILE for which fileExists() reports a file are
  * only logged and otherwise left alone.
  *
  * @param $row Object: result row; page_id, page_namespace and page_title are read
  * @return mixed The value of progress(0) or progress(1)
  */
 function processPage($row)
 {
     $stored = Title::makeTitle($row->page_namespace, $row->page_title);
     $prefixed = $stored->getPrefixedText();
     $reparsed = Title::newFromText(UtfNormal::cleanUp($prefixed));
     $unparsable = is_null($reparsed);
     if (!$unparsable && $reparsed->equals($stored) && $reparsed->canExist()) {
         // all is fine
         return $this->progress(0);
     }
     if ($row->page_namespace == NS_FILE && $this->fileExists($row->page_title)) {
         $this->log("file {$row->page_title} needs cleanup, please run cleanupImages.php.");
         return $this->progress(0);
     }
     if ($unparsable) {
         $this->log("page {$row->page_id} ({$prefixed}) is illegal.");
         $this->moveIllegalPage($row);
         return $this->progress(1);
     }
     $this->log("page {$row->page_id} ({$prefixed}) doesn't match self.");
     $this->moveInconsistentPage($row, $reparsed);
     return $this->progress(1);
 }
 /**
  * Handler for the {{#infoboxbuilder:}} parser function.
  *
  * Loads the Lua source from includes/lua/InfoboxBuilder.lua, wraps it in a
  * Scribunto_LuaModule and invokes its 'builder' function with a child frame
  * built from the arguments (minus the first one). A ScribuntoException is
  * turned into an error element and recorded on the parser output.
  *
  * @param  Parser  $parser Parser object
  * @param  PPFrame $frame  PPFrame object
  * @param  array   $args   Array of arguments passed from $frame object
  * @return string          The module's result after UtfNormal::cleanUp(), or error HTML
  */
 public static function parserFunctionHook(\Parser $parser, $frame, $args)
 {
     wfProfileIn(__METHOD__);
     try {
         // Add the registered SCSS with the default theme
         $parser->getOutput()->addModuleStyles('ext.wikia.InfoboxBuilder');
         $engine = \Scribunto::getParserEngine($parser);
         unset($args[0]);
         $childFrame = $frame->newChild($args, $parser->getTitle(), 1);
         $luaSource = file_get_contents(__DIR__ . '/includes/lua/InfoboxBuilder.lua');
         $builderModule = new \Scribunto_LuaModule($engine, $luaSource, 'InfoboxBuilder');
         $output = \UtfNormal::cleanUp(strval($builderModule->invoke('builder', $childFrame)));
         wfProfileOut(__METHOD__);
         return $output;
     } catch (\ScribuntoException $e) {
         // Build the detailed error HTML: the message plus, if available, a backtrace.
         $trace = $e->getScriptTraceHtml(array('msgOptions' => array('content')));
         $errorHtml = \Html::element('p', array(), $e->getMessage());
         if ($trace !== false) {
             $backtraceLabel = wfMessage('scribunto-common-backtrace')->inContentLanguage()->text();
             $errorHtml .= \Html::element('p', array(), $backtraceLabel) . $trace;
         }
         // The first error on this output registers the hook and the tracking category.
         $parserOutput = $parser->getOutput();
         if (!isset($parserOutput->scribunto_errors)) {
             $parserOutput->addOutputHook('ScribuntoError');
             $parserOutput->scribunto_errors = array();
             $parser->addTrackingCategory('scribunto-common-error-category');
         }
         $parserOutput->scribunto_errors[] = $errorHtml;
         $id = 'mw-scribunto-error-' . (count($parserOutput->scribunto_errors) - 1);
         $visibleText = wfMessage('scribunto-parser-error')->inContentLanguage()->text();
         $hiddenDetail = $parser->insertStripItem('<!--' . htmlspecialchars($e->getMessage()) . '-->');
         wfProfileOut(__METHOD__);
         // #iferror-compatible error element
         return "<strong class=\"error\"><span class=\"scribunto-error\" id=\"{$id}\">" . $visibleText . $hiddenDetail . "</span></strong>";
     }
 }
Example #11
0
 /**
  * Hook function for {{#invoke:module|func}}
  *
  * Expands the module and function names from the first two arguments,
  * fetches the module through the parser engine and invokes the function
  * with a child frame built from the remaining arguments. A
  * ScribuntoException raised on the way is turned into an error element
  * and recorded on the parser output.
  *
  * @param $parser Parser
  * @param $frame PPFrame
  * @param $args array
  * @throws MWException If the frame class does not declare a truthy SUPPORTS_INDEX_OFFSET constant
  * @return string The function's result after UtfNormal::cleanUp(), or error HTML
  */
 public static function invokeHook(&$parser, $frame, $args)
 {
     // Check with defined() first: on PHP 8 constant() throws an Error for an
     // undefined constant, which the former "@" suppression cannot silence.
     $indexOffsetConst = get_class($frame) . '::SUPPORTS_INDEX_OFFSET';
     if (!defined($indexOffsetConst) || !constant($indexOffsetConst)) {
         throw new MWException('Scribunto needs MediaWiki 1.20 or later (Preprocessor::SUPPORTS_INDEX_OFFSET)');
     }
     wfProfileIn(__METHOD__);
     try {
         if (count($args) < 2) {
             throw new ScribuntoException('scribunto-common-nofunction');
         }
         $moduleName = trim($frame->expand($args[0]));
         $engine = Scribunto::getParserEngine($parser);
         $title = Title::makeTitleSafe(NS_MODULE, $moduleName);
         if (!$title) {
             throw new ScribuntoException('scribunto-common-nosuchmodule');
         }
         $module = $engine->fetchModuleFromParser($title);
         if (!$module) {
             throw new ScribuntoException('scribunto-common-nosuchmodule');
         }
         $functionName = trim($frame->expand($args[1]));
         // The module and function names are not passed on to the invoked function.
         unset($args[0]);
         unset($args[1]);
         $childFrame = $frame->newChild($args, $title, 1);
         $result = $module->invoke($functionName, $childFrame);
         $result = UtfNormal::cleanUp(strval($result));
         wfProfileOut(__METHOD__);
         return $result;
     } catch (ScribuntoException $e) {
         // Build the detailed error HTML: the message plus, if available, a backtrace.
         $trace = $e->getScriptTraceHtml(array('msgOptions' => array('content')));
         $html = Html::element('p', array(), $e->getMessage());
         if ($trace !== false) {
             $html .= Html::element('p', array(), wfMessage('scribunto-common-backtrace')->inContentLanguage()->text()) . $trace;
         }
         // The first error on this output registers the hook and the tracking category.
         $out = $parser->getOutput();
         if (!isset($out->scribunto_errors)) {
             $out->addOutputHook('ScribuntoError');
             $out->scribunto_errors = array();
             $parser->addTrackingCategory('scribunto-common-error-category');
         }
         $out->scribunto_errors[] = $html;
         $id = 'mw-scribunto-error-' . (count($out->scribunto_errors) - 1);
         $parserError = wfMessage('scribunto-parser-error')->inContentLanguage()->text() . $parser->insertStripItem('<!--' . htmlspecialchars($e->getMessage()) . '-->');
         wfProfileOut(__METHOD__);
         // #iferror-compatible error element
         return "<strong class=\"error\"><span class=\"scribunto-error\" id=\"{$id}\">" . $parserError . "</span></strong>";
     }
 }
Example #12
0
 /**
  * Normalize a UTF-8 string by delegating to UtfNormal::cleanUp().
  *
  * Per the original notes, the intent is conversion to normal form C, with
  * Malayalam and Arabic additionally replacing certain backwards-compatible
  * sequences by their modern Unicode equivalents; that part is
  * language-specific for performance reasons only.
  *
  * @param $s String: text to normalize
  * @return string Result of UtfNormal::cleanUp()
  */
 function normalize($s)
 {
     $normalized = UtfNormal::cleanUp($s);
     return $normalized;
 }
Example #13
0
    $diffs = new Diff($ota, $nta);
    $formatter = new TableDiffFormatter();
    $funky = $formatter->format($diffs);
    preg_match_all('/<span class="diffchange">(.*?)<\\/span>/', $funky, $matches);
    foreach ($matches[1] as $bit) {
        $hex = bin2hex($bit);
        echo "\t{$hex}\n";
    }
}
// Endless fuzz loop: generate a random string, normalize it once with
// UtfNormal::cleanUp() and once with donorm(), and stop at the first
// mismatch. The failure report labels the two results "php" and "icu".
$size = 16;
$n = 0;
while (true) {
    $n++;
    echo "{$n}\n";
    $str = randomString($size, true);
    $clean = UtfNormal::cleanUp($str);
    $norm = donorm($str);
    echo strlen($clean) . ", " . strlen($norm);
    // Compare strictly: with ==, two numeric-looking strings can be treated
    // as equal numbers even though their bytes differ, hiding a mismatch.
    // NOTE(review): assumes donorm() always returns a string - confirm.
    if ($clean === $norm) {
        echo " (match)\n";
    } else {
        echo " (FAIL)\n";
        echo "\traw: " . bin2hex($str) . "\n" . "\tphp: " . bin2hex($clean) . "\n" . "\ticu: " . bin2hex($norm) . "\n";
        echo "\n\tdiffs:\n";
        showDiffs($clean, $norm);
        die;
    }
    // Drop the references before the next iteration.
    $str = '';
    $clean = '';
    $norm = '';
}
 /**
  * Preprocess some wikitext and return the document tree.
  * This is the ghost of Parser::replace_variables().
  *
  * The text is first turned into XML by preprocessToXml() and then loaded
  * into a DOMDocument, whose document element is wrapped in a PPNode_DOM.
  *
  * @param string $text the text to parse
  * @param $flags Integer: bitwise combination of:
  *          Parser::PTD_FOR_INCLUSION    Handle "<noinclude>" and "<includeonly>" as if the text is being
  *                                     included. Default is to assume a direct page view.
  *
  * The generated DOM tree must depend only on the input text and the flags.
  * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of bug 4899.
  *
  * Any flag added to the $flags parameter here, or any other parameter liable to cause a
  * change in the DOM tree for a given text, must be passed through the section identifier
  * in the section edit link and thus back to extractSections().
  *
  * When $wgPreprocessorCacheThreshold is not false and the text is longer than that
  * threshold, the generated XML is stored through $wgMemc for 86400 seconds, prefixed
  * with an 8-digit cache version, under a key built from the md5 of the text and the flags.
  *
  * @throws MWException If the generated node count limit is exceeded, or if the XML
  *                     cannot be loaded even after UtfNormal::cleanUp()
  * @return PPNode_DOM
  */
 function preprocessToObj($text, $flags = 0)
 {
     wfProfileIn(__METHOD__);
     global $wgMemc, $wgPreprocessorCacheThreshold;
     $xml = false;
     // Only texts longer than the configured threshold go through the cache.
     $cacheable = $wgPreprocessorCacheThreshold !== false && strlen($text) > $wgPreprocessorCacheThreshold;
     if ($cacheable) {
         // This profiling section stays open until just before the function exits.
         wfProfileIn(__METHOD__ . '-cacheable');
         $cacheKey = wfMemcKey('preprocess-xml', md5($text), $flags);
         $cacheValue = $wgMemc->get($cacheKey);
         if ($cacheValue) {
             // The first 8 characters hold the cache version; the rest is the XML.
             $version = substr($cacheValue, 0, 8);
             if (intval($version) == self::CACHE_VERSION) {
                 $xml = substr($cacheValue, 8);
                 // From the cache
                 wfDebugLog("Preprocessor", "Loaded preprocessor XML from memcached (key {$cacheKey})");
             }
         }
         if ($xml === false) {
             // Nothing cached, or cached under a different version: regenerate and store.
             wfProfileIn(__METHOD__ . '-cache-miss');
             $xml = $this->preprocessToXml($text, $flags);
             $cacheValue = sprintf("%08d", self::CACHE_VERSION) . $xml;
             $wgMemc->set($cacheKey, $cacheValue, 86400);
             wfProfileOut(__METHOD__ . '-cache-miss');
             wfDebugLog("Preprocessor", "Saved preprocessor XML to memcached (key {$cacheKey})");
         }
     } else {
         $xml = $this->preprocessToXml($text, $flags);
     }
     // Fail if the number of elements exceeds acceptable limits
     // Do not attempt to generate the DOM
     // The count of '<' characters is used as the node count estimate.
     $this->parser->mGeneratedPPNodeCount += substr_count($xml, '<');
     $max = $this->parser->mOptions->getMaxGeneratedPPNodeCount();
     if ($this->parser->mGeneratedPPNodeCount > $max) {
         // Close the open profiling sections before bailing out.
         if ($cacheable) {
             wfProfileOut(__METHOD__ . '-cacheable');
         }
         wfProfileOut(__METHOD__);
         throw new MWException(__METHOD__ . ': generated node count limit exceeded');
     }
     wfProfileIn(__METHOD__ . '-loadXML');
     $dom = new DOMDocument();
     // The first load attempt runs with warnings suppressed.
     wfSuppressWarnings();
     $result = $dom->loadXML($xml);
     wfRestoreWarnings();
     if (!$result) {
         // Try running the XML through UtfNormal to get rid of invalid characters
         $xml = UtfNormal::cleanUp($xml);
         // 1 << 19 == XML_PARSE_HUGE, needed so newer versions of libxml2 don't barf when the XML is >256 levels deep
         $result = $dom->loadXML($xml, 1 << 19);
     }
     if ($result) {
         $obj = new PPNode_DOM($dom->documentElement);
     }
     wfProfileOut(__METHOD__ . '-loadXML');
     if ($cacheable) {
         wfProfileOut(__METHOD__ . '-cacheable');
     }
     wfProfileOut(__METHOD__);
     // Throw only after every profiling section has been closed.
     if (!$result) {
         throw new MWException(__METHOD__ . ' generated invalid XML');
     }
     return $obj;
 }
Example #15
0
 /**
  * Really format a diff for the newsfeed.
  *
  * The result always starts with a paragraph holding the action text and the
  * formatted comment. Special pages, pages anonymous users cannot read and
  * calls without a new revision id get nothing more. Otherwise a diff (when
  * $oldid is set) or the new page's text is appended; output longer than
  * $wgFeedDiffCutoff, any output when the cutoff is zero or negative, and
  * non-textual content are replaced by a link from getDiffLink().
  *
  * @param $title Title object
  * @param $oldid Integer: old revision's id
  * @param $newid Integer: new revision's id
  * @param $timestamp Integer: new revision's timestamp
  * @param $comment String: new revision's comment
  * @param $actiontext String: text of the action; in case of log event
  * @return String
  */
 public static function formatDiffRow($title, $oldid, $newid, $timestamp, $comment, $actiontext = '')
 {
     global $wgFeedDiffCutoff, $wgLang;
     wfProfileIn(__METHOD__);
     # log entries
     $headerParts = array_filter(array($actiontext, Linker::formatComment($comment)));
     $completeText = '<p>' . implode(' ', $headerParts) . "</p>\n";
     // NOTE: Check permissions for anonymous users, not current user.
     //       No "privileged" version should end up in the cache.
     //       Most feed readers will not log in anyway.
     $anon = new User();
     $accErrors = $title->getUserPermissionsErrors('read', $anon, true);
     // Can't diff special pages, unreadable pages or pages with no new revision
     // to compare against: just return the text.
     if ($title->getNamespace() < 0 || $accErrors || !$newid) {
         wfProfileOut(__METHOD__);
         return $completeText;
     }
     // A zero or negative cutoff means nothing is ever shown inline.
     $inlineDisabled = $wgFeedDiffCutoff <= 0;
     if ($oldid) {
         $section = __METHOD__ . "-dodiff";
         wfProfileIn($section);
         $diffText = '';
         // Don't bother generating the diff if we won't be able to show it
         if ($wgFeedDiffCutoff > 0) {
             $oldRev = Revision::newFromId($oldid);
             if ($oldRev) {
                 $context = clone RequestContext::getMain();
                 $context->setTitle($title);
                 $de = $oldRev->getContentHandler()->createDifferenceEngine($context, $oldid, $newid);
                 $oldHeader = wfMessage('previousrevision')->text();
                 $newHeader = wfMessage('revisionasof', $wgLang->timeanddate($timestamp), $wgLang->date($timestamp), $wgLang->time($timestamp))->text();
                 $diffText = $de->getDiff($oldHeader, $newHeader);
             } else {
                 $diffText = false;
             }
         }
         if ($inlineDisabled || strlen($diffText) > $wgFeedDiffCutoff) {
             // Omit large diffs
             $diffText = self::getDiffLink($title, $newid, $oldid);
         } elseif ($diffText === false) {
             // Error in diff engine, probably a missing revision
             $diffText = "<p>Can't load revision {$newid}</p>";
         } else {
             // Diff output fine, clean up any illegal UTF-8
             $diffText = self::applyDiffStyle(UtfNormal::cleanUp($diffText));
         }
         wfProfileOut($section);
     } else {
         $newRev = Revision::newFromId($newid);
         if ($inlineDisabled || is_null($newRev)) {
             $newContent = ContentHandler::getForTitle($title)->makeEmptyContent();
         } else {
             $newContent = $newRev->getContent();
         }
         // $html stays null unless there is textual content short enough to inline.
         //XXX: we could get an HTML representation of non-textual content via getParserOutput, but that may
         //     contain JS magic and generally may not be suitable for inclusion in a feed.
         //     Perhaps Content should have a getDescriptiveHtml method and/or a getSourceText method.
         //Compare also ApiFeedContributions::feedItemDesc
         $html = null;
         if ($newContent instanceof TextContent) {
             // only textual content has a "source view".
             $text = $newContent->getNativeData();
             if (!($inlineDisabled || strlen($text) > $wgFeedDiffCutoff)) {
                 $html = nl2br(htmlspecialchars($text));
             }
         }
         // Omit large new page diffs, bug 29110
         // Also use diff link for non-textual content
         $diffText = $html === null
             ? self::getDiffLink($title, $newid)
             : '<p><b>' . wfMessage('newpage')->text() . '</b></p>' . '<div>' . $html . '</div>';
     }
     $completeText .= $diffText;
     wfProfileOut(__METHOD__);
     return $completeText;
 }
Example #16
0
 /**
  * Preprocess some wikitext and return the document tree.
  * This is the ghost of Parser::replace_variables().
  *
  * @param string $text The text to parse
  * @param integer flags Bitwise combination of:
  *          Parser::PTD_FOR_INCLUSION    Handle <noinclude>/<includeonly> as if the text is being
  *                                     included. Default is to assume a direct page view.
  *
  * The generated DOM tree must depend only on the input text and the flags.
  * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of bug 4899.
  *
  * Any flag added to the $flags parameter here, or any other parameter liable to cause a
  * change in the DOM tree for a given text, must be passed through the section identifier
  * in the section edit link and thus back to extractSections().
  *
  * The output of this function is currently only cached in process memory, but a persistent
  * cache may be implemented at a later date which takes further advantage of these strict
  * dependency requirements.
  *
  * @private
  */
 function preprocessToObj($text, $flags = 0)
 {
     wfProfileIn(__METHOD__);
     wfProfileIn(__METHOD__ . '-makexml');
     $rules = array('{' => array('end' => '}', 'names' => array(2 => 'template', 3 => 'tplarg'), 'min' => 2, 'max' => 3), '[' => array('end' => ']', 'names' => array(2 => null), 'min' => 2, 'max' => 2));
     $forInclusion = $flags & Parser::PTD_FOR_INCLUSION;
     $xmlishElements = $this->parser->getStripList();
     $enableOnlyinclude = false;
     if ($forInclusion) {
         $ignoredTags = array('includeonly', '/includeonly');
         $ignoredElements = array('noinclude');
         $xmlishElements[] = 'noinclude';
         if (strpos($text, '<onlyinclude>') !== false && strpos($text, '</onlyinclude>') !== false) {
             $enableOnlyinclude = true;
         }
     } else {
         $ignoredTags = array('noinclude', '/noinclude', 'onlyinclude', '/onlyinclude');
         $ignoredElements = array('includeonly');
         $xmlishElements[] = 'includeonly';
     }
     $xmlishRegex = implode('|', array_merge($xmlishElements, $ignoredTags));
     // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
     $elementsRegex = "~({$xmlishRegex})(?:\\s|\\/>|>)|(!--)~iA";
     $stack = new PPDStack();
     $searchBase = "[{<\n";
     #}
     $revText = strrev($text);
     // For fast reverse searches
     $i = 0;
     # Input pointer, starts out pointing to a pseudo-newline before the start
     $accum =& $stack->getAccum();
     # Current accumulator
     $accum = '<root>';
     $findEquals = false;
     # True to find equals signs in arguments
     $findPipe = false;
     # True to take notice of pipe characters
     $headingIndex = 1;
     $inHeading = false;
     # True if $i is inside a possible heading
     $noMoreGT = false;
     # True if there are no more greater-than (>) signs right of $i
     $findOnlyinclude = $enableOnlyinclude;
     # True to ignore all input up to the next <onlyinclude>
     $fakeLineStart = true;
     # Do a line-start run without outputting an LF character
     while (true) {
         //$this->memCheck();
         if ($findOnlyinclude) {
             // Ignore all input up to the next <onlyinclude>
             $startPos = strpos($text, '<onlyinclude>', $i);
             if ($startPos === false) {
                 // Ignored section runs to the end
                 $accum .= '<ignore>' . htmlspecialchars(substr($text, $i)) . '</ignore>';
                 break;
             }
             $tagEndPos = $startPos + strlen('<onlyinclude>');
             // past-the-end
             $accum .= '<ignore>' . htmlspecialchars(substr($text, $i, $tagEndPos - $i)) . '</ignore>';
             $i = $tagEndPos;
             $findOnlyinclude = false;
         }
         if ($fakeLineStart) {
             $found = 'line-start';
             $curChar = '';
         } else {
             # Find next opening brace, closing brace or pipe
             $search = $searchBase;
             if ($stack->top === false) {
                 $currentClosing = '';
             } else {
                 $currentClosing = $stack->top->close;
                 $search .= $currentClosing;
             }
             if ($findPipe) {
                 $search .= '|';
             }
             if ($findEquals) {
                 // First equals will be for the template
                 $search .= '=';
             }
             $rule = null;
             # Output literal section, advance input counter
             $literalLength = strcspn($text, $search, $i);
             if ($literalLength > 0) {
                 $accum .= htmlspecialchars(substr($text, $i, $literalLength));
                 $i += $literalLength;
             }
             if ($i >= strlen($text)) {
                 if ($currentClosing == "\n") {
                     // Do a past-the-end run to finish off the heading
                     $curChar = '';
                     $found = 'line-end';
                 } else {
                     # All done
                     break;
                 }
             } else {
                 $curChar = $text[$i];
                 if ($curChar == '|') {
                     $found = 'pipe';
                 } elseif ($curChar == '=') {
                     $found = 'equals';
                 } elseif ($curChar == '<') {
                     $found = 'angle';
                 } elseif ($curChar == "\n") {
                     if ($inHeading) {
                         $found = 'line-end';
                     } else {
                         $found = 'line-start';
                     }
                 } elseif ($curChar == $currentClosing) {
                     $found = 'close';
                 } elseif (isset($rules[$curChar])) {
                     $found = 'open';
                     $rule = $rules[$curChar];
                 } else {
                     # Some versions of PHP have a strcspn which stops on null characters
                     # Ignore and continue
                     ++$i;
                     continue;
                 }
             }
         }
         if ($found == 'angle') {
             $matches = false;
             // Handle </onlyinclude>
             if ($enableOnlyinclude && substr($text, $i, strlen('</onlyinclude>')) == '</onlyinclude>') {
                 $findOnlyinclude = true;
                 continue;
             }
             // Determine element name
             if (!preg_match($elementsRegex, $text, $matches, 0, $i + 1)) {
                 // Element name missing or not listed
                 $accum .= '&lt;';
                 ++$i;
                 continue;
             }
             // Handle comments
             if (isset($matches[2]) && $matches[2] == '!--') {
                 // To avoid leaving blank lines, when a comment is both preceded
                 // and followed by a newline (ignoring spaces), trim leading and
                 // trailing spaces and one of the newlines.
                 // Find the end
                 $endPos = strpos($text, '-->', $i + 4);
                 if ($endPos === false) {
                     // Unclosed comment in input, runs to end
                     $inner = substr($text, $i);
                     $accum .= '<comment>' . htmlspecialchars($inner) . '</comment>';
                     $i = strlen($text);
                 } else {
                     // Search backwards for leading whitespace
                     $wsStart = $i ? $i - strspn($revText, ' ', strlen($text) - $i) : 0;
                     // Search forwards for trailing whitespace
                     // $wsEnd will be the position of the last space
                     $wsEnd = $endPos + 2 + strspn($text, ' ', $endPos + 3);
                     // Eat the line if possible
                     // TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at
                     // the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but
                     // it's a possible beneficial b/c break.
                     if ($wsStart > 0 && substr($text, $wsStart - 1, 1) == "\n" && substr($text, $wsEnd + 1, 1) == "\n") {
                         $startPos = $wsStart;
                         $endPos = $wsEnd + 1;
                         // Remove leading whitespace from the end of the accumulator
                         // Sanity check first though
                         $wsLength = $i - $wsStart;
                         if ($wsLength > 0 && substr($accum, -$wsLength) === str_repeat(' ', $wsLength)) {
                             $accum = substr($accum, 0, -$wsLength);
                         }
                         // Do a line-start run next time to look for headings after the comment
                         $fakeLineStart = true;
                     } else {
                         // No line to eat, just take the comment itself
                         $startPos = $i;
                         $endPos += 2;
                     }
                     if ($stack->top) {
                         $part = $stack->top->getCurrentPart();
                         if (isset($part->commentEnd) && $part->commentEnd == $wsStart - 1) {
                             // Comments abutting, no change in visual end
                             $part->commentEnd = $wsEnd;
                         } else {
                             $part->visualEnd = $wsStart;
                             $part->commentEnd = $endPos;
                         }
                     }
                     $i = $endPos + 1;
                     $inner = substr($text, $startPos, $endPos - $startPos + 1);
                     $accum .= '<comment>' . htmlspecialchars($inner) . '</comment>';
                 }
                 continue;
             }
             $name = $matches[1];
             $lowerName = strtolower($name);
             $attrStart = $i + strlen($name) + 1;
             // Find end of tag
             $tagEndPos = $noMoreGT ? false : strpos($text, '>', $attrStart);
             if ($tagEndPos === false) {
                 // Infinite backtrack
                 // Disable tag search to prevent worst-case O(N^2) performance
                 $noMoreGT = true;
                 $accum .= '&lt;';
                 ++$i;
                 continue;
             }
             // Handle ignored tags
             if (in_array($lowerName, $ignoredTags)) {
                 $accum .= '<ignore>' . htmlspecialchars(substr($text, $i, $tagEndPos - $i + 1)) . '</ignore>';
                 $i = $tagEndPos + 1;
                 continue;
             }
             $tagStartPos = $i;
             if ($text[$tagEndPos - 1] == '/') {
                 $attrEnd = $tagEndPos - 1;
                 $inner = null;
                 $i = $tagEndPos + 1;
                 $close = '';
             } else {
                 $attrEnd = $tagEndPos;
                 // Find closing tag
                 if (preg_match("/<\\/{$name}\\s*>/i", $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1)) {
                     $inner = substr($text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1);
                     $i = $matches[0][1] + strlen($matches[0][0]);
                     $close = '<close>' . htmlspecialchars($matches[0][0]) . '</close>';
                 } else {
                     // No end tag -- let it run out to the end of the text.
                     $inner = substr($text, $tagEndPos + 1);
                     $i = strlen($text);
                     $close = '';
                 }
             }
             // <includeonly> and <noinclude> just become <ignore> tags
             if (in_array($lowerName, $ignoredElements)) {
                 $accum .= '<ignore>' . htmlspecialchars(substr($text, $tagStartPos, $i - $tagStartPos)) . '</ignore>';
                 continue;
             }
             $accum .= '<ext>';
             if ($attrEnd <= $attrStart) {
                 $attr = '';
             } else {
                 $attr = substr($text, $attrStart, $attrEnd - $attrStart);
             }
             $accum .= '<name>' . htmlspecialchars($name) . '</name>' . '<attr>' . htmlspecialchars($attr) . '</attr>';
             if ($inner !== null) {
                 $accum .= '<inner>' . htmlspecialchars($inner) . '</inner>';
             }
             $accum .= $close . '</ext>';
         } elseif ($found == 'line-start') {
             // Is this the start of a heading?
             // Line break belongs before the heading element in any case
             if ($fakeLineStart) {
                 $fakeLineStart = false;
             } else {
                 $accum .= $curChar;
                 $i++;
             }
             $count = strspn($text, '=', $i, 6);
             if ($count == 1 && $findEquals) {
                 // DWIM: This looks kind of like a name/value separator
                 // Let's let the equals handler have it and break the potential heading
                 // This is heuristic, but AFAICT the methods for completely correct disambiguation are very complex.
             } elseif ($count > 0) {
                 $piece = array('open' => "\n", 'close' => "\n", 'parts' => array(new PPDPart(str_repeat('=', $count))), 'startPos' => $i, 'count' => $count);
                 $stack->push($piece);
                 $accum =& $stack->getAccum();
                 extract($stack->getFlags());
                 $i += $count;
             }
         } elseif ($found == 'line-end') {
             $piece = $stack->top;
             // A heading must be open, otherwise \n wouldn't have been in the search list
             assert($piece->open == "\n");
             $part = $piece->getCurrentPart();
             // Search back through the input to see if it has a proper close
             // Do this using the reversed string since the other solutions (end anchor, etc.) are inefficient
             $wsLength = strspn($revText, " \t", strlen($text) - $i);
             $searchStart = $i - $wsLength;
             if (isset($part->commentEnd) && $searchStart - 1 == $part->commentEnd) {
                 // Comment found at line end
                 // Search for equals signs before the comment
                 $searchStart = $part->visualEnd;
                 $searchStart -= strspn($revText, " \t", strlen($text) - $searchStart);
             }
             $count = $piece->count;
             $equalsLength = strspn($revText, '=', strlen($text) - $searchStart);
             if ($equalsLength > 0) {
                 if ($i - $equalsLength == $piece->startPos) {
                     // This is just a single string of equals signs on its own line
                     // Replicate the doHeadings behaviour /={count}(.+)={count}/
                     // First find out how many equals signs there really are (don't stop at 6)
                     $count = $equalsLength;
                     if ($count < 3) {
                         $count = 0;
                     } else {
                         $count = min(6, intval(($count - 1) / 2));
                     }
                 } else {
                     $count = min($equalsLength, $count);
                 }
                 if ($count > 0) {
                     // Normal match, output <h>
                     $element = "<h level=\"{$count}\" i=\"{$headingIndex}\">{$accum}</h>";
                     $headingIndex++;
                 } else {
                     // Single equals sign on its own line, count=0
                     $element = $accum;
                 }
             } else {
                 // No match, no <h>, just pass down the inner text
                 $element = $accum;
             }
             // Unwind the stack
             $stack->pop();
             $accum =& $stack->getAccum();
             extract($stack->getFlags());
             // Append the result to the enclosing accumulator
             $accum .= $element;
             // Note that we do NOT increment the input pointer.
             // This is because the closing linebreak could be the opening linebreak of
             // another heading. Infinite loops are avoided because the next iteration MUST
             // hit the heading open case above, which unconditionally increments the
             // input pointer.
         } elseif ($found == 'open') {
             # count opening brace characters
             $count = strspn($text, $curChar, $i);
             # we need to add to stack only if opening brace count is enough for one of the rules
             if ($count >= $rule['min']) {
                 # Add it to the stack
                 $piece = array('open' => $curChar, 'close' => $rule['end'], 'count' => $count, 'lineStart' => $i > 0 && $text[$i - 1] == "\n");
                 $stack->push($piece);
                 $accum =& $stack->getAccum();
                 extract($stack->getFlags());
             } else {
                 # Add literal brace(s)
                 $accum .= htmlspecialchars(str_repeat($curChar, $count));
             }
             $i += $count;
         } elseif ($found == 'close') {
             $piece = $stack->top;
             # lets check if there are enough characters for closing brace
             $maxCount = $piece->count;
             $count = strspn($text, $curChar, $i, $maxCount);
             # check for maximum matching characters (if there are 5 closing
             # characters, we will probably need only 3 - depending on the rules)
             $matchingCount = 0;
             $rule = $rules[$piece->open];
             if ($count > $rule['max']) {
                 # The specified maximum exists in the callback array, unless the caller
                 # has made an error
                 $matchingCount = $rule['max'];
             } else {
                 # Count is less than the maximum
                 # Skip any gaps in the callback array to find the true largest match
                 # Need to use array_key_exists not isset because the callback can be null
                 $matchingCount = $count;
                 while ($matchingCount > 0 && !array_key_exists($matchingCount, $rule['names'])) {
                     --$matchingCount;
                 }
             }
             if ($matchingCount <= 0) {
                 # No matching element found in callback array
                 # Output a literal closing brace and continue
                 $accum .= htmlspecialchars(str_repeat($curChar, $count));
                 $i += $count;
                 continue;
             }
             $name = $rule['names'][$matchingCount];
             if ($name === null) {
                 // No element, just literal text
                 $element = $piece->breakSyntax($matchingCount) . str_repeat($rule['end'], $matchingCount);
             } else {
                 # Create XML element
                 # Note: $parts is already XML, does not need to be encoded further
                 $parts = $piece->parts;
                 $title = $parts[0]->out;
                 unset($parts[0]);
                 # The invocation is at the start of the line if lineStart is set in
                 # the stack, and all opening brackets are used up.
                 if ($maxCount == $matchingCount && !empty($piece->lineStart)) {
                     $attr = ' lineStart="1"';
                 } else {
                     $attr = '';
                 }
                 $element = "<{$name}{$attr}>";
                 $element .= "<title>{$title}</title>";
                 $argIndex = 1;
                 foreach ($parts as $partIndex => $part) {
                     if (isset($part->eqpos)) {
                         $argName = substr($part->out, 0, $part->eqpos);
                         $argValue = substr($part->out, $part->eqpos + 1);
                         $element .= "<part><name>{$argName}</name>=<value>{$argValue}</value></part>";
                     } else {
                         $element .= "<part><name index=\"{$argIndex}\" /><value>{$part->out}</value></part>";
                         $argIndex++;
                     }
                 }
                 $element .= "</{$name}>";
             }
             # Advance input pointer
             $i += $matchingCount;
             # Unwind the stack
             $stack->pop();
             $accum =& $stack->getAccum();
             # Re-add the old stack element if it still has unmatched opening characters remaining
             if ($matchingCount < $piece->count) {
                 $piece->parts = array(new PPDPart());
                 $piece->count -= $matchingCount;
                 # do we still qualify for any callback with remaining count?
                 $names = $rules[$piece->open]['names'];
                 $skippedBraces = 0;
                 $enclosingAccum =& $accum;
                 while ($piece->count) {
                     if (array_key_exists($piece->count, $names)) {
                         $stack->push($piece);
                         $accum =& $stack->getAccum();
                         break;
                     }
                     --$piece->count;
                     $skippedBraces++;
                 }
                 $enclosingAccum .= str_repeat($piece->open, $skippedBraces);
             }
             extract($stack->getFlags());
             # Add XML element to the enclosing accumulator
             $accum .= $element;
         } elseif ($found == 'pipe') {
             $findEquals = true;
             // shortcut for getFlags()
             $stack->addPart();
             $accum =& $stack->getAccum();
             ++$i;
         } elseif ($found == 'equals') {
             $findEquals = false;
             // shortcut for getFlags()
             $stack->getCurrentPart()->eqpos = strlen($accum);
             $accum .= '=';
             ++$i;
         }
     }
     # Output any remaining unclosed brackets
     foreach ($stack->stack as $piece) {
         $stack->rootAccum .= $piece->breakSyntax();
     }
     $stack->rootAccum .= '</root>';
     $xml = $stack->rootAccum;
     wfProfileOut(__METHOD__ . '-makexml');
     wfProfileIn(__METHOD__ . '-loadXML');
     $dom = new DOMDocument();
     wfSuppressWarnings();
     $result = $dom->loadXML($xml);
     wfRestoreWarnings();
     if (!$result) {
         // Try running the XML through UtfNormal to get rid of invalid characters
         $xml = UtfNormal::cleanUp($xml);
         $result = $dom->loadXML($xml);
         if (!$result) {
             throw new MWException(__METHOD__ . ' generated invalid XML');
         }
     }
     $obj = new PPNode_DOM($dom->documentElement);
     wfProfileOut(__METHOD__ . '-loadXML');
     wfProfileOut(__METHOD__);
     return $obj;
 }
Example #17
0
 /**
  * Regex callback: build a <PAGE> placeholder element from a match.
  *
  * The first capture group has its C-style backslash escapes removed, is run
  * through UtfNormal::cleanUp() and is then HTML-escaped. Afterwards newlines
  * are rewritten as the numeric reference &#10; and every occurrence of the
  * '�' character is dropped (presumably the U+FFFD replacement character that
  * marks invalid input -- confirm against UtfNormal).
  *
  * @param $matches Array: match array; only index 1 is read
  * @return string
  */
 function pageTextCallback($matches)
 {
     $unescaped = stripcslashes($matches[1]);
     $escaped = htmlspecialchars(UtfNormal::cleanUp($unescaped));
     # Keep the attribute value on a single line and discard marker characters
     $attrValue = str_replace(array("\n", '�'), array('&#10;', ''), $escaped);
     return "<PAGE value=\"{$attrValue}\" />";
 }
Example #18
0
 /**
  * Look up the client-reported filename of an uploaded file.
  *
  * The raw name from $_FILES is passed through
  * Sanitizer::decodeCharReferences() and UtfNormal::cleanUp(), in part to
  * cope with Safari, which submits non-ASCII filenames as HTML character
  * references in decomposed form.
  *
  * No check is made that the result is a safe filename.
  *
  * @param $key String: index into $_FILES
  * @return string or NULL if no such file.
  */
 function getFileName($key)
 {
     if (isset($_FILES[$key])) {
         $reported = $_FILES[$key]['name'];
         # Decode the entities first, then normalize whatever they expanded to
         $name = UtfNormal::cleanUp(Sanitizer::decodeCharReferences($reported));
         wfDebug("WebRequest::getFileName() '{$reported}' normalized to '{$name}'\n");
         return $name;
     }
     return NULL;
 }
Example #19
0
 /**
  * Clean up a UTF-8 string with UtfNormal::cleanUp() and, when
  * $wgAllUnicodeFixes is enabled, additionally apply the Arabic and
  * Malayalam pair files ('normalize-ar.ser', then 'normalize-ml.ser').
  *
  * @param $s string
  *
  * @return string
  */
 function normalize($s)
 {
     global $wgAllUnicodeFixes;
     $normalized = UtfNormal::cleanUp($s);
     if (!$wgAllUnicodeFixes) {
         return $normalized;
     }
     // Order matters only in that it mirrors the historical call sequence
     foreach (array('normalize-ar.ser', 'normalize-ml.ser') as $pairFile) {
         $normalized = $this->transformUsingPairFile($pairFile, $normalized);
     }
     return $normalized;
 }
 /**
  * Request one text blob from the spawned subprocess and read its reply.
  *
  * As used here, the exchange is: the id is written on a line of its own and
  * flushed; the first reply line is parsed with intval() as the payload size
  * in bytes; that many bytes of payload are then read.
  *
  * @param $id String|int: identifier written to the subprocess
  * @return string|bool The payload with carriage returns removed and passed
  *   through UtfNormal::cleanUp(), or false if writing, flushing or reading
  *   failed or fewer bytes than announced were received.
  */
 private function getTextSpawnedOnce($id)
 {
     $ok = fwrite($this->spawnWrite, "{$id}\n");
     //$this->progress( ">> $id" );
     if (!$ok) {
         return false;
     }
     $ok = fflush($this->spawnWrite);
     //$this->progress( ">> [flush]" );
     if (!$ok) {
         return false;
     }
     $len = fgets($this->spawnRead);
     //$this->progress( "<< " . trim( $len ) );
     if ($len === false) {
         return false;
     }
     $nbytes = intval($len);
     $text = "";
     // Subprocess may not send everything at once, we have to loop.
     while ($nbytes > strlen($text)) {
         $buffer = fread($this->spawnRead, $nbytes - strlen($text));
         // Check the chunk that was just read. The accumulator is always a
         // string, so testing it could never detect a failed read.
         if ($buffer === false) {
             break;
         }
         // An empty read at end-of-stream will never make progress; stop so
         // the short-read check below reports the failure instead of spinning.
         if ($buffer === '' && feof($this->spawnRead)) {
             break;
         }
         $text .= $buffer;
     }
     $gotbytes = strlen($text);
     if ($gotbytes != $nbytes) {
         $this->progress("Expected {$nbytes} bytes from database subprocess, got {$gotbytes} ");
         return false;
     }
     // Do normalization in the dump thread...
     $stripped = str_replace("\r", "", $text);
     $normalized = UtfNormal::cleanUp($stripped);
     return $normalized;
 }
 /**
  * Load the message file for one language and hand its contents to
  * readFromVariable().
  *
  * The file contents must be valid UTF-8; they are run through
  * UtfNormal::cleanUp() before parsing.
  *
  * @param string $code Language code.
  * @return array|bool Result of readFromVariable(), or false if the file
  *   does not exist
  * @throws MWException if the file cannot be read, is not valid UTF-8, or
  *   parsing raises an exception
  */
 public function read($code)
 {
     if (!$this->exists($code)) {
         return false;
     }
     $path = $this->group->getSourceFilePath($code);
     $raw = file_get_contents($path);
     if ($raw === false) {
         throw new MWException("Unable to read file {$path}.");
     }
     if (!StringUtils::isUtf8($raw)) {
         throw new MWException("Contents of {$path} are not valid utf-8.");
     }
     $clean = UtfNormal::cleanUp($raw);
     try {
         $parsed = $this->readFromVariable($clean);
     } catch (Exception $e) {
         // Re-wrap so the caller learns which file was being parsed
         throw new MWException("Parsing {$path} failed: " . $e->getMessage());
     }
     return $parsed;
 }
Example #22
0
 /**
  * Fetch one row of the text table by old_id and return its revision text
  * with carriage returns removed and UtfNormal::cleanUp() applied.
  *
  * May throw a database error if, say, the server dies during query.
  *
  * @param $id Integer: value matched against old_id (coerced with intval())
  * @return string
  */
 private function doGetText($id)
 {
     $conds = array('old_id' => intval($id));
     $fields = array('old_text', 'old_flags');
     $row = $this->db->selectRow('text', $fields, $conds, 'TextPassDumper::getText');
     $withoutCr = str_replace("\r", "", Revision::getRevisionText($row));
     return UtfNormal::cleanUp($withoutCr);
 }
/**
 * Format a diff for the newsfeed.
 *
 * The output always starts with the escaped edit comment in a paragraph.
 * For rows in a non-negative namespace it is followed by one of:
 *  - an error paragraph if the new or old revision cannot be loaded,
 *  - a link to the diff page if either text is longer than $wgFeedDiffCutoff,
 *  - the rendered diff between the old and new text, or
 *  - the full escaped new text when there is no previous revision.
 *
 * @param $row Object: recentchanges row; reads rc_comment, rc_namespace,
 *   rc_title, rc_this_oldid, rc_last_oldid and rc_timestamp
 * @return string HTML fragment
 */
function rcFormatDiff($row)
{
    $fname = 'rcFormatDiff';
    wfProfileIn($fname);
    require_once 'DifferenceEngine.php';
    $comment = "<p>" . htmlspecialchars($row->rc_comment) . "</p>\n";
    if ($row->rc_namespace < 0) {
        // Negative namespaces get the comment only
        wfProfileOut($fname);
        return $comment;
    }
    global $wgContLang, $wgFeedDiffCutoff;
    $titleObj = Title::makeTitle($row->rc_namespace, $row->rc_title);
    $newrev = Revision::newFromTitle($titleObj, $row->rc_this_oldid);
    if (!$newrev) {
        $diffText = "<p>Can't load revision {$row->rc_this_oldid}</p>";
        wfProfileOut($fname);
        return $comment . $diffText;
    }
    $newtext = $newrev->getText();
    if ($row->rc_last_oldid) {
        wfProfileIn("{$fname}-dodiff");
        $oldrev = Revision::newFromId($row->rc_last_oldid);
        if (!$oldrev) {
            $diffText = "<p>Can't load old revision {$row->rc_last_oldid}</p>";
            // Close the nested section too, otherwise the profiler is left
            // unbalanced on this early exit.
            wfProfileOut("{$fname}-dodiff");
            wfProfileOut($fname);
            return $comment . $diffText;
        }
        $oldtext = $oldrev->getText();
        # Old entries may contain illegal characters
        # which will damage output
        $oldtext = UtfNormal::cleanUp($oldtext);
        if (strlen($newtext) > $wgFeedDiffCutoff || strlen($oldtext) > $wgFeedDiffCutoff) {
            // Too large to inline: link to the diff page instead
            $diffLink = $titleObj->escapeFullUrl('diff=' . $row->rc_this_oldid . '&oldid=' . $row->rc_last_oldid);
            $diffText = '<a href="' . $diffLink . '">' . htmlspecialchars(wfMsgForContent('difference')) . '</a>';
        } else {
            $diffText = DifferenceEngine::getDiff($oldtext, $newtext, wfMsg('revisionasof', $wgContLang->timeanddate($row->rc_timestamp)), wfMsg('currentrev'));
        }
        wfProfileOut("{$fname}-dodiff");
    } else {
        // No previous revision: show the whole new text
        $diffText = '<p><b>' . wfMsg('newpage') . '</b></p>' . '<div>' . nl2br(htmlspecialchars($newtext)) . '</div>';
    }
    wfProfileOut($fname);
    return $comment . $diffText;
}
Example #24
0
 /**
  * Evaluate a mathematical expression
  *
  * The algorithm here is based on the infix to RPN algorithm given in
  * http://montcs.bloomu.edu/~bobmon/Information/RPN/infix2rpn.shtml
  * It's essentially the same as Dijkstra's shunting yard algorithm.
  *
  * @param $expr String: expression text; the entities &lt; and &gt; are
  *   unescaped before tokenizing
  * @return string The operands left on the stack after evaluation, joined
  *   with "<br />\n"
  * @throws ExprError on any syntax error, or when the operand or operator
  *   stack grows beyond $this->maxStackSize
  */
 function doExpression($expr)
 {
     $operands = array();
     $operators = array();
     # Unescape inequality operators
     $expr = strtr($expr, array('&lt;' => '<', '&gt;' => '>'));
     $p = 0;
     $end = strlen($expr);
     $expecting = 'expression';
     while ($p < $end) {
         if (count($operands) > $this->maxStackSize || count($operators) > $this->maxStackSize) {
             throw new ExprError('stack_exhausted');
         }
         $char = $expr[$p];
         $char2 = substr($expr, $p, 2);
         // Mega if-elseif-else construct
         // Only binary operators fall through for processing at the bottom, the rest
         // finish their processing and continue
         // First the unlimited length classes
         if (false !== strpos(EXPR_WHITE_CLASS, $char)) {
             // Whitespace
             $p += strspn($expr, EXPR_WHITE_CLASS, $p);
             continue;
         } elseif (false !== strpos(EXPR_NUMBER_CLASS, $char)) {
             // Number
             if ($expecting != 'expression') {
                 throw new ExprError('unexpected_number');
             }
             // Find the rest of it
             $length = strspn($expr, EXPR_NUMBER_CLASS, $p);
             // Convert it to float, silently removing double decimal points
             $operands[] = floatval(substr($expr, $p, $length));
             $p += $length;
             $expecting = 'operator';
             continue;
         } elseif (ctype_alpha($char)) {
             // Word
             // Find the rest of it
             $remaining = substr($expr, $p);
             if (!preg_match('/^[A-Za-z]*/', $remaining, $matches)) {
                 // This should be unreachable
                 throw new ExprError('preg_match_failure');
             }
             $word = strtolower($matches[0]);
             $p += strlen($word);
             // Interpret the word
             if (!isset($this->words[$word])) {
                 throw new ExprError('unrecognised_word', $word);
             }
             $op = $this->words[$word];
             switch ($op) {
                 // constant
                 case EXPR_EXPONENT:
                     if ($expecting != 'expression') {
                         // Where an operator is expected this word is a binary
                         // operator: leave the switch and fall through to the
                         // binary operator processing below. (A bare "continue"
                         // here behaves like "break" but triggers a warning on
                         // PHP 7.3+.)
                         break;
                     }
                     $operands[] = exp(1);
                     $expecting = 'operator';
                     continue 2;
                 case EXPR_PI:
                     if ($expecting != 'expression') {
                         throw new ExprError('unexpected_number');
                     }
                     $operands[] = pi();
                     $expecting = 'operator';
                     continue 2;
                 // Unary operator
                 case EXPR_NOT:
                 case EXPR_SINE:
                 case EXPR_COSINE:
                 case EXPR_TANGENS:
                 case EXPR_ARCSINE:
                 case EXPR_ARCCOS:
                 case EXPR_ARCTAN:
                 case EXPR_EXP:
                 case EXPR_LN:
                 case EXPR_ABS:
                 case EXPR_FLOOR:
                 case EXPR_TRUNC:
                 case EXPR_CEIL:
                     if ($expecting != 'expression') {
                         throw new ExprError('unexpected_operator', $word);
                     }
                     $operators[] = $op;
                     continue 2;
             }
             // Binary operator, fall through
             $name = $word;
         } elseif ($char2 == '<=') {
             $name = $char2;
             $op = EXPR_LESSEQ;
             $p += 2;
         } elseif ($char2 == '>=') {
             $name = $char2;
             $op = EXPR_GREATEREQ;
             $p += 2;
         } elseif ($char2 == '<>' || $char2 == '!=') {
             $name = $char2;
             $op = EXPR_NOTEQ;
             $p += 2;
         } elseif ($char == '+') {
             ++$p;
             if ($expecting == 'expression') {
                 // Unary plus
                 $operators[] = EXPR_POSITIVE;
                 continue;
             } else {
                 // Binary plus
                 $name = $char;
                 $op = EXPR_PLUS;
             }
         } elseif ($char == '-') {
             ++$p;
             if ($expecting == 'expression') {
                 // Unary minus
                 $operators[] = EXPR_NEGATIVE;
                 continue;
             } else {
                 // Binary minus
                 $name = $char;
                 $op = EXPR_MINUS;
             }
         } elseif ($char == '*') {
             $name = $char;
             $op = EXPR_TIMES;
             ++$p;
         } elseif ($char == '/') {
             $name = $char;
             $op = EXPR_DIVIDE;
             ++$p;
         } elseif ($char == '^') {
             $name = $char;
             $op = EXPR_POW;
             ++$p;
         } elseif ($char == '(') {
             if ($expecting == 'operator') {
                 throw new ExprError('unexpected_operator', '(');
             }
             $operators[] = EXPR_OPEN;
             ++$p;
             continue;
         } elseif ($char == ')') {
             // Evaluate everything back to the matching opening bracket
             $lastOp = end($operators);
             while ($lastOp && $lastOp != EXPR_OPEN) {
                 $this->doOperation($lastOp, $operands);
                 array_pop($operators);
                 $lastOp = end($operators);
             }
             if ($lastOp) {
                 array_pop($operators);
             } else {
                 throw new ExprError('unexpected_closing_bracket');
             }
             $expecting = 'operator';
             ++$p;
             continue;
         } elseif ($char == '=') {
             $name = $char;
             $op = EXPR_EQUALITY;
             ++$p;
         } elseif ($char == '<') {
             $name = $char;
             $op = EXPR_LESS;
             ++$p;
         } elseif ($char == '>') {
             $name = $char;
             $op = EXPR_GREATER;
             ++$p;
         } else {
             throw new ExprError('unrecognised_punctuation', UtfNormal::cleanUp($char));
         }
         // Binary operator processing
         if ($expecting == 'expression') {
             throw new ExprError('unexpected_operator', $name);
         }
         // Shunting yard magic
         $lastOp = end($operators);
         while ($lastOp && $this->precedence[$op] <= $this->precedence[$lastOp]) {
             $this->doOperation($lastOp, $operands);
             array_pop($operators);
             $lastOp = end($operators);
         }
         $operators[] = $op;
         $expecting = 'expression';
     }
     // Finish off the operator array
     while ($op = array_pop($operators)) {
         if ($op == EXPR_OPEN) {
             throw new ExprError('unclosed_bracket');
         }
         $this->doOperation($op, $operands);
     }
     return implode("<br />\n", $operands);
 }
Example #25
0
 /**
  * Callback function for cleanUpUTF8()
  *
  * Replaces a string value, in place, with the result of
  * UtfNormal::cleanUp(). Values of any other type are left untouched.
  *
  * @param $s Mixed: value to clean, modified by reference
  */
 private static function cleanUp_helper(&$s)
 {
     if (is_string($s)) {
         $s = UtfNormal::cleanUp($s);
     }
 }
Example #26
0
 /**
  * Receives messages from wfDebug for the pretty debugger.
  * Do NOT use this method, use MWDebug::log or wfDebug()
  *
  * The message is stored only when the debugger is enabled or one of
  * $wgDebugComments / $wgShowDebug is set. Stored messages are run through
  * UtfNormal::cleanUp() and have trailing whitespace trimmed.
  *
  * @since 1.19
  * @param $str string
  */
 public static function debugMsg($str)
 {
     global $wgDebugComments, $wgShowDebug;
     $wanted = self::$enabled || $wgDebugComments || $wgShowDebug;
     if (!$wanted) {
         return;
     }
     $clean = UtfNormal::cleanUp($str);
     self::$debug[] = rtrim($clean);
 }
Example #27
0
 /**
  * Preprocess some wikitext and return the document tree.
  * This is the ghost of Parser::replace_variables().
  *
  * Texts longer than $wgPreprocessorCacheThreshold have their intermediate
  * XML cached in $wgMemc for a day, prefixed with an 8-digit cache version;
  * entries whose version does not match self::CACHE_VERSION are ignored.
  *
  * @param string $text The text to parse
  * @param integer $flags Bitwise combination of:
  *          Parser::PTD_FOR_INCLUSION    Handle <noinclude>/<includeonly> as if the text is being
  *                                     included. Default is to assume a direct page view.
  *
  * The generated DOM tree must depend only on the input text and the flags.
  * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of bug 4899.
  *
  * Any flag added to the $flags parameter here, or any other parameter liable to cause a
  * change in the DOM tree for a given text, must be passed through the section identifier
  * in the section edit link and thus back to extractSections().
  *
  * @return PPNode_DOM Wrapper around the document element of the parsed XML
  * @throws MWException if the XML cannot be loaded even after UtfNormal::cleanUp()
  * @private
  */
 function preprocessToObj($text, $flags = 0)
 {
     wfProfileIn(__METHOD__);
     global $wgMemc, $wgPreprocessorCacheThreshold;
     $xml = false;
     $cacheable = strlen($text) > $wgPreprocessorCacheThreshold;
     if ($cacheable) {
         wfProfileIn(__METHOD__ . '-cacheable');
         $cacheKey = wfMemcKey('preprocess-xml', md5($text), $flags);
         $cached = $wgMemc->get($cacheKey);
         // The leading 8 bytes of a cached value hold the format version
         if ($cached && intval(substr($cached, 0, 8)) == self::CACHE_VERSION) {
             $xml = substr($cached, 8);
             wfDebugLog("Preprocessor", "Loaded preprocessor XML from memcached (key {$cacheKey})");
         }
     }
     if ($xml === false && $cacheable) {
         // Cache miss: generate the XML and store it together with the version
         wfProfileIn(__METHOD__ . '-cache-miss');
         $xml = $this->preprocessToXml($text, $flags);
         $wgMemc->set($cacheKey, sprintf("%08d", self::CACHE_VERSION) . $xml, 86400);
         wfProfileOut(__METHOD__ . '-cache-miss');
         wfDebugLog("Preprocessor", "Saved preprocessor XML to memcached (key {$cacheKey})");
     } elseif ($xml === false) {
         $xml = $this->preprocessToXml($text, $flags);
     }
     wfProfileIn(__METHOD__ . '-loadXML');
     $dom = new DOMDocument();
     wfSuppressWarnings();
     $loaded = $dom->loadXML($xml);
     wfRestoreWarnings();
     if (!$loaded) {
         // Second attempt after UtfNormal has removed invalid characters
         $xml = UtfNormal::cleanUp($xml);
         if (!$dom->loadXML($xml)) {
             throw new MWException(__METHOD__ . ' generated invalid XML');
         }
     }
     $obj = new PPNode_DOM($dom->documentElement);
     wfProfileOut(__METHOD__ . '-loadXML');
     if ($cacheable) {
         wfProfileOut(__METHOD__ . '-cacheable');
     }
     wfProfileOut(__METHOD__);
     return $obj;
 }
Example #28
0
/**
 * Make a string safe to embed in XML output.
 *
 * The page may contain old data which has not been properly normalized.
 * Invalid UTF-8 sequences or forbidden control characters would make the
 * XML output invalid, so the text goes through UtfNormal::cleanUp() before
 * being escaped with htmlspecialchars().
 *
 * @param $string String
 * @return string
 */
function xmlsafe($string)
{
    $fname = 'xmlsafe';
    wfProfileIn($fname);
    $escaped = htmlspecialchars(UtfNormal::cleanUp($string));
    wfProfileOut($fname);
    return $escaped;
}
Example #29
0
 /**
  * Recursively normalizes UTF-8 strings in the given array.
  *
  * Arrays are walked element by element with their keys preserved. Any
  * other value is normalized with $wgContLang->normalize() when
  * $wgContLang is set, and with UtfNormal::cleanUp() otherwise.
  *
  * @param $data string|array
  * @return array|string cleaned-up version of the given
  * @private
  */
 function normalizeUnicode($data)
 {
     if (is_array($data)) {
         // array_map() with a single array keeps the original keys
         return array_map(array($this, 'normalizeUnicode'), $data);
     }
     global $wgContLang;
     if (isset($wgContLang)) {
         return $wgContLang->normalize($data);
     }
     return UtfNormal::cleanUp($data);
 }
Example #30
0
 /**
  * Regex callback: build a <PAGE> placeholder element from a match.
  *
  * The first capture group is run through UtfNormal::cleanUp() and then
  * HTML-escaped before being placed in the value attribute.
  *
  * @param $matches Array: match array; only index 1 is read
  * @return string
  */
 function pageTextCallback($matches)
 {
     $cleaned = UtfNormal::cleanUp($matches[1]);
     $attrValue = htmlspecialchars($cleaned);
     return "<PAGE value=\"{$attrValue}\" />";
 }