/**
 * Returns the normalized form of the given page title, using the
 * normalization rules of the given site. If the given title is a redirect,
 * the redirect will be resolved and the redirect target is returned.
 *
 * @note This actually makes an API request to the remote site, so beware
 *   that this function is slow and depends on an external service.
 *
 * @see Site::normalizePageName
 *
 * @since 1.27
 *
 * @param string $pageName Title to normalize.
 * @param string $apiUrl URL of the remote API; the query parameters are
 *   appended to it with wfAppendQuery().
 *
 * @return string|false The title found in the page record, or false when the
 *   HTTP request fails, the response does not decode to an array, or the page
 *   record is flagged "missing"/"invalid" or carries no "title".
 * @throws \MWException If $pageName is not a string.
 */
public function normalizePageName($pageName, $apiUrl) {
    // Only $pageName is type-checked; $apiUrl is passed on as given.
    if (!is_string($pageName)) {
        throw new \MWException('$pageName must be a string');
    }

    // Make sure the string is normalized into NFC (due to T42017)
    // but do nothing to the whitespaces, that should work appropriately.
    // @see https://phabricator.wikimedia.org/T42017
    $cleanName = Validator::cleanUp($pageName);

    // Query parameters for the remote API call. The key order is kept as-is
    // so the generated URL does not change.
    $query = [
        'action' => 'query',
        'prop' => 'info',
        'redirects' => true,
        'converttitles' => true,
        'format' => 'json',
        'titles' => $cleanName,
    ];
    $url = wfAppendQuery($apiUrl, $query);

    // Call the external site.
    // @todo we need a good way to specify a timeout here.
    $ret = $this->http->get($url, [], __METHOD__);
    if ($ret === false) {
        wfDebugLog("MediaWikiSite", "call to external site failed: {$url}");
        return false;
    }

    $decoded = FormatJson::decode($ret, true);
    if (!is_array($decoded)) {
        wfDebugLog("MediaWikiSite", "call to <{$url}> returned bad json: " . $ret);
        return false;
    }

    // NOTE(review): extractPageRecord() is defined elsewhere; presumably it
    // picks the record matching the requested (cleaned) title - confirm.
    $record = static::extractPageRecord($decoded, $cleanName);

    // The checks run in a fixed order: "missing", then "invalid", then an
    // absent "title". The first one that applies determines the log message.
    if (isset($record['missing'])) {
        $failure = "call to <{$url}> returned a marker for a missing page title! ";
    } elseif (isset($record['invalid'])) {
        $failure = "call to <{$url}> returned a marker for an invalid page title! ";
    } elseif (!isset($record['title'])) {
        $failure = "call to <{$url}> did not return a page title! ";
    } else {
        return $record['title'];
    }

    // Every failure is logged together with the raw response body.
    wfDebugLog("MediaWikiSite", $failure . $ret);
    return false;
}
/**
 * Convenience entry point: clean up invalid UTF-8 sequences and convert
 * the string to normal form C (canonical composition).
 *
 * This method has no logic of its own. It forwards the argument unchanged to
 * Validator::cleanUp() and returns that result, so the exact cleaning and
 * normalization behaviour is defined there.
 *
 * NOTE(review): earlier documentation described a fast return for pure ASCII
 * strings, lesser optimizations for strings of known-good characters, and
 * said this is not as fast as toNFC(). Those would be properties of
 * Validator::cleanUp(), which is not visible here - confirm against it.
 *
 * @param string $string a UTF-8 string
 * @return string the value returned by Validator::cleanUp() for $string
 */
public static function cleanUp($string) {
    // Visibility is now explicit; "public" matches PHP's implicit default,
    // so callers are unaffected.
    return Validator::cleanUp($string);
}
/**
 * Normalize a string into NFC using the cleanup method from UtfNormal.
 *
 * The input is first passed through $this->trimBadChars(), and that result
 * is handed to Validator::cleanUp(), whose return value is returned as-is.
 *
 * NOTE(review): trimBadChars() is defined elsewhere; what it strips is not
 * visible from this method - confirm before relying on it.
 *
 * @param string $inputString The actual string to process.
 *
 * @return string The value produced by Validator::cleanUp().
 */
public function cleanupToNFC($inputString) {
    return Validator::cleanUp($this->trimBadChars($inputString));
}