/**
  * Returns the normalized form of the given page title, using the
  * normalization rules of the given site. If the given title is a redirect,
  * the redirect will be resolved and the redirect target is returned.
  *
  * @note This actually makes an API request to the remote site, so beware
  *   that this function is slow and depends on an external service.
  *
  * @see Site::normalizePageName
  *
  * @since 1.27
  *
  * @param string $pageName
  * @param string $apiUrl
  *
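  * @return string|false The normalized title, or false if the request failed
  *   or the response did not contain a usable page title.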
  * @throws \MWException
  */
 public function normalizePageName($pageName, $apiUrl)
 {
     // Only the page name is type-checked; a non-string is rejected outright.
     if (!is_string($pageName)) {
         throw new \MWException('$pageName must be a string');
     }

     // Bring the title into NFC before it is sent out (due to T42017).
     // Whitespace is intentionally left untouched.
     // @see https://phabricator.wikimedia.org/T42017
     $cleanName = Validator::cleanUp($pageName);

     // Query parameters for the remote API request.
     $query = [
         'action' => 'query',
         'prop' => 'info',
         'redirects' => true,
         'converttitles' => true,
         'format' => 'json',
         'titles' => $cleanName,
     ];
     $url = wfAppendQuery($apiUrl, $query);

     // Ask the external site.
     // @todo we need a good way to specify a timeout here.
     $response = $this->http->get($url, [], __METHOD__);
     if ($response === false) {
         wfDebugLog("MediaWikiSite", "call to external site failed: {$url}");
         return false;
     }

     // The body must decode to an array, otherwise it is treated as bad JSON.
     $decoded = FormatJson::decode($response, true);
     if (!is_array($decoded)) {
         wfDebugLog("MediaWikiSite", "call to <{$url}> returned bad json: " . $response);
         return false;
     }

     // Pick the record for the requested title out of the decoded response,
     // then reject records flagged as missing or invalid, or lacking a title.
     $record = static::extractPageRecord($decoded, $cleanName);

     if (isset($record['missing'])) {
         wfDebugLog("MediaWikiSite", "call to <{$url}> returned a marker for a missing page title! " . $response);
         return false;
     }

     if (isset($record['invalid'])) {
         wfDebugLog("MediaWikiSite", "call to <{$url}> returned a marker for an invalid page title! " . $response);
         return false;
     }

     if (!isset($record['title'])) {
         wfDebugLog("MediaWikiSite", "call to <{$url}> did not return a page title! " . $response);
         return false;
     }

     return $record['title'];
 }
示例#2
0
 *
 * @file
 * @ingroup UtfNormal
 */
use UtfNormal\Validator;
// This benchmark is meant for the command line only; bail out under any other SAPI.
if (PHP_SAPI !== 'cli') {
    exit("Run me from the command line please.\n");
}

// Pull in the autoloader from the vendor directory one level above this script's directory.
require_once dirname(__DIR__) . '/vendor/autoload.php';

define('BENCH_CYCLES', 1);
// 10m
define('BIGSIZE', 1024 * 1024 * 10);

// Allow BIGSIZE bytes plus an extra 120 * 1024 * 1024 bytes of memory.
ini_set('memory_limit', BIGSIZE + 120 * 1024 * 1024);

// Input files to benchmark, keyed by path, with a human-readable description.
$testfiles = array(
    'testdata/washington.txt' => 'English text',
    'testdata/berlin.txt' => 'German text',
    'testdata/bulgakov.txt' => 'Russian text',
    'testdata/tokyo.txt' => 'Japanese text',
    'testdata/young.txt' => 'Korean text',
);

$normalizer = new Validator();
Validator::loadData();

// Run the benchmark once per test file, in the order listed above.
foreach ($testfiles as $file => $desc) {
    benchmarkTest($normalizer, $file, $desc);
}
# -------
function benchmarkTest(&$u, $filename, $desc)
{
    print "Testing {$filename} ({$desc})...\n";
    $data = file_get_contents($filename);
    $all = $data;
    while (strlen($all) < BIGSIZE) {
        $all .= $all;
    }
    $data = $all;
    echo "Data is " . strlen($data) . " bytes.\n";
    $forms = array('quickIsNFCVerify', 'cleanUp');
示例#3
0
 /**
  * Returns true if the string is _definitely_ in NFC.
  * Returns false if not or uncertain.
  * @param string $string a UTF-8 string, altered on output to be valid UTF-8 safe for XML.
  * @return bool
  */
 static function quickIsNFCVerify(&$string)
 {
     // Thin wrapper: the check itself is done by Validator. $string is a
     // reference parameter here, so if the validator also takes it by
     // reference, its changes reach the caller's variable.
     $definitelyNfc = Validator::quickIsNFCVerify($string);

     return $definitelyNfc;
 }
 /**
  * Normalize string into NFC by using the cleanup method from UtfNormal.
  *
  * @param string $inputString The actual string to process.
  *
  * @return string
  */
 public function cleanupToNFC($inputString)
 {
     // Step 1: run the input through this object's own bad-character trimming.
     $trimmed = $this->trimBadChars($inputString);

     // Step 2: hand the trimmed string to Validator::cleanUp and return its result.
     return Validator::cleanUp($trimmed);
 }