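/**
  * Builds a search key from the given text: the text is brought into Unicode
  * normal form C, control characters are replaced, surrounding whitespace is
  * trimmed, and the result is converted to lower case.
  *
  * @since 0.4
  *
  * @param string|null $text
  *
  * @return string|null Null when $text is null; otherwise the search key,
  *         which may be the empty string.
  */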
 public function getSearchKey($text)
 {
     // Null and the empty string are handed back unchanged; there is nothing to normalize.
     if ($text === null || $text === '') {
         return $text;
     }
     // Composed normal form.
     $nfcText = $this->stringNormalizer->cleanupToNFC($text);
     if (!is_string($nfcText) || $nfcText === '') {
         wfWarn("Unicode normalization failed for `{$text}`");
         // Nothing usable is left to build a key from, and a non-string must
         // not be fed into the PCRE calls below.
         return '';
     }
     // WARNING: with the /u modifier, *any* invalid UTF-8 sequence makes
     //          preg_replace fail and return null (not a string).
     // Control character classes excluding private use areas.
     $strippedText = preg_replace('/[\\p{Cc}\\p{Cf}\\p{Cn}\\p{Cs}]+/u', ' ', $nfcText);
     if ($strippedText === null) {
         // The result is the same empty key as before, but null is no longer
         // passed on as the subject of the next preg_replace call (deprecated
         // since PHP 8.1).
         return '';
     }
     // \p{Z} includes all whitespace characters and invisible separators.
     $strippedText = preg_replace('/^\\p{Z}+|\\p{Z}+$/u', '', $strippedText);
     if ($strippedText === null || $strippedText === '') {
         // NOTE: Either the string consisted of nothing but whitespace,
         //       or PCRE failed and returned null.
         return '';
     }
     //TODO: Use Language::lc to convert to lower case.
     //      But that requires us to load ALL the language objects,
     //      which loads ALL the messages, which makes us run out
     //      of RAM (see bug T43103).
     $normalized = mb_strtolower($strippedText, 'UTF-8');
     if (!is_string($normalized) || $normalized === '') {
         // Only warn here: the value is still returned to the caller as-is.
         wfWarn("mb_strtolower normalization failed for `{$strippedText}`");
     }
     return $normalized;
 }
Esempio n. 2
0
 protected function addToOutput(Entity $entity, Status $status, $oldRevId = null)
 {
     // Report the entity's id and the revision information taken from $status
     // under the 'entity' key of the result.
     $this->getResultBuilder()->addBasicEntityInformation($entity->getId(), 'entity');
     $this->getResultBuilder()->addRevisionIdFromStatusToResult($status, 'entity', $oldRevId);

     $params = $this->extractRequestParams();

     // Only when both a site and a title were given: report the normalized
     // title if normalization actually changed the requested one.
     if (isset($params['site'], $params['title'])) {
         $requestedTitle = $params['title'];
         $normTitle = $this->stringNormalizer->trimToNFC($requestedTitle);
         $titleChanged = $normTitle !== $requestedTitle;

         if ($titleChanged) {
             $this->getResultBuilder()->addNormalizedTitle($requestedTitle, $normTitle, 'normalized');
         }
     }

     $this->getResultBuilder()->markSuccess(1);
 }
 /**
  * Tries to find item id for given siteId and title combination
  *
  * @param string $siteId
  * @param string $title
  * @param bool $normalize
  *
  * @return ItemId|null
  */
 private function getItemId($siteId, $title, $normalize)
 {
     // FIXME: This code is duplicated in SpecialItemByTitle::execute!
     $title = $this->stringNormalizer->trimToNFC($title);
     $id = $this->siteLinkLookup->getItemIdForLink($siteId, $title);
     if ($id !== null || $normalize !== true) {
         // Either the direct lookup succeeded, or the caller did not ask
         // for normalization on the external site.
         return $id;
     }
     // Try harder by requesting normalization on the external site.
     $siteObj = $this->siteStore->getSite($siteId);
     if ($siteObj === null) {
         // NOTE(review): MediaWiki's site lookup is documented to return null
         // for an unknown site id; without a site object there is nothing to
         // normalize against, so report "not found" instead of dereferencing null.
         return null;
     }
     //XXX: this passes the normalized title back into $title by reference...
     $this->normalizeTitle($title, $siteObj);
     return $this->siteLinkLookup->getItemIdForLink($siteObj->getGlobalId(), $title);
 }
 /**
  * Trims leading and trailing whitespace and performs unicode normalization
  * by calling Wikibase\StringNormalizer::trimToNFC().
  *
  * @see StringNormalizer::normalize()
  * @see Wikibase\StringNormalizer::trimToNFC()
  *
  * @param string $value the value to normalize
  *
  * @throws InvalidArgumentException if $value is not a string
  * @return string the normalized value
  */
 public function normalize($value)
 {
     // Hand the value to the normalizer held by this object and pass its
     // result back unchanged.
     $normalizedValue = $this->normalizer->trimToNFC($value);

     return $normalizedValue;
 }