/**
  * Encodes a string for JavaScript.
  *
  * The input is walked one UTF-8 character at a time; every character is
  * handed to encodeCharacter() and the results are joined in input order.
  *
  * @param string $input The string to encode, may be empty.
  * @return string The encoded string.
  */
 public function encode($input)
 {
     $characterCount = $this->charsetConversion->strlen('utf-8', $input);
     $encodedPieces = array();
     $position = 0;
     while ($position < $characterCount) {
         // Extract exactly one (possibly multi-byte) character at this position.
         $character = $this->charsetConversion->substr('utf-8', $input, $position, 1);
         $encodedPieces[] = $this->encodeCharacter($character);
         $position++;
     }
     return implode('', $encodedPieces);
 }
 /**
  * Checks that the UTF-8 character length of $value lies within the
  * configured bounds and registers an error on the result otherwise.
  *
  * The length must not be below the 'minimum' option. An unset or
  * empty-string 'maximum' option is normalised to null in $this->options
  * and means "no upper limit"; otherwise the length must not exceed it.
  *
  * @param mixed $value
  * @return void
  */
 public function isValid($value)
 {
     $characterCount = $this->charsetConverter->strlen('utf-8', $value);
     if (!($characterCount < (int) $this->options['minimum'])) {
         // Lower bound satisfied: normalise the upper bound before checking it.
         if (!isset($this->options['maximum']) || $this->options['maximum'] === '') {
             $this->options['maximum'] = null;
         }
         $upperBound = $this->options['maximum'];
         if ($upperBound === null || !($characterCount > (int) $upperBound)) {
             return;
         }
     }
     // Reached only when one of the bounds is violated; both share one message and code.
     $message = $this->renderMessage(
         $this->options['errorMessage'][0],
         $this->options['errorMessage'][1],
         'error'
     );
     $this->addError($message, 1441999425);
 }
Beispiel #3
0
 /**
  * Returns TRUE if the submitted value validates according to the rule.
  *
  * A field that is absent from the request is considered valid. Otherwise
  * the UTF-8 character length of the submitted value must be at least
  * $this->minimum and, when $this->maximum is not NULL, at most $this->maximum.
  *
  * @return bool
  * @see \TYPO3\CMS\Form\Validation\ValidatorInterface::isValid()
  */
 public function isValid()
 {
     // Nothing submitted for this field: nothing to reject.
     if (!$this->requestHandler->has($this->fieldName)) {
         return true;
     }
     $submitted = $this->requestHandler->getByMethod($this->fieldName);
     $characterCount = $this->charsetConverter->strlen('utf-8', $submitted);
     if ($characterCount < $this->minimum) {
         return false;
     }
     // A NULL maximum disables the upper bound.
     return $this->maximum === null || !($characterCount > $this->maximum);
 }
Beispiel #4
0
 /**
  * Add word to word-array
  * This function should be used to make sure CJK sequences are split up in the right way
  *
  * @param array $words Array of accumulated words
  * @param string $wordString Complete Input string from where to extract word
  * @param int $start Start position of word in input string
  * @param int $len The Length of the word string from start position
  * @return void
  */
 public function addWords(&$words, &$wordString, $start, $len)
 {
     // Cut the word out of the input string (byte offsets):
     $theWord = substr($wordString, $start, $len);
     // Classify the word by the type of its first character:
     $bc = 0;
     $cp = $this->utf8_ord($theWord, $bc);
     list($cType) = $this->charType($cp);

     if ($cType != 'cjk') {
         // Normal "single-byte" chars: strip every configured remove-char, then add the word.
         $charsToStrip = array();
         foreach ($this->lexerConf['removeChars'] as $skipJoin) {
             $charsToStrip[] = $this->csObj->UnumberToChar($skipJoin);
         }
         $words[] = str_replace($charsToStrip, '', $theWord);
         return;
     }

     /*
             DESCRIPTION OF (CJK) ALGORITHM

             Continuous letters and numbers make up words. Spaces and symbols
             separate letters and numbers into words. This is sufficient for
             all western text.

             CJK doesn't use spaces or separators to separate words, so the only
             way to really find out what constitutes a word would be to have a
             dictionary and advanced heuristics. Instead, we form pairs from
             consecutive characters, in such a way that searches will find only
             characters that appear in more-or-less the right sequence. For example:

             ABCDE => AB BC CD DE

             This works okay since both the index and the search query are split
             in the same manner, and since the set of characters is huge so the
             extra matches are not significant.

             (Hint taken from ZOPEs chinese user group)

             [Kasper: As far as I can see this will only work well with or-searches!]
     */
     $charCount = $this->csObj->strlen('utf-8', $theWord);
     // A lone character is added as-is; otherwise there is one pair per adjacent couple.
     $pairCount = $charCount == 1 ? 1 : $charCount - 1;
     for ($offset = 0; $offset < $pairCount; $offset++) {
         $words[] = $this->csObj->substr('utf-8', $theWord, $offset, 2);
     }
 }
 /**
  * Marks up the search words from $this->sWArr in $str by wrapping every
  * occurrence in a <strong class="tx-indexedsearch-redMarkup"> element, and
  * shortens the text between occurrences to build a summary.
  *
  * Empty search words are ignored. If there is no usable search word, or the
  * text cannot be split (preg_split() fails, e.g. on invalid UTF-8), the text
  * is treated as one single part without any highlighted occurrence.
  *
  * @param string $str Text in which to find and mark up search words. This text is assumed to be UTF-8 like the search words internally is.
  * @return string Processed content.
  */
 public function markupSWpartsOfString($str)
 {
     $htmlParser = GeneralUtility::makeInstance(HtmlParser::class);
     // Init:
     $str = str_replace('&nbsp;', ' ', $htmlParser->bidir_htmlspecialchars($str, -1));
     $str = preg_replace('/\\s\\s+/', ' ', $str);
     $paddedStr = ' ' . $str . ' ';
     $swForReg = array();
     // Prepare search words for regex. An empty word would add an empty
     // alternative that matches at every position, so it is skipped:
     foreach ($this->sWArr as $d) {
         $sword = (string) $d['sword'];
         if ($sword !== '') {
             $swForReg[] = preg_quote($sword, '/');
         }
     }
     // Split and combine:
     $parts = false;
     if ($swForReg) {
         $regExString = '(' . implode('|', $swForReg) . ')';
         $parts = preg_split('/' . $regExString . '/ui', $paddedStr, 20000, PREG_SPLIT_DELIM_CAPTURE);
     }
     if ($parts === false) {
         // Nothing to search for, or the split failed: keep the text as one un-highlighted part.
         $parts = array($paddedStr);
     }
     // Constants:
     $summaryMax = $this->conf['results.']['markupSW_summaryMax'];
     $postPreLgd = $this->conf['results.']['markupSW_postPreLgd'];
     $postPreLgd_offset = $this->conf['results.']['markupSW_postPreLgd_offset'];
     $divider = $this->conf['results.']['markupSW_divider'];
     // Even indexes hold surrounding text, odd indexes hold the matched words.
     $occurencies = (count($parts) - 1) / 2;
     if ($occurencies) {
         $postPreLgd = MathUtility::forceIntegerInRange($summaryMax / $occurencies, $postPreLgd, $summaryMax / 2);
     }
     // Variable:
     $summaryLgd = 0;
     $output = array();
     // Shorten in-between strings:
     foreach ($parts as $k => $strP) {
         if ($k % 2 == 0) {
             // Find length of the summary part:
             $strLen = $this->charsetConverter->strlen('utf-8', $parts[$k]);
             $output[$k] = $parts[$k];
             // Possibly shorten string:
             if (!$k) {
                 // First entry at all (only cropped on the frontside)
                 if ($strLen > $postPreLgd) {
                     $output[$k] = $divider . preg_replace('/^[^[:space:]]+[[:space:]]/', '', $this->charsetConverter->crop('utf-8', $parts[$k], -($postPreLgd - $postPreLgd_offset)));
                 }
             } elseif ($summaryLgd > $summaryMax || !isset($parts[$k + 1])) {
                 // In case summary length is exceeded OR if there are no more entries at all:
                 if ($strLen > $postPreLgd) {
                     $output[$k] = preg_replace('/[[:space:]][^[:space:]]+$/', '', $this->charsetConverter->crop('utf-8', $parts[$k], $postPreLgd - $postPreLgd_offset)) . $divider;
                 }
             } else {
                 // In-between search words: keep the head and the tail, joined by the divider
                 if ($strLen > $postPreLgd * 2) {
                     $output[$k] = preg_replace('/[[:space:]][^[:space:]]+$/', '', $this->charsetConverter->crop('utf-8', $parts[$k], $postPreLgd - $postPreLgd_offset)) . $divider . preg_replace('/^[^[:space:]]+[[:space:]]/', '', $this->charsetConverter->crop('utf-8', $parts[$k], -($postPreLgd - $postPreLgd_offset)));
                 }
             }
             $summaryLgd += $this->charsetConverter->strlen('utf-8', $output[$k]);
             // Protect output:
             $output[$k] = htmlspecialchars($output[$k]);
             // If summary length is exceeded, break the process:
             if ($summaryLgd > $summaryMax) {
                 break;
             }
         } else {
             $summaryLgd += $this->charsetConverter->strlen('utf-8', $strP);
             $output[$k] = '<strong class="tx-indexedsearch-redMarkup">' . htmlspecialchars($parts[$k]) . '</strong>';
         }
     }
     // Return result:
     return implode('', $output);
 }