/**
 * Checks `CRegex::indexOf` against ASCII and Unicode subjects: plain searches, searches from a start position,
 * retrieval of the matched substring through the output parameter, and the `-1` result for a pattern that does
 * not occur.
 */
public function testIndexOf()
{
    // ASCII.
    $this->assertEquals(11, CRegex::indexOf("Hello there!", "/[^\\w ]/"));
    $this->assertEquals(12, CRegex::indexOf("Hello! There!", "/[^\\w ]/", 6));

    // The matched substring is reported through the fourth (by-reference) argument.
    $foundString = null;
    $pos = CRegex::indexOf("Hello MISTER there!", "/[A-Z]{2,}/", 0, $foundString);
    $this->assertEquals(6, $pos);
    $this->assertTrue(CString::equals($foundString, "MISTER"));

    $this->assertEquals(-1, CRegex::indexOf("Hello there!", "/\\d/"));

    // Unicode. The expected positions count the leading "¡" as two units, i.e. they are byte offsets.
    $this->assertEquals(13, CRegex::indexOf("¡Hello there!", "/[^\\p{L} ]/u", 2));
    $this->assertEquals(14, CRegex::indexOf("¡Hello! There!", "/[^\\p{L} ]/u", 8));

    // Reset the output variable so that the value from the ASCII case cannot leak into this check.
    $foundString = null;
    $pos = CRegex::indexOf("¡Hello SEÑOR there!", "/\\p{Lu}{2,}/u", 0, $foundString);
    $this->assertEquals(8, $pos);
    $this->assertTrue(CUString::equals($foundString, "SEÑOR"));

    $this->assertEquals(-1, CRegex::indexOf("¡Hello there!", "/\\d/u"));
}
/**
 * Wraps the text in a string to a specified width and returns the new string.
 *
 * The input is first newline-normalized and then consumed in portions of at most `$width` characters. Each
 * portion is broken after its last space or break-allow character (Soft Hyphen or Tibetan Mark Intersyllabic
 * Tsheg) if it has one; a portion without any such character is either broken at its end or left adjacent to
 * the text that follows, depending on the wrapping options.
 *
 * @param  string $string The string with the text to be wrapped.
 * @param  int $width The wrapping width, in characters. Must be greater than zero.
 * @param  bitfield $wrappingFlags **OPTIONAL. Default is** `WRAPPING_DEFAULT`. The wrapping option(s). The
 * available options are `WRAPPING_BREAK_SPACELESS_LINES`, `WRAPPING_ALLOW_TRAILING_SPACES`,
 * `WRAPPING_DISALLOW_LEADING_SPACES`, and `WRAPPING_DONT_BREAK_SPACELESS_CJK_ENDING_LINES`
 * (see [Summary](#summary)).
 * @param  string $newline **OPTIONAL. Default is** LF (U+000A). The newline character(s) to be used for making
 * new lines in the process of wrapping.
 *
 * @return string The wrapped text.
 */
public static function wordWrap($string, $width, $wrappingFlags = self::WRAPPING_DEFAULT,
    $newline = self::NEWLINE)
{
    assert('is_cstring($string) && is_int($width) && is_bitfield($wrappingFlags) && is_cstring($newline)',
        vs(isset($this), get_defined_vars()));
    assert('$width > 0', vs(isset($this), get_defined_vars()));

    // Constant. Newline character that is used by the input string (after newline normalization).
    $normNl = self::NEWLINE;

    // Constant. Determines what characters should be considered spaces.
    // A character in the "Zs" Unicode category, an HT, or a Zero Width Space, except No-break Space and Narrow
    // No-break Space.
    $spaceSubjectRe = "(\\p{Zs}|\\x{0009}|\\x{200B})(?<!\\x{00A0}|\\x{202F})";

    // Break enabling characters.
    // Soft Hyphen or Tibetan Mark Intersyllabic Tsheg.
    $breakAllowCharSubjectRe = "\\x{00AD}|\\x{0F0B}";

    // Retrieve the wrapping options.
    $breakSpacelessLines = CBitField::isBitSet($wrappingFlags, self::WRAPPING_BREAK_SPACELESS_LINES);
    $allowTrailingSpaces = CBitField::isBitSet($wrappingFlags, self::WRAPPING_ALLOW_TRAILING_SPACES);
    $disallowLeadingSpaces = CBitField::isBitSet($wrappingFlags, self::WRAPPING_DISALLOW_LEADING_SPACES);
    $dontBreakSpacelessCjkEndingLines = CBitField::isBitSet($wrappingFlags,
        self::WRAPPING_DONT_BREAK_SPACELESS_CJK_ENDING_LINES);

    // Normalize newlines in the input string.
    $string = self::normNewlines($string, $normNl);
    $normNlLength = self::length($normNl);

    $newString = "";

    // The current position is tracked twice: in characters ($pos), for the string methods of this class, and in
    // bytes ($bytePos), for the regex searches and the `CString` calls.
    $pos = 0;
    $bytePos = 0;
    $sLength = self::length($string);
    while (true) {
        $numCharsLeft = $sLength - $pos;

        // A portion begins at the very start or right after a newline, either it is native or added. The length of
        // a portion is the wrapping width or less.
        $portionLength = CMathi::min($width, $numCharsLeft);
        $portion = self::substr($string, $pos, $portionLength);
        $portionByteLength = CString::length($portion);

        if ($portionLength == $numCharsLeft) {
            // All done.
            $newString .= $portion;
            break;
        }

        // The starting position of the next portion.
        $nextPos = $pos + $portionLength;
        $nextBytePos = $bytePos + $portionByteLength;

        // Look for the first occurrence of a newline in the portion.
        $nlPos = self::indexOf($portion, $normNl);
        if ($nlPos != -1) {
            // This portion contains a newline, so the next portion is going to start right after this first found
            // newline.
            $subPLength = $nlPos + $normNlLength;
            $subP = self::substr($portion, 0, $subPLength);
            $newString .= $subP;
            $pos += $subPLength;
            $bytePos += CString::length($subP);
            continue;
        }

        // There are no newlines in this portion. Before the next step, make sure that the next portion is not
        // going to start with a newline.
        if ($numCharsLeft - $portionLength >= $normNlLength) {
            $nextPortionBeginning = self::substr($string, $nextPos, $normNlLength);
            if (self::indexOf($nextPortionBeginning, $normNl) == 0) {
                // The next portion is going to start with a newline, so no need to break this one, regardless of
                // whether or not it contains any spaces.
                $newString .= $portion;
                $pos = $nextPos;
                $bytePos = $nextBytePos;
                continue;
            }
        }

        // The next portion is not going to start with a newline. Look for the last occurrence of a space or
        // break-allow character in this portion.
        $lastSubjectBytePos = CRegex::lastIndexOf($portion,
            "/({$spaceSubjectRe})|({$breakAllowCharSubjectRe})/u", 0, $foundString);
        if ($lastSubjectBytePos != -1) {
            // Add a newline right after this last occurring space or break-allow character.
            $subP = CString::substring($portion, 0, $lastSubjectBytePos + CString::length($foundString));
            $newString .= $subP;
            $newString .= $newline;
            $pos += self::length($subP);
            $bytePos += CString::length($subP);
            continue;
        }

        // There are no spaces or break-allow characters in this portion. Consider adding a newline right after the
        // portion.
        if ($breakSpacelessLines ||
            (!$dontBreakSpacelessCjkEndingLines && self::hasCjkChar(self::charAt($portion, $portionLength - 1)))) {
            $newString .= $portion;
            $newString .= $newline;
            $pos = $nextPos;
            $bytePos = $nextBytePos;
            continue;
        }

        // There are no spaces or break-allow characters in this portion and it should go adjacent to the upcoming
        // text. Look for the first newline, space, or break-allow character in the upcoming text.
        $nextSubjectBytePos = CRegex::indexOf($string,
            "/{$normNl}|(({$spaceSubjectRe})|({$breakAllowCharSubjectRe}))(?!{$normNl})/u", $nextBytePos,
            $foundString);
        if ($nextSubjectBytePos != -1) {
            // Found a newline, space, or a break-allow character, so the next portion is going to start right
            // after it.
            $afterP = CString::substring($string, $nextBytePos,
                $nextSubjectBytePos + CString::length($foundString));
            $newString .= $portion;
            $newString .= $afterP;
            if (!CString::equals($foundString, $normNl)) {
                // It is a space or break-allow character that was found, so add a newline after it.
                $newString .= $newline;
            }
            $pos += $portionLength + self::length($afterP);
            $bytePos += $portionByteLength + CString::length($afterP);
            continue;
        }

        // There are no spaces, newlines, or break-allow characters in the upcoming text. Execution can only get
        // here with `$breakSpacelessLines` being off (otherwise the portion has already been broken at its end
        // by the check above and the loop has moved on), so the rest of the text goes adjacent to this portion
        // as is.
        $newString .= $portion;
        $newString .= self::substr($string, $nextPos);
        break;
    }

    // NOTE(review): both cleanup patterns below anchor on the normalized newline and not on `$newline`. With a
    // custom `$newline` that does not start with LF, spaces preceding an inserted newline are not seen as
    // trailing; with one that does not end with LF, spaces following it are not seen as leading. Confirm that
    // this is intended.
    if (!$allowTrailingSpaces) {
        // Remove trailing spaces.
        $newString = CRegex::remove($newString, "/({$spaceSubjectRe})+(?={$normNl}|\\z)/u");
    }
    if ($disallowLeadingSpaces) {
        // Remove leading spaces.
        $newString = CRegex::remove($newString, "/(?<={$normNl}|^)({$spaceSubjectRe})+/u");
    }

    return $newString;
}
/**
 * Finds the first place in the string where a regular expression pattern matches and, on request, also reports
 * the substring that matched.
 *
 * **NOTE.** Positions are counted in bytes by this method, as they are by the PCRE engine, whereas the non-regex
 * methods of the class count positions within a Unicode string in characters.
 *
 * @param  string $ofPattern The pattern to search for.
 * @param  int $startPos **OPTIONAL. Default is** `0`. The position from which the search starts.
 * @param  reference $foundString **OPTIONAL. OUTPUT.** If this parameter was provided and the pattern has been
 * found, it receives the first substring that matched the pattern, as a value of type `CUStringObject`.
 *
 * @return int The position of the first match of the pattern in the string, or `-1` if the pattern was not
 * found.
 */
public function reIndexOf($ofPattern, $startPos = 0, &$foundString = null)
{
    // The pattern goes through `ensureUModifier` before the search is delegated to `CRegex`.
    $matchPos = CRegex::indexOf($this, self::ensureUModifier($ofPattern), $startPos, $foundString);

    // Hand the matched substring back in its OOP form.
    $foundString = to_oop($foundString);

    return $matchPos;
}