Esempio n. 1
0
 /**
  * Check that phutil_utf8v() splits each sample string into the expected
  * list of characters. The samples cover the empty string, plain ASCII, a
  * multibyte character, and a combining character.
  */
 public function testUTF8v()
 {
     $cases = array(
         '' => array(),
         'x' => array('x'),
         'quack' => array('q', 'u', 'a', 'c', 'k'),
         "x東y" => array('x', "東", 'y'),
         "x͠y" => array("x", "͠", 'y'),
     );
     foreach ($cases as $input => $expected_vector) {
         $actual_vector = phutil_utf8v($input);
         $this->assertEqual($expected_vector, $actual_vector, 'Vector of ' . $input);
     }
 }
 /**
  * Render the log events as a table with type, date, time and message
  * columns (plus a leading daemon column for combined logs).
  *
  * Unless full messages were requested, each message is truncated to at
  * most 12 lines and then to at most 8192 UTF-8 characters, and a link to
  * the full event is appended whenever something was cut.
  *
  * @return mixed Whatever AphrontTableView::render() returns.
  */
 public function render()
 {
     $viewer = $this->getViewer();
     $rows = array();
     foreach ($this->events as $event) {
         // Limit display log size. If a daemon gets stuck in an output loop this
         // page can be like >100MB if we don't truncate stuff. Try to do cheap
         // line-based truncation first, and fall back to expensive UTF-8 character
         // truncation if that doesn't get things short enough.
         $message = $event->getMessage();
         $more = null;
         if (!$this->showFullMessage) {
             $more_lines = null;
             $more_chars = null;
             $line_limit = 12;
             if (substr_count($message, "\n") > $line_limit) {
                 $lines = explode("\n", $message);
                 $more_lines = count($lines) - $line_limit;
                 $message = implode("\n", array_slice($lines, 0, $line_limit));
             }
             $char_limit = 8192;
             // strlen() counts bytes, so it is only a cheap pre-check: a message
             // with multibyte characters can exceed the limit in bytes while
             // still being under it in characters.
             if (strlen($message) > $char_limit) {
                 $message_v = phutil_utf8v($message);
                 $char_count = count($message_v);
                 // Only truncate (and only report hidden characters) when the
                 // character count really exceeds the limit. Otherwise the
                 // difference would be negative, which is truthy and would
                 // produce a bogus "Show -N more character(s)" link.
                 if ($char_count > $char_limit) {
                     $more_chars = $char_count - $char_limit;
                     $message = implode('', array_slice($message_v, 0, $char_limit));
                 }
             }
             if ($more_chars) {
                 $more = new PhutilNumber($more_chars);
                 $more = pht('Show %d more character(s)...', $more);
             } else {
                 if ($more_lines) {
                     $more = new PhutilNumber($more_lines);
                     $more = pht('Show %d more line(s)...', $more);
                 }
             }
             if ($more) {
                 $id = $event->getID();
                 $more = array("\n...\n", phutil_tag('a', array('href' => "/daemon/event/{$id}/"), $more));
             }
         }
         $row = array($event->getLogType(), phabricator_date($event->getEpoch(), $viewer), phabricator_time($event->getEpoch(), $viewer), array($message, $more));
         if ($this->combinedLog) {
             // Combined logs mix several daemons, so link each row to its log.
             array_unshift($row, phutil_tag('a', array('href' => '/daemon/log/' . $event->getLogID() . '/'), pht('Daemon %s', $event->getLogID())));
         }
         $rows[] = $row;
     }
     $classes = array('', '', 'right', 'wide prewrap');
     $headers = array('Type', 'Date', 'Time', 'Message');
     if ($this->combinedLog) {
         array_unshift($classes, 'pri');
         array_unshift($headers, 'Daemon');
     }
     $log_table = new AphrontTableView($rows);
     $log_table->setHeaders($headers);
     $log_table->setColumnClasses($classes);
     return $log_table->render();
 }
 /**
  * Render the log events as a table with type, date, time and message
  * columns (plus a leading daemon column for combined logs).
  *
  * Each message is truncated to at most 12 lines and then to at most 8192
  * UTF-8 characters; a marker describing how much was hidden is appended
  * when something was cut.
  *
  * @return mixed Whatever AphrontTableView::render() returns.
  * @throws Exception If no user has been set on the view.
  */
 public function render()
 {
     $rows = array();
     if (!$this->user) {
         throw new Exception("Call setUser() before rendering!");
     }
     foreach ($this->events as $event) {
         // Limit display log size. If a daemon gets stuck in an output loop this
         // page can be like >100MB if we don't truncate stuff. Try to do cheap
         // line-based truncation first, and fall back to expensive UTF-8 character
         // truncation if that doesn't get things short enough.
         $message = $event->getMessage();
         $more_lines = null;
         $more_chars = null;
         $line_limit = 12;
         if (substr_count($message, "\n") > $line_limit) {
             $lines = explode("\n", $message);
             $more_lines = count($lines) - $line_limit;
             $message = implode("\n", array_slice($lines, 0, $line_limit));
         }
         $char_limit = 8192;
         // strlen() counts bytes, so it is only a cheap pre-check: a message
         // with multibyte characters can exceed the limit in bytes while still
         // being under it in characters.
         if (strlen($message) > $char_limit) {
             $message_v = phutil_utf8v($message);
             $char_count = count($message_v);
             // Only truncate (and only report hidden characters) when the
             // character count really exceeds the limit. Otherwise the
             // difference would be negative, which is truthy and would produce
             // a bogus "<... -N more characters ...>" marker.
             if ($char_count > $char_limit) {
                 $more_chars = $char_count - $char_limit;
                 $message = implode('', array_slice($message_v, 0, $char_limit));
             }
         }
         $more = null;
         if ($more_chars) {
             $more = number_format($more_chars);
             $more = "\n<... {$more} more characters ...>";
         } else {
             if ($more_lines) {
                 $more = number_format($more_lines);
                 $more = "\n<... {$more} more lines ...>";
             }
         }
         // The message is escaped first and newlines are converted afterwards,
         // so the only markup in the cell is the <br /> tags added here.
         $row = array(phutil_escape_html($event->getLogType()), phabricator_date($event->getEpoch(), $this->user), phabricator_time($event->getEpoch(), $this->user), str_replace("\n", '<br />', phutil_escape_html($message . $more)));
         if ($this->combinedLog) {
             // Combined logs mix several daemons, so link each row to its log.
             array_unshift($row, phutil_render_tag('a', array('href' => '/daemon/log/' . $event->getLogID() . '/'), phutil_escape_html('Daemon ' . $event->getLogID())));
         }
         $rows[] = $row;
     }
     $classes = array('', '', 'right', 'wide wrap');
     $headers = array('Type', 'Date', 'Time', 'Message');
     if ($this->combinedLog) {
         array_unshift($classes, 'pri');
         array_unshift($headers, 'Daemon');
     }
     $log_table = new AphrontTableView($rows);
     $log_table->setHeaders($headers);
     $log_table->setColumnClasses($classes);
     return $log_table->render();
 }
 /**
  * Produce "rainbow" markup for a source string: every character other than
  * a space or newline is wrapped in a span whose class cycles through seven
  * color classes. Spaces and newlines pass through unchanged and do not
  * advance the color cycle.
  *
  * @param  string Source text to highlight.
  * @return ImmediateFuture Future wrapping the generated markup string.
  */
 public function getHighlightFuture($source)
 {
     $palette = array('rbw_r', 'rbw_o', 'rbw_y', 'rbw_g', 'rbw_b', 'rbw_i', 'rbw_v');
     $palette_size = count($palette);
     $next = 0;
     $markup = '';
     foreach (phutil_utf8v($source) as $glyph) {
         $is_whitespace = ($glyph == ' ' || $glyph == "\n");
         if ($is_whitespace) {
             $markup .= $glyph;
         } else {
             $markup .= '<span class="' . $palette[$next] . '">' . phutil_escape_html($glyph) . '</span>';
             $next = ($next + 1) % $palette_size;
         }
     }
     return new ImmediateFuture($markup);
 }
 /**
  * Produce "rainbow" markup for a source string: every character other than
  * a space or newline becomes a span tag whose class cycles through seven
  * color classes. Spaces and newlines pass through unchanged and do not
  * advance the color cycle.
  *
  * @param  string Source text to highlight.
  * @return ImmediateFuture Future wrapping the joined markup.
  */
 public function getHighlightFuture($source)
 {
     $palette = array('rbw_r', 'rbw_o', 'rbw_y', 'rbw_g', 'rbw_b', 'rbw_i', 'rbw_v');
     $palette_size = count($palette);
     $next = 0;
     $pieces = array();
     foreach (phutil_utf8v($source) as $glyph) {
         if ($glyph != ' ' && $glyph != "\n") {
             $pieces[] = phutil_tag('span', array('class' => $palette[$next]), $glyph);
             $next = ($next + 1) % $palette_size;
         } else {
             $pieces[] = $glyph;
         }
     }
     return new ImmediateFuture(phutil_implode_html('', $pieces));
 }
 /**
  * Split the input text into characters and describe each one.
  *
  * Characters listed in self::$invisibleChars are replaced by their mapped
  * name in angle brackets, other characters whose first byte is below 32 are
  * replaced by a "<0x..>" hex marker, and everything else is kept verbatim.
  *
  * @return list<dict> One dictionary per character with the keys "special"
  *                    (bool) and "value" (string).
  */
 public function getStringParts()
 {
     $parts = array();
     foreach (phutil_utf8v($this->inputText) as $glyph) {
         if (array_key_exists($glyph, self::$invisibleChars)) {
             $is_special = true;
             $display = '<' . self::$invisibleChars[$glyph] . '>';
         } elseif (ord($glyph) < 32) {
             $is_special = true;
             $display = '<0x' . bin2hex($glyph) . '>';
         } else {
             $is_special = false;
             $display = $glyph;
         }
         $parts[] = array('special' => $is_special, 'value' => $display);
     }
     return $parts;
 }
 /**
  * Find the options which are the closest spelling matches for an input.
  *
  * Input and options are normalized, the edit distance from each option to
  * the input is computed, and only the options at the best distance (capped
  * at the configured maximum distance) are kept.
  *
  * @param  string       Text to correct.
  * @param  list<string> Candidate spellings.
  * @return list<string> Normalized options which matched; may be empty.
  * @throws PhutilInvalidStateException If no edit distance matrix or no
  *         maximum distance has been configured.
  */
 public function correctSpelling($input, array $options)
 {
     $matrix = $this->getEditDistanceMatrix();
     if (!$matrix) {
         throw new PhutilInvalidStateException('setEditDistanceMatrix');
     }
     $max_distance = $this->getMaximumDistance();
     if (!$max_distance) {
         throw new PhutilInvalidStateException('setMaximumDistance');
     }

     $needle = $this->normalizeString($input);
     $candidates = array();
     foreach ($options as $key => $raw_option) {
         $candidates[$key] = $this->normalizeString($raw_option);
     }

     // Score every candidate against the input.
     $needle_v = phutil_utf8v($needle);
     $scores = array();
     foreach ($candidates as $candidate) {
         $matrix->setSequences(phutil_utf8v($candidate), $needle_v);
         $scores[$candidate] = $matrix->getEditDistance();
     }
     asort($scores);

     // Keep only candidates at the best distance, capped at the maximum.
     $threshold = min($max_distance, head($scores));
     $closest = array();
     foreach ($scores as $candidate => $score) {
         if ($score > $threshold) {
             continue;
         }
         $closest[$candidate] = $score;
     }

     // Before filtering, check if we have multiple equidistant matches and
     // return them if we do. This prevents us from, e.g., matching "alnd" with
     // both "land" and "amend", then dropping "land" for being too short, and
     // incorrectly completing to "amend".
     if (count($closest) > 1) {
         return array_keys($closest);
     }

     // Drop a match which is shorter than its own edit distance.
     $matches = array();
     foreach ($closest as $candidate => $score) {
         if (phutil_utf8_strlen($candidate) < $score) {
             continue;
         }
         $matches[] = $candidate;
     }
     return $matches;
 }
Esempio n. 8
0
 /**
  * Fold a line so that no output line is longer than 75 bytes. Every output
  * line ends with CRLF and every continuation line starts with one space.
  *
  * @param  string Line to fold.
  * @return string Folded line, including the trailing CRLF.
  */
 private function wrapICSLine($line)
 {
     // NOTE: Folding happens between UTF-8 characters, so a long sequence of
     // combining characters may be split in the middle of the sequence. This
     // is okay and generally anticipated by RFC5545, which even allows
     // implementations to split multibyte characters. The sequence will be
     // stitched back together properly by whatever is parsing things.
     $folded = '';
     $current = '';
     foreach (phutil_utf8v($line) as $glyph) {
         // Start a continuation line if this character would push the current
         // line past 75 bytes.
         $needed = strlen($current) + strlen($glyph);
         if ($needed > 75) {
             $folded .= $current . "\r\n";
             $current = ' ';
         }
         $current .= $glyph;
     }
     return $folded . $current . "\r\n";
 }
Esempio n. 9
0
/**
 * Split a UTF-8 string into an array of characters. Combining characters
 * are not split: each one is appended to the character before it. If the
 * string starts with a combining character, a space is prepended so the
 * combining character has something to attach to.
 *
 * @param string A valid utf-8 string.
 * @return list  A list of characters in the string.
 */
function phutil_utf8v_combined($string)
{
    $components = phutil_utf8v($string);
    // If the first character in the string is a combining character,
    // prepend a space to the string.
    if ($components && phutil_utf8_is_combining_character($components[0])) {
        $components = phutil_utf8v(' ' . $string);
    }
    // Merge in a single pass. Removing elements and reindexing the vector for
    // every combining character costs time proportional to the string length
    // each time, which is quadratic for text with many combining characters.
    $glyphs = array();
    $last = -1;
    foreach ($components as $component) {
        // The first component always starts a glyph; at this point it is
        // never a combining character.
        if ($last >= 0 && phutil_utf8_is_combining_character($component)) {
            $glyphs[$last] .= $component;
        } else {
            $glyphs[] = $component;
            $last++;
        }
    }
    return $glyphs;
}
 /**
  * Highlight the words of the current query inside a result string, so users
  * can see why a result matched.
  *
  * @param  string Result text to decorate.
  * @return string|list The original string when nothing is processed (empty
  *                     query or text, or too much data); otherwise a list of
  *                     plain strings and "strong" tags in display order.
  */
 private function emboldenQuery($str)
 {
     $query = $this->query->getParameter('query');
     if (!strlen($query) || !strlen($str)) {
         return $str;
     }
     // This algorithm is safe but not especially fast, so skip it for large
     // inputs. This mostly prevents silly/malicious queries from doing
     // anything bad.
     if (strlen($query) + strlen($str) > 2048) {
         return $str;
     }

     // One flag per byte of the string. Flags are byte oriented; the output
     // pass below only switches styling between whole characters.
     $bold = array_fill(0, strlen($str), false);

     // Mark every occurrence of every query word.
     foreach (preg_split('/ +/', $query) as $word) {
         $word = trim(trim($word), '"+');
         if (!strlen($word)) {
             continue;
         }
         $found = null;
         $pattern = '/(?:^|\\b)(' . preg_quote($word, '/') . ')/i';
         if (!preg_match_all($pattern, $str, $found, PREG_OFFSET_CAPTURE)) {
             continue;
         }
         foreach ($found[1] as $capture) {
             list($matched_text, $start) = $capture;
             $end = $start + strlen($matched_text);
             for ($at = $start; $at < $end; $at++) {
                 $bold[$at] = true;
             }
         }
     }

     // Walk the string character by character, grouping consecutive
     // characters with the same styling into runs.
     $out = array();
     $run = '';
     $run_is_bold = false;
     $byte_offset = 0;
     foreach (phutil_utf8v($str) as $glyph) {
         if ($bold[$byte_offset] != $run_is_bold) {
             if (strlen($run)) {
                 $out[] = $run_is_bold ? phutil_tag('strong', array(), $run) : $run;
                 $run = '';
             }
             $run_is_bold = !$run_is_bold;
         }
         $run .= $glyph;
         $byte_offset += strlen($glyph);
     }
     // Flush the final run.
     if (strlen($run)) {
         $out[] = $run_is_bold ? phutil_tag('strong', array(), $run) : $run;
     }
     return $out;
 }
Esempio n. 11
0
 /**
  * Split an escaped TEXT value into its comma-separated parts and remove the
  * backslash escaping.
  *
  * An unescaped comma separates parts. A backslash escapes the single
  * character after it: "\n" and "\N" become a newline, and any other
  * escaped character is kept literally.
  *
  * @param  string Raw TEXT value.
  * @return list<string> Unescaped parts; always at least one element.
  */
 private function unescapeTextValue($data)
 {
     $result = array();
     $buf = '';
     $esc = false;
     foreach (phutil_utf8v($data) as $c) {
         if (!$esc) {
             if ($c == '\\') {
                 $esc = true;
             } else {
                 if ($c == ',') {
                     $result[] = $buf;
                     $buf = '';
                 } else {
                     $buf .= $c;
                 }
             }
         } else {
             switch ($c) {
                 case 'n':
                 case 'N':
                     $buf .= "\n";
                     break;
                 default:
                     $buf .= $c;
                     break;
             }
             // The escape covers exactly one character. Without this reset,
             // everything after the first backslash would be treated as
             // escaped: commas would stop separating parts, every later "n"
             // would become a newline, and the trailing-backslash failure
             // below would be raised for any value containing a backslash.
             $esc = false;
         }
     }
     // Still in escape mode here means the value ended with a lone backslash.
     if ($esc) {
         $this->raiseParseFailure(self::PARSE_UNESCAPED_BACKSLASH, pht('ICS document contains TEXT value ending with unescaped ' . 'backslash.'));
     }
     $result[] = $buf;
     return $result;
 }
 /**
  * Record an ngram constraint for a non-empty value. Empty values are
  * ignored.
  *
  * @param  PhabricatorSearchNgrams Index the constraint applies to.
  * @param  string Value to constrain on.
  * @return this
  */
 protected function withNgramsConstraint(PhabricatorSearchNgrams $index, $value)
 {
     if (!strlen($value)) {
         return $this;
     }
     // The stored length is counted in UTF-8 characters, not bytes.
     $characters = phutil_utf8v($value);
     $this->ngrams[] = array('index' => $index, 'value' => $value, 'length' => count($characters));
     return $this;
 }
Esempio n. 13
0
 /**
  * Format the log string, replacing "%x" variables with values.
  *
  * "%%" produces a literal "%". A variable with no value in the data map is
  * written as "-". A "%" at the very end of the format is dropped.
  *
  * @return string Finalized, log string for writing to disk.
  * @task internals
  */
 private function format()
 {
     // Always convert '%%' to literal '%'.
     $map = array('%' => '%') + $this->data;
     $result = '';
     $saw_percent = false;
     foreach (phutil_utf8v($this->format) as $c) {
         if ($saw_percent) {
             $saw_percent = false;
             if (array_key_exists($c, $map)) {
                 // Escape control bytes (0x00-0x1F), the backslash, and bytes
                 // 0x7F-0xFF. The ranges are spelled with octal escapes so both
                 // endpoints are explicit: addcslashes() rejects a ".." range
                 // which has no character to its left.
                 $result .= addcslashes($map[$c], "\0..\37\\\177..\377");
             } else {
                 $result .= '-';
             }
         } else {
             if ($c == '%') {
                 $saw_percent = true;
             } else {
                 $result .= $c;
             }
         }
     }
     return rtrim($result) . "\n";
 }
 /**
  * Hard-wrap a piece of UTF-8 text with embedded HTML tags and entities.
  *
  * An entity ("&...;") counts as one character and a tag ("<...>") counts as
  * none. After every $this->lineWidth counted characters an over-the-line
  * marker followed by a line break is inserted.
  *
  * @param   string An HTML string with tags and entities.
  * @return  string Hard-wrapped string.
  */
 protected function lineWrap($line)
 {
     $c = 0;
     $break_here = array();
     // Convert the UTF-8 string into a list of UTF-8 characters.
     $vector = phutil_utf8v($line);
     $len = count($vector);
     for ($ii = 0; $ii < $len; ++$ii) {
         // An ampersand indicates an HTML entity; consume the whole thing (until
         // ";") but treat it all as one character.
         if ($vector[$ii] == '&') {
             // The bounds check keeps an unterminated entity from scanning past
             // the end of the vector forever.
             do {
                 ++$ii;
             } while ($ii < $len && $vector[$ii] != ';');
             ++$c;
             // An "<" indicates an HTML tag, consume the whole thing but don't treat
             // it as a character.
         } else {
             if ($vector[$ii] == '<') {
                 // Same bounds check for an unterminated tag.
                 do {
                     ++$ii;
                 } while ($ii < $len && $vector[$ii] != '>');
             } else {
                 ++$c;
             }
         }
         // Keep track of where we need to break the string later.
         if ($c == $this->lineWidth) {
             $break_here[$ii] = true;
             $c = 0;
         }
     }
     $result = array();
     foreach ($vector as $ii => $char) {
         $result[] = $char;
         if (isset($break_here[$ii])) {
             $result[] = "<span class=\"over-the-line\">⬅</span><br />";
         }
     }
     return implode('', $result);
 }
Esempio n. 15
0
/**
 * Produce a summary of a string by shortening it, respecting UTF-8
 * characters. The cut is placed at a word boundary when one can be found.
 *
 * NOTE: This function makes a best effort to apply some reasonable rules but
 * will not work well for the full range of unicode languages. For instance,
 * no effort is made to deal with combining characters.
 *
 * @param   string  UTF-8 string to shorten.
 * @param   int     Maximum length of the result.
 * @param   string  If the string is shortened, add this at the end. Defaults to
 *                  horizontal ellipsis.
 * @return  string  A string with no more than the specified character length.
 */
function phutil_utf8_shorten($string, $length, $terminal = "…")
{
    $terminal_len = count(phutil_utf8v($terminal));
    if ($terminal_len >= $length) {
        // The result (including the terminal) must be no longer than $length,
        // which is impossible if the terminal alone is too long.
        throw new Exception("String terminal length must be less than string length!");
    }

    $glyphs = phutil_utf8v($string);
    if (count($glyphs) <= $length) {
        // Already short enough: return the input untouched.
        return $string;
    }

    // NOTE: This is not complete, and there are many other word boundary
    // characters and reasonable places to break words in the UTF-8 character
    // space. For now, this gives us reasonable behavior for latin languages. We
    // don't necessarily have access to PCRE+Unicode so there isn't a great way
    // for us to look up character attributes.

    // Preferred places to cut, instead of cutting in the middle of a word.
    static $word_breaks = array(' ' => true, "\n" => true, ';' => true, ':' => true, '[' => true, '(' => true, ',' => true, '-' => true);
    // Cutting right after one of these needs no terminal.
    static $sentence_stops = array('.' => true, '!' => true, '?' => true);

    $word_cut = null;
    $sentence_cut = null;
    // A word break only helps if it leaves room for the terminal.
    $last_usable_break = $length - $terminal_len;

    // Scan backward from the maximum length for a reasonable place to cut.
    for ($pos = $length; $pos >= 0; $pos--) {
        $glyph = $glyphs[$pos];
        if (isset($word_breaks[$glyph]) && $pos <= $last_usable_break) {
            $word_cut = $pos;
        } elseif (isset($sentence_stops[$glyph]) && $pos < $length) {
            $sentence_cut = $pos + 1;
            break;
        } elseif ($word_cut !== null) {
            break;
        }
    }

    if ($sentence_cut !== null) {
        // Found a character like ".": cut right after it, with no terminal.
        return implode('', array_slice($glyphs, 0, $sentence_cut));
    }

    // With no boundary at all, or with nothing but boundary characters, cut
    // at the maximum length which still leaves room for the terminal.
    if ($word_cut === null || $word_cut === 0) {
        $word_cut = $length - $terminal_len;
    }
    return implode('', array_slice($glyphs, 0, $word_cut)) . $terminal;
}
 /**
  * Truncate a string so it satisfies the configured byte, codepoint and
  * glyph limits.
  *
  * A limit which is not set (falsy) is not enforced. A string within all
  * limits is returned unmodified. Otherwise the string is cut on a glyph
  * boundary (glyphs come from phutil_utf8v_combined()), preferably at a
  * nearby word break, and the terminator is appended. If the cut lands just
  * after a sentence-ending character (".", "!" or "?"), the text up to and
  * including that character is returned without the terminator.
  *
  * @param  string String to truncate.
  * @return string The original string, a truncated string, or just the
  *                terminator when there is no room for any text.
  */
 public function truncateString($string)
 {
     // First, check if the string has fewer bytes than the most restrictive
     // limit. Codepoints and glyphs always take up at least one byte, so we can
     // just return the string unmodified if we're under all of the limits.
     $byte_len = strlen($string);
     if ($byte_len <= $this->minimumLimit) {
         return $string;
     }
     // If we need the vector of codepoints, build it.
     // NOTE: $point_len is only defined when a codepoint limit is set; every
     // later use of it is guarded by the same condition.
     $string_pv = null;
     if ($this->maximumCodepoints) {
         $string_pv = phutil_utf8v($string);
         $point_len = count($string_pv);
     }
     // We always need the combined vector, even if we're only doing byte or
     // codepoint truncation, because we don't want to truncate to half of a
     // combining character.
     $string_gv = phutil_utf8v_combined($string);
     $glyph_len = count($string_gv);
     // Now, check if we're still over the limits. For example, a string may
     // be over the raw byte limit but under the glyph limit if it contains
     // several multibyte characters.
     $too_long = false;
     if ($this->maximumBytes && $byte_len > $this->maximumBytes) {
         $too_long = true;
     }
     if ($this->maximumCodepoints && $point_len > $this->maximumCodepoints) {
         $too_long = true;
     }
     if ($this->maximumGlyphs && $glyph_len > $this->maximumGlyphs) {
         $too_long = true;
     }
     if (!$too_long) {
         return $string;
     }
     // This string is legitimately longer than at least one of the limits, so
     // we need to truncate it. Find the minimum cutoff point: this is the last
     // glyph we can possibly return while satisfying the limits and having space
     // for the terminator.
     // $cutoff is a count of glyphs to keep; each limit below may only lower it.
     $cutoff = $glyph_len;
     if ($this->maximumBytes) {
         if ($byte_len <= $this->maximumBytes) {
             $cutoff = $glyph_len;
         } else {
             // Start from the terminator's size so the kept glyphs plus the
             // terminator fit in the byte limit; stop at the first glyph which
             // would not fit.
             $bytes = $this->terminatorBytes;
             for ($ii = 0; $ii < $glyph_len; $ii++) {
                 $bytes += strlen($string_gv[$ii]);
                 if ($bytes > $this->maximumBytes) {
                     $cutoff = $ii;
                     break;
                 }
             }
         }
     }
     if ($this->maximumCodepoints) {
         if ($point_len <= $this->maximumCodepoints) {
             $cutoff = min($cutoff, $glyph_len);
         } else {
             // Walk the codepoint vector in step with the glyph vector: for each
             // glyph, consume codepoints until their bytes account for the whole
             // glyph, so $points is the number of codepoints in glyphs 0..$ii.
             $points = 0;
             for ($ii = 0; $ii < $glyph_len; $ii++) {
                 $glyph_bytes = strlen($string_gv[$ii]);
                 while ($points < $point_len) {
                     $glyph_bytes -= strlen($string_pv[$points]);
                     $points++;
                     if ($glyph_bytes <= 0) {
                         break;
                     }
                 }
                 $points_total = $points + $this->terminatorCodepoints;
                 if ($points_total > $this->maximumCodepoints) {
                     $cutoff = min($cutoff, $ii);
                     break;
                 }
             }
         }
     }
     if ($this->maximumGlyphs) {
         if ($glyph_len <= $this->maximumGlyphs) {
             $cutoff = min($cutoff, $glyph_len);
         } else {
             // Leave room for the terminator's own glyphs.
             $cutoff = min($cutoff, $this->maximumGlyphs - $this->terminatorGlyphs);
         }
     }
     // If we don't have enough characters for anything, just return the
     // terminator.
     if ($cutoff <= 0) {
         return $this->terminator;
     }
     // Otherwise, we're going to try to cut the string off somewhere reasonable
     // rather than somewhere arbitrary.
     // NOTE: This is not complete, and there are many other word boundary
     // characters and reasonable places to break words in the UTF-8 character
     // space. For now, this gives us reasonable behavior for latin languages. We
     // don't necessarily have access to PCRE+Unicode so there isn't a great way
     // for us to look up character attributes.
     // If we encounter these, prefer to break on them instead of cutting the
     // string off in the middle of a word.
     static $break_characters = array(' ' => true, "\n" => true, ';' => true, ':' => true, '[' => true, '(' => true, ',' => true, '-' => true);
     // If we encounter these, shorten to this character exactly without
     // appending the terminal.
     static $stop_characters = array('.' => true, '!' => true, '?' => true);
     // Search backward in the string, looking for reasonable places to break it.
     $word_boundary = null;
     $stop_boundary = null;
     // If we do a word break with a terminal, we have to look beyond at least
     // the number of characters in the terminal. If the terminal is longer than
     // the required length, we'll skip this whole block and return it on its
     // own.
     // Only search backward for a while. At some point we don't get a better
     // result by looking through the whole string, and if this is "MMM..." or
     // a non-latin language without word break characters we're just wasting
     // time.
     $search = max(0, $cutoff - 256);
     for ($ii = min($cutoff, $glyph_len - 1); $ii >= $search; $ii--) {
         $c = $string_gv[$ii];
         if (isset($break_characters[$c])) {
             // Keep scanning so a run of break characters is consumed as a whole.
             $word_boundary = $ii;
         } else {
             if (isset($stop_characters[$c])) {
                 $stop_boundary = $ii + 1;
                 break;
             } else {
                 // An ordinary character after a break was seen: that break is
                 // the place to cut.
                 if ($word_boundary !== null) {
                     break;
                 }
             }
         }
     }
     if ($stop_boundary !== null) {
         // We found a character like ".". Cut the string there, without appending
         // the terminal.
         $string_part = array_slice($string_gv, 0, $stop_boundary);
         return implode('', $string_part);
     }
     // If we didn't find any boundary characters or we found ONLY boundary
     // characters, just break at the maximum character length.
     if ($word_boundary === null || $word_boundary === 0) {
         $word_boundary = $cutoff;
     }
     $string_part = array_slice($string_gv, 0, $word_boundary);
     $string_part = implode('', $string_part);
     return $string_part . $this->terminator;
 }
Esempio n. 17
0
/**
 * Hard-wrap a block of UTF-8 text with embedded HTML tags and entities.
 *
 * An entity ("&...;") counts as one character and a tag ("<...>") counts as
 * none, so markup never consumes line width and is never split.
 *
 * @param   string An HTML string with tags and entities.
 * @param   int    Number of visible characters per line.
 * @return  list   List of hard-wrapped lines.
 * @group utf8
 */
function phutil_utf8_hard_wrap_html($string, $width)
{
    $break_here = array();
    // Convert the UTF-8 string into a list of UTF-8 characters.
    $vector = phutil_utf8v($string);
    $len = count($vector);
    $char_pos = 0;
    for ($ii = 0; $ii < $len; ++$ii) {
        // An ampersand indicates an HTML entity; consume the whole thing (until
        // ";") but treat it all as one character.
        if ($vector[$ii] == '&') {
            // The bounds check keeps an unterminated entity from scanning past
            // the end of the vector forever.
            do {
                ++$ii;
            } while ($ii < $len && $vector[$ii] != ';');
            ++$char_pos;
            // An "<" indicates an HTML tag, consume the whole thing but don't treat
            // it as a character.
        } else {
            if ($vector[$ii] == '<') {
                // Same bounds check for an unterminated tag.
                do {
                    ++$ii;
                } while ($ii < $len && $vector[$ii] != '>');
            } else {
                ++$char_pos;
            }
        }
        // Keep track of where we need to break the string later.
        if ($char_pos == $width) {
            $break_here[$ii] = true;
            $char_pos = 0;
        }
    }
    $result = array();
    $string = '';
    foreach ($vector as $ii => $char) {
        $string .= $char;
        if (isset($break_here[$ii])) {
            $result[] = $string;
            $string = '';
        }
    }
    // Emit whatever is left after the last break as the final line.
    if (strlen($string)) {
        $result[] = $string;
    }
    return $result;
}
Esempio n. 18
0
/**
 * Soft wrap text for display on a console, respecting UTF8 character boundaries
 * and ANSI color escape sequences.
 *
 * Lines are wrapped at spaces so they are at most (78 - indent) characters
 * wide; a run with no spaces is never split. Escape sequences (from ESC up
 * to the next "m") are copied through without counting toward the width.
 *
 * @param   string  Text to wrap.
 * @param   int     Optional indent level.
 * @return  string  Wrapped text.
 *
 * @group console
 */
function phutil_console_wrap($text, $indent = 0)
{
    $limit = 78 - $indent;
    $escape = chr(27);

    $wrapped = array();
    $current = array();
    $current_len = 0;
    // Position in $current just after the most recent space, if any.
    $break_at = null;
    // Number of counted characters since that space.
    $tail_len = 0;

    $glyphs = phutil_utf8v($text);
    $total = count($glyphs);
    for ($pos = 0; $pos < $total; $pos++) {
        $glyph = $glyphs[$pos];

        // An ANSI escape sequence for a color code is copied through without
        // counting toward the character limit, so lines with bold/color on
        // them don't wrap too early.
        if ($glyph == $escape) {
            while ($pos < $total) {
                $current[] = $glyphs[$pos];
                if ($glyphs[$pos] == 'm') {
                    break;
                }
                $pos++;
            }
            continue;
        }

        $current[] = $glyph;
        ++$current_len;
        ++$tail_len;

        if ($current_len > $limit && $break_at !== null) {
            // Emit everything before the last space as one line, dropping
            // trailing spaces, and carry the rest over to the next line.
            $head = array_slice($current, 0, $break_at);
            while (count($head) && end($head) == ' ') {
                array_pop($head);
            }
            $head[] = "\n";
            $wrapped[] = $head;
            $current = array_slice($current, $break_at);
            $current_len = $tail_len;
            $tail_len = 0;
            $break_at = null;
        }

        if ($glyph == " ") {
            $break_at = count($current);
            $tail_len = 0;
        }

        if ($glyph == "\n") {
            // An explicit newline ends the current line as-is.
            $wrapped[] = $current;
            $current = array();
            $tail_len = 0;
            $current_len = 0;
            $break_at = null;
        }
    }

    if ($current) {
        $wrapped[] = $current;
    }

    $prefix = null;
    if ($indent) {
        $prefix = str_repeat(' ', $indent);
    }

    $output = '';
    foreach ($wrapped as $line_glyphs) {
        $output .= $prefix . implode('', $line_glyphs);
    }
    return $output;
}