コード例 #1
0
 /**
  * Split raw UTF-8 content into words at line-break opportunities.
  *
  * The content is trimmed and decoded one UTF-8 character at a time. Each
  * character code is mapped to a line-breaking class with
  * get_line_break_class(), and find_line_break() is asked to fill in a break
  * action for the character positions. A word is closed after every position
  * whose break action is LB_INDIRECT, LB_INDIRECT_CM, LB_DIRECT or LB_EXPLICIT.
  *
  * @param string $content Raw UTF-8 encoded text.
  * @return array List of words (each passed through trim()); an empty array
  *               when $content is empty after trimming.
  */
 function break_into_words($content)
 {
     $content = trim($content);
     if ($content == '') {
         return array();
     }

     // Extract Unicode characters from the raw content data.
     // The loop relies on get_next_utf8_char() advancing $ptr on every call.
     $ptr = 0;
     $utf8_chars = array();
     $char_codes = array();
     $byte_length = strlen($content);
     while ($ptr < $byte_length) {
         $utf8_char = ManagerEncoding::get_next_utf8_char($content, $ptr);
         $utf8_chars[] = $utf8_char;
         $char_codes[] = utf8_to_code($utf8_char);
     }

     // Get Unicode line breaking classes, one per character.
     $classes = array_map(array($this, 'get_line_break_class'), $char_codes);

     // $breaks is an output argument of find_line_break(); start from an empty
     // array so that count() below is well defined even if nothing is written.
     $breaks = array();
     $this->find_line_break($classes, $breaks, count($classes));

     // Make words array
     $words = array();
     $word = '';
     for ($i = 0, $break_count = count($breaks); $i < $break_count; $i++) {
         $word .= $utf8_chars[$i];
         $break = $breaks[$i];
         if ($break == LB_INDIRECT || $break == LB_INDIRECT_CM || $break == LB_DIRECT || $break == LB_EXPLICIT) {
             $words[] = trim($word);
             $word = '';
         }
     }

     // Do not lose characters collected after the last recognised break:
     // without this flush they would be silently dropped from the result.
     $tail = trim($word);
     if ($tail !== '') {
         $words[] = $tail;
     }

     return $words;
 }
コード例 #2
0
 /**
  * Build a text box from a raw UTF-8 word and attach it as a child.
  *
  * The word is read one UTF-8 character at a time and split into sub-words,
  * each added to a single TextBox together with the encoding it uses:
  *  - soft hyphens (SYMBOL_SHY) are not added to the text; the byte offset at
  *    which they occurred in the current sub-word is recorded instead;
  *  - characters that have a mapping are appended to the current sub-word,
  *    switching to another encoding (and starting a new sub-word) when the
  *    current encoding has no entry for the character;
  *  - characters without any mapping are registered through addCustomChar()
  *    and added as a separate single-character sub-word using the custom
  *    encoding name.
  *
  * @param string $raw_content UTF-8 encoded word.
  * @param mixed  $pipeline    Passed through to TextBox::create_empty().
  * @return bool false when $raw_content is an empty string (nothing is added),
  *              true otherwise.
  */
 function process_word($raw_content, &$pipeline)
 {
     if ($raw_content === '') {
         return false;
     }
     $ptr = 0;
     $word = '';
     $hyphens = array();
     $encoding = 'iso-8859-1';
     $manager_encoding =& ManagerEncoding::get();
     $text_box =& TextBox::create_empty($pipeline);
     $len = strlen($raw_content);
     // The loop relies on getNextUTF8Char() advancing $ptr on every call.
     while ($ptr < $len) {
         $char = $manager_encoding->getNextUTF8Char($raw_content, $ptr);
         // Check if the current char is a soft hyphen character. If it is,
         // remove it from the word (as it should not be drawn normally)
         // and store its location
         if ($char == SYMBOL_SHY) {
             $hyphens[] = strlen($word);
         } else {
             $mapping = $manager_encoding->getMapping($char);
             /**
              * If this character is not found in predefined encoding vectors,
              * we'll use "Custom" encoding and add single-character TextBox
              *
              * @TODO: handle characters without known glyph names
              */
             if (is_null($mapping)) {
                 /**
                  * No mapping to default encoding vectors found for this character.
                  * Add the word collected so far first.
                  */
                 if ($word !== '') {
                     $text_box->add_subword($word, $encoding, $hyphens);
                 }
                 /**
                  * Add current symbol
                  *
                  * NOTE(review): $hyphens is neither cleared here nor after
                  * $word is reset below, unlike the encoding-switch branch.
                  * Offsets recorded for the previous sub-word are therefore
                  * passed on to later sub-words. Left unchanged; confirm
                  * against add_subword() whether this is intended.
                  */
                 $custom_char = $manager_encoding->addCustomChar(utf8_to_code($char));
                 $text_box->add_subword($custom_char, $manager_encoding->getCustomEncodingName(), $hyphens);
                 $word = '';
             } else {
                 if (isset($mapping[$encoding])) {
                     $word .= $mapping[$encoding];
                 } else {
                     // This condition prevents empty text boxes from appearing; say, if a word starts with a
                     // national character, a text box with no letters would be generated, in rare cases causing
                     // random line wraps if the container is narrow
                     if ($word !== '') {
                         $text_box->add_subword($word, $encoding, $hyphens);
                     }
                     // Switch to the first encoding listed for this character.
                     // reset() + key() replaces each(), which was removed in PHP 8.0.
                     reset($mapping);
                     $encoding = key($mapping);
                     $word = $mapping[$encoding];
                     $hyphens = array();
                 }
             }
         }
     }
     // Add whatever is left after the last character.
     if ($word !== '') {
         $text_box->add_subword($word, $encoding, $hyphens);
     }
     $this->add_child($text_box);
     return true;
 }