コード例 #1
0
ファイル: Text.php プロジェクト: jimsafley/plugin-Ngram
 /**
  * Split a text into words using ICU word-boundary analysis.
  *
  * Every part of the text that the word break iterator does not classify
  * as WORD_NONE is appended to $this->words, in order of appearance.
  *
  * @param string      $text   Text to tokenize.
  * @param string|null $locale Locale used to select the word-break rules.
  *                            When empty, the "intl.default_locale" ini
  *                            setting is used; when that is empty as well,
  *                            null is passed so intl picks its own default.
  */
 public function __construct($text, $locale = null)
 {
     if (!$locale) {
         // ini_get() yields '' (or false) when no default locale is
         // configured. Pass null in that case rather than an empty locale
         // name, which ICU interprets as the root locale instead of
         // "use the default".
         $locale = ini_get('intl.default_locale') ?: null;
     }
     $iterator = \IntlBreakIterator::createWordInstance($locale);
     $iterator->setText($text);
     foreach ($iterator->getPartsIterator() as $part) {
         // The rule status is read right after the iterator produced $part,
         // so it is used here to decide whether $part counts as a word.
         if (\IntlBreakIterator::WORD_NONE !== $iterator->getRuleStatus()) {
             $this->words[] = $part;
         }
     }
 }
コード例 #2
0
<?php

// Probe IntlBreakIterator::isBoundary() on a word iterator with a handful of
// offsets, including a negative one and one beyond the end of the text, and
// dump each answer.
ini_set('intl.error_level', E_WARNING);
ini_set('intl.default_locale', 'pt_PT');

$wordBreaker = IntlBreakIterator::createWordInstance('pt');
$wordBreaker->setText('foo bar trans zoo bee');

// Offsets are checked in this exact order; the iterator keeps state between calls.
foreach (array(0, 7, -1, 1, 50) as $offset) {
    var_dump($wordBreaker->isBoundary($offset));
}
?>
==DONE==
コード例 #3
0
ファイル: cpbi_parts_iterator.php プロジェクト: badlamer/hhvm
<?php

// Walk a Thai string with a code point break iterator and print, for every
// part: its key, the part itself, the last code point reported by the
// iterator (as U+XXXX) and the iterator's current position.
ini_set('intl.error_level', E_WARNING);
ini_set('intl.default_locale', 'pt_PT');

$sample = 'ตัวอย่างข้อความ';

$parts = IntlBreakIterator::createCodePointInstance()->getPartsIterator();
$breaker = $parts->getBreakIterator();
$breaker->setText($sample);

foreach ($parts as $index => $piece) {
    // Query the code point before the position, matching the original order.
    $codePoint = sprintf('U+%04X', $breaker->getLastCodePoint());
    $position = $breaker->current();
    echo "{$index}. {$piece} ({$codePoint}) at {$position}\r\n";
}
?>
==DONE==
コード例 #4
0
ファイル: class.format.php プロジェクト: gizur/osticket
 /**
  * Reduce a text to a simplified form intended for search matching.
  *
  * Steps, in order:
  *  1. Bring the text to Unicode normalization form C when the intl
  *     normalizer is available; otherwise strip the diacritic from a few
  *     common accented letters as a lightweight substitute.
  *  2. Spell out ligatures and special letters in plain ASCII
  *     (e.g. "ß" becomes "ss", "Æ" becomes "AE", "ä" becomes "ae").
  *  3. Remove combining marks (\p{M}).
  *  4. Collapse every run of whitespace to its first character and trim
  *     both ends.
  *
  * A step that fails (the normalizer or PCRE rejecting the input, for
  * instance on malformed UTF-8) is skipped instead of discarding the text.
  *
  * @param string       $text Text to simplify.
  * @param string|false $lang Locale for word tokenizing; only read by the
  *                           currently disabled tokenizer branch.
  *
  * @return string The simplified text.
  */
 function searchable($text, $lang = false)
 {
     global $cfg;
     if (function_exists('normalizer_normalize')) {
         // Compose the input (NFC) so the replacement tables below see
         // precomposed letters.
         $normalized = normalizer_normalize($text, Normalizer::FORM_C);
         // A non-string result signals failure; keep the input in that case.
         if (is_string($normalized)) {
             $text = $normalized;
         }
     } else {
         // As a lightweight compatibility, use a lightweight C
         // normalizer with diacritic removal, thanks
         // http://ahinea.com/en/tech/accented-translate.html
         $tr = array("ä" => "a", "ñ" => "n", "ö" => "o", "ü" => "u", "ÿ" => "y");
         $text = strtr($text, $tr);
     }
     // Decompose compatible versions of characters (ä => ae)
     $tr = array(
         "ß" => "ss", "Æ" => "AE", "æ" => "ae",
         // U+0132 / U+0133 (IJ ligatures), written as UTF-8 byte escapes so
         // the keys cannot silently degrade into the plain letters "IJ"/"ij".
         "\xC4\xB2" => "IJ", "\xC4\xB3" => "ij",
         "Œ" => "OE", "œ" => "oe", "Ð" => "D", "Đ" => "D", "ð" => "d",
         "đ" => "d", "Ħ" => "H", "ħ" => "h", "ı" => "i", "ĸ" => "k",
         "Ŀ" => "L", "Ł" => "L", "ŀ" => "l", "ł" => "l", "Ŋ" => "N",
         // U+0149 as a single code point, plus the spelling that was
         // already present in this table.
         "\xC5\x89" => "n", "ʼn" => "n",
         "ŋ" => "n", "Ø" => "O", "ø" => "o", "ſ" => "s", "Þ" => "T",
         "Ŧ" => "T", "þ" => "t", "ŧ" => "t", "ä" => "ae", "ö" => "oe",
         "ü" => "ue", "Ä" => "AE", "Ö" => "OE", "Ü" => "UE",
     );
     $text = strtr($text, $tr);
     // Drop separated diacritics
     $stripped = preg_replace('/\\p{M}/u', '', $text);
     if (is_string($stripped)) {
         // preg_replace() returns null on error; only accept a real result.
         $text = $stripped;
     }
     // Drop extraneous whitespace
     $collapsed = preg_replace('/(\\s)\\s+/u', '$1', $text);
     if (is_string($collapsed)) {
         $text = $collapsed;
     }
     // Drop leading and trailing whitespace
     $text = trim($text);
     // The tokenizer branch is switched off on purpose by the leading `false`.
     // NOTE(review): iterating the break iterator directly yields boundary
     // offsets rather than the words themselves — revisit (getPartsIterator())
     // before enabling this branch.
     if (false && class_exists('IntlBreakIterator')) {
         // Split by word boundaries
         if ($tokenizer = IntlBreakIterator::createWordInstance($lang ?: ($cfg ? $cfg->getSystemLanguage() : 'en_US'))) {
             $tokenizer->setText($text);
             $tokens = array();
             foreach ($tokenizer as $token) {
                 $tokens[] = $token;
             }
             $text = implode(' ', $tokens);
         }
     } else {
         // Approximate word boundaries from Unicode chart at
         // http://www.unicode.org/reports/tr29/#Word_Boundaries
         // Punt for now
     }
     return $text;
 }
コード例 #5
0
ファイル: cpbi_clone_equality.php プロジェクト: badlamer/hhvm
<?php

// Dump the result of loosely comparing a code point break iterator with its
// clone at three moments: right after cloning, after only the original got a
// new text, and after the clone received that same text too.
ini_set('intl.error_level', E_WARNING);
ini_set('intl.default_locale', 'pt_PT');

$thaiText = 'ตัวอย่างข้อความ';
$asciiText = 'foo';

$original = IntlBreakIterator::createCodePointInstance();
$original->setText($thaiText);
$copy = clone $original;

// Both iterators hold the text that was set before cloning.
var_dump($original == $copy);

// Only the original is switched to the second text.
$original->setText($asciiText);
var_dump($original == $copy);

// The clone follows with the same second text.
$copy->setText($asciiText);
var_dump($original == $copy);
?>
==DONE==
コード例 #6
0
<?php

// Call getPartsIterator() on a word break iterator with three unusual
// argument lists (an array, two integers, a negative integer) and dump
// whatever each call produces.
ini_set('intl.error_level', E_WARNING);
ini_set('intl.default_locale', 'pt_PT');

$wordBreaker = IntlBreakIterator::createWordInstance(null);

var_dump($wordBreaker->getPartsIterator([]));
var_dump($wordBreaker->getPartsIterator(1, 2));
var_dump($wordBreaker->getPartsIterator(-1));
?>
==DONE==
コード例 #7
0
<?php

// Dump the locale a Portuguese sentence break iterator reports for each of
// the two locale type selectors (0 and 1, named via the Locale constants).
ini_set('intl.error_level', E_WARNING);
ini_set('intl.default_locale', 'pt_PT');

$sentenceBreaker = IntlBreakIterator::createSentenceInstance('pt');

foreach (array(Locale::ACTUAL_LOCALE, Locale::VALID_LOCALE) as $localeType) {
    var_dump($sentenceBreaker->getLocale($localeType));
}
?>
==DONE==
コード例 #8
0
<?php

// Calls the word, sentence, character, title and line factories of
// IntlBreakIterator and dumps what each call produces. The word factory is
// given an array where the other snippets pass a locale string or NULL; the
// remaining factories are given NULL plus an additional integer argument.
// NOTE(review): presumably a negative test for argument validation — confirm
// against the expected output, which is not visible here.
ini_set("intl.error_level", E_WARNING);
var_dump(IntlBreakIterator::createWordInstance(array()));
var_dump(IntlBreakIterator::createSentenceInstance(NULL, 2));
var_dump(IntlBreakIterator::createCharacterInstance(NULL, 2));
var_dump(IntlBreakIterator::createTitleInstance(NULL, 2));
var_dump(IntlBreakIterator::createLineInstance(NULL, 2));
コード例 #9
0
<?php

// Calls IntlBreakIterator::createCodePointInstance() with an array argument
// and dumps the result; the other snippets call this factory without any
// argument.
// NOTE(review): presumably checks how an unexpected argument is reported —
// confirm against the expected output, which is not visible here.
ini_set("intl.error_level", E_WARNING);
var_dump(IntlBreakIterator::createCodePointInstance(array()));