/**
 * Checks that Jieba::cut() yields the expected tokens after the user
 * dictionary has been loaded.
 *
 * @return void
 */
public function testLoadUserDict()
{
    // The custom dictionary is loaded before the sentence is segmented.
    Jieba::loadUserDict(dirname(dirname(__FILE__)) . '/src/dict/user_dict.txt');

    $expected = array(
        "李小福", "是", "创新办", "主任", "也",
        "是", "云计算", "方面", "的", "专家",
    );
    $actual = Jieba::cut("李小福是创新办主任也是云计算方面的专家");

    $this->assertEquals($expected, $actual);
}
/**
 * Method getPartofspeech
 *
 * Calls init() on Jieba, Finalseg and Posseg, runs Posseg::cut() on the
 * input, keeps the result on $this->cixing and returns it.
 *
 * @param string $string # text handed to Posseg::cut()
 *
 * @return mixed # the value returned by Posseg::cut()
 */
public function getPartofspeech($string)
{
    Jieba::init();
    Finalseg::init();
    Posseg::init();

    // Store the result on the instance and return it in one step.
    return $this->cixing = Posseg::cut($string);
}
/**
 * Static method cut
 *
 * Splits the sentence into runs of characters in U+4E00..U+9FA5 and runs
 * of [a-zA-Z0-9+#\n]. Runs of the first kind are segmented by
 * Jieba::__cutAll() or Jieba::__cutDAG(); runs of the second kind are
 * emitted unchanged. Characters matching neither class are not part of
 * any match and therefore do not appear in the result.
 *
 * @param string  $sentence # input sentence
 * @param boolean $cut_all  # true: use __cutAll(), false: use __cutDAG()
 * @param array   $options  # merged with array('mode' => 'default');
 *                          # not otherwise read in this method
 *
 * @return array $seg_list
 */
public static function cut($sentence, $cut_all = false, $options = array())
{
    $options = array_merge(array('mode' => 'default'), $options);

    $han_regex = '([\\x{4E00}-\\x{9FA5}]+)';
    $skip_regex = '([a-zA-Z0-9+#\\n]+)';

    preg_match_all(
        '/(' . $han_regex . '|' . $skip_regex . ')/u',
        $sentence,
        $found,
        PREG_PATTERN_ORDER
    );

    $tokens = array();
    foreach ($found[0] as $chunk) {
        // Anything without a character from the Han range passes through as-is.
        if (!preg_match('/' . $han_regex . '/u', $chunk)) {
            $tokens[] = $chunk;
            continue;
        }

        $pieces = $cut_all ? Jieba::__cutAll($chunk) : Jieba::__cutDAG($chunk);
        foreach ($pieces as $piece) {
            $tokens[] = $piece;
        }
    }

    return $tokens;
}
/**
 * Static method cutForSearch
 *
 * Runs Jieba::cut() on the sentence and, for every resulting word, first
 * emits those of its 2-character substrings (words longer than 2) and
 * 3-character substrings (words longer than 3) that exist as keys in
 * self::$FREQ, then emits the word itself.
 *
 * @param string $sentence # input sentence
 * @param array  $options  # merged with array('mode' => 'default');
 *                         # not otherwise read in this method
 *
 * @return array $seg_list
 */
public static function cutForSearch($sentence, $options = array())
{
    $options = array_merge(array('mode' => 'default'), $options);

    $result = array();
    foreach (Jieba::cut($sentence) as $token) {
        $length = mb_strlen($token, 'UTF-8');

        // All 2-grams come before all 3-grams, and both before the word.
        foreach (array(2, 3) as $size) {
            if ($length <= $size) {
                continue;
            }
            $stop = $length - $size + 1;
            for ($offset = 0; $offset < $stop; $offset++) {
                $gram = mb_substr($token, $offset, $size, 'UTF-8');
                if (isset(self::$FREQ[$gram])) {
                    $result[] = $gram;
                }
            }
        }

        $result[] = $token;
    }

    return $result;
}
#!/usr/bin/php
<?php
/**
 * demo_extract_tags.php
 *
 * Reads dict/lyric.txt and dumps the result of
 * JiebaAnalyse::extractTags() for the top 10 tags.
 *
 * PHP version 5
 *
 * @category PHP
 * @package  /src/cmd/
 * @author   Fukuball Lin <*****@*****.**>
 * @license  MIT Licence
 * @version  GIT: <fukuball/jieba-php>
 * @link     https://github.com/fukuball/jieba-php
 */
ini_set('memory_limit', '600M');

require_once dirname(dirname(__FILE__)) . "/vendor/multi-array/MultiArray.php";
require_once dirname(dirname(__FILE__)) . "/vendor/multi-array/Factory/MultiArrayFactory.php";
require_once dirname(dirname(__FILE__)) . "/class/Jieba.php";
require_once dirname(dirname(__FILE__)) . "/class/Finalseg.php";
require_once dirname(dirname(__FILE__)) . "/class/JiebaAnalyse.php";
use Fukuball\Jieba\Jieba;
use Fukuball\Jieba\Finalseg;
use Fukuball\Jieba\JiebaAnalyse;

// The 'dict' value was misspelled as 'samll'.
// NOTE(review): presumably Jieba::init() recognises 'small' - confirm.
Jieba::init(array('mode' => 'test', 'dict' => 'small'));
Finalseg::init();
JiebaAnalyse::init();

$top_k = 10;
$lyric_path = dirname(dirname(__FILE__)) . "/dict/lyric.txt";

// file_get_contents() takes a boolean $use_include_path as its second
// argument, not an fopen()-style mode, so the stray "r" has been dropped.
$content = file_get_contents($lyric_path);
if ($content === false) {
    // Stop here instead of handing false to extractTags().
    fwrite(STDERR, "Unable to read " . $lyric_path . "\n");
    exit(1);
}

$tags = JiebaAnalyse::extractTags($content, $top_k);
var_dump($tags);
#!/usr/bin/php
<?php
/**
 * demo_posseg.php
 *
 * Dumps the result of Posseg::cut() for a sample sentence, then dumps
 * the same result after it has been passed through
 * Posseg::posTagReadable().
 *
 * PHP version 5
 *
 * @category PHP
 * @package  /src/cmd/
 * @author   Fukuball Lin <*****@*****.**>
 * @license  MIT Licence
 * @version  GIT: <fukuball/jieba-php>
 * @link     https://github.com/fukuball/jieba-php
 */
ini_set('memory_limit', '600M');

require_once dirname(__DIR__) . "/vendor/multi-array/MultiArray.php";
require_once dirname(__DIR__) . "/vendor/multi-array/Factory/MultiArrayFactory.php";
require_once dirname(__DIR__) . "/class/Jieba.php";
require_once dirname(__DIR__) . "/class/Finalseg.php";
require_once dirname(__DIR__) . "/class/Posseg.php";
use Fukuball\Jieba\Jieba;
use Fukuball\Jieba\Finalseg;
use Fukuball\Jieba\Posseg;

Jieba::init();
Finalseg::init();
Posseg::init();

// First dump: the value returned by Posseg::cut().
$tagged = Posseg::cut("这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Python和C++。");
var_dump($tagged);

// Second dump: the same value after Posseg::posTagReadable().
var_dump(Posseg::posTagReadable($tagged));
#!/usr/bin/php
<?php
/**
 * demo_extract_tags.php
 *
 * Reads dict/lyric.txt and dumps the result of
 * JiebaAnalyse::extractTags() for the top 10 tags, with Jieba
 * initialised using the 'big' dict option.
 *
 * PHP version 5
 *
 * @category PHP
 * @package  /src/cmd/
 * @author   Fukuball Lin <*****@*****.**>
 * @license  MIT Licence
 * @version  GIT: <fukuball/jieba-php>
 * @link     https://github.com/fukuball/jieba-php
 */
ini_set('memory_limit', '1024M');

require_once dirname(dirname(__FILE__)) . "/vendor/multi-array/MultiArray.php";
require_once dirname(dirname(__FILE__)) . "/vendor/multi-array/Factory/MultiArrayFactory.php";
require_once dirname(dirname(__FILE__)) . "/class/Jieba.php";
require_once dirname(dirname(__FILE__)) . "/class/Finalseg.php";
require_once dirname(dirname(__FILE__)) . "/class/JiebaAnalyse.php";
use Fukuball\Jieba\Jieba;
use Fukuball\Jieba\Finalseg;
use Fukuball\Jieba\JiebaAnalyse;

Jieba::init(array('mode' => 'test', 'dict' => 'big'));
Finalseg::init();
JiebaAnalyse::init();

$top_k = 10;
$lyric_path = dirname(dirname(__FILE__)) . "/dict/lyric.txt";

// file_get_contents() takes a boolean $use_include_path as its second
// argument, not an fopen()-style mode, so the stray "r" has been dropped.
$content = file_get_contents($lyric_path);
if ($content === false) {
    // Stop here instead of handing false to extractTags().
    fwrite(STDERR, "Unable to read " . $lyric_path . "\n");
    exit(1);
}

$tags = JiebaAnalyse::extractTags($content, $top_k);
var_dump($tags);
* * PHP version 5 * * @category PHP * @package /src/cmd/ * @author Fukuball Lin <*****@*****.**> * @license MIT Licence * @version GIT: <fukuball/jieba-php> * @link https://github.com/fukuball/jieba-php */
ini_set('memory_limit', '600M');

require_once dirname(dirname(__FILE__)) . "/vendor/multi-array/MultiArray.php";
require_once dirname(dirname(__FILE__)) . "/vendor/multi-array/Factory/MultiArrayFactory.php";
require_once dirname(dirname(__FILE__)) . "/class/Jieba.php";
require_once dirname(dirname(__FILE__)) . "/class/Finalseg.php";
use Fukuball\Jieba\Jieba;
use Fukuball\Jieba\Finalseg;

// The 'dict' value was misspelled as 'samll'.
// NOTE(review): presumably Jieba::init() recognises 'small' - confirm.
Jieba::init(array('mode' => 'test', 'dict' => 'small'));
Finalseg::init();

// Jieba::cut() with $cut_all left at its default.
$seg_list = Jieba::cut("怜香惜玉也得要看对象啊!");
var_dump($seg_list);

// The same sentence with $cut_all = true, then with $cut_all = false.
echo "Full Mode: \n";
$seg_list = Jieba::cut("我来到北京清华大学", true);
var_dump($seg_list);

echo "Default Mode: \n";
$seg_list = Jieba::cut("我来到北京清华大学", false);
var_dump($seg_list);

$seg_list = Jieba::cut("他来到了网易杭研大厦");
var_dump($seg_list);

// Jieba::cutForSearch() on a longer sentence.
$seg_list = Jieba::cutForSearch("小明硕士毕业于中国科学院计算所,后在日本京都大学深造");
var_dump($seg_list);
#!/usr/bin/php
<?php
/**
 * demo_user_dict.php
 *
 * Dumps the result of Jieba::cut() for one sentence twice: once before
 * and once after Jieba::loadUserDict() is called with dict/user_dict.txt.
 *
 * PHP version 5
 *
 * @category PHP
 * @package  /src/cmd/
 * @author   Fukuball Lin <*****@*****.**>
 * @license  MIT Licence
 * @version  GIT: <fukuball/jieba-php>
 * @link     https://github.com/fukuball/jieba-php
 */
ini_set('memory_limit', '600M');

require_once dirname(dirname(__FILE__)) . "/vendor/multi-array/MultiArray.php";
require_once dirname(dirname(__FILE__)) . "/vendor/multi-array/Factory/MultiArrayFactory.php";
require_once dirname(dirname(__FILE__)) . "/class/Jieba.php";
require_once dirname(dirname(__FILE__)) . "/class/Finalseg.php";
use Fukuball\Jieba\Jieba;
use Fukuball\Jieba\Finalseg;

// The 'dict' value was misspelled as 'samll'.
// NOTE(review): presumably Jieba::init() recognises 'small' - confirm.
Jieba::init(array('mode' => 'test', 'dict' => 'small'));
Finalseg::init();

// Segmentation before the user dictionary is loaded.
$seg_list = Jieba::cut("李小福是创新办主任也是云计算方面的专家");
var_dump($seg_list);

Jieba::loadUserDict(dirname(dirname(__FILE__)) . '/dict/user_dict.txt');

// Segmentation of the same sentence after the user dictionary is loaded.
$seg_list = Jieba::cut("李小福是创新办主任也是云计算方面的专家");
var_dump($seg_list);
require_once dirname(__DIR__) . "/vendor/multi-array/Factory/MultiArrayFactory.php";
require_once dirname(__DIR__) . "/class/Jieba.php";
require_once dirname(__DIR__) . "/class/Finalseg.php";
use Fukuball\Jieba\Jieba;
use Fukuball\Jieba\Finalseg;

Jieba::init(array('mode' => 'test', 'dict' => 'big'));
Finalseg::init();

// Every sample below is dumped twice: the first spelling of the sentence,
// then a second spelling of it using different characters.

// Jieba::cut() with $cut_all left at its default.
var_dump(Jieba::cut("怜香惜玉也得要看对象啊!"));
var_dump(Jieba::cut("憐香惜玉也得要看對象啊!"));

// Jieba::cut() with $cut_all = true.
echo "Full Mode: \n";
var_dump(Jieba::cut("我来到北京清华大学", true));
echo "Full Mode: \n";
var_dump(Jieba::cut("我來到北京清華大學", true));

// Jieba::cut() with $cut_all = false.
echo "Default Mode: \n";
var_dump(Jieba::cut("我来到北京清华大学", false));
echo "Default Mode: \n";
var_dump(Jieba::cut("我來到北京清華大學", false));

var_dump(Jieba::cut("他来到了网易杭研大厦"));
var_dump(Jieba::cut("他來到了網易杭研大廈"));

// Jieba::cutForSearch() on a longer sentence.
var_dump(Jieba::cutForSearch("小明硕士毕业于中国科学院计算所,后在日本京都大学深造"));
var_dump(Jieba::cutForSearch("小明碩士畢業于中國科學院計算所,後在日本京都大學深造"));
/**
 * demo.php
 *
 * Dumps the result of Jieba::cut() for a few sample sentences, with
 * $cut_all left at its default, set to true and set to false.
 *
 * PHP version 5
 *
 * @category PHP
 * @package  /src/cmd/
 * @author   Fukuball Lin <*****@*****.**>
 * @license  MIT Licence
 * @version  GIT: <fukuball/jieba-php>
 * @link     https://github.com/fukuball/jieba-php
 */
ini_set('memory_limit', '600M');

require_once dirname(dirname(__FILE__)) . "/vendor/multi-array/MultiArray.php";
require_once dirname(dirname(__FILE__)) . "/vendor/multi-array/Factory/MultiArrayFactory.php";
require_once dirname(dirname(__FILE__)) . "/class/Jieba.php";
require_once dirname(dirname(__FILE__)) . "/class/Finalseg.php";
use Fukuball\Jieba\Jieba;
use Fukuball\Jieba\Finalseg;

// The 'dict' value was misspelled as 'samll'.
// NOTE(review): presumably Jieba::init() recognises 'small' - confirm.
Jieba::init(array('mode' => 'test', 'dict' => 'small'));
Finalseg::init();

// Jieba::cut() with $cut_all left at its default.
$seg_list = Jieba::cut("怜香惜玉也得要看对象啊!");
var_dump($seg_list);

// The same sentence with $cut_all = true, then with $cut_all = false.
echo "Full Mode: \n";
$seg_list = Jieba::cut("我来到北京清华大学", true);
var_dump($seg_list);

echo "Default Mode: \n";
$seg_list = Jieba::cut("我来到北京清华大学", false);
var_dump($seg_list);

$seg_list = Jieba::cut("他来到了网易杭研大厦");
var_dump($seg_list);