Author: Fukuball Lin (fukuball@gmail.com)
Exemplo n.º 1
0
 /**
  * Checks that Jieba::cut() yields the expected segmentation of the sample
  * sentence once the user dictionary file has been loaded.
  *
  * @return void
  */
 public function testLoadUserDict()
 {
     // Load the user dictionary first so the segmentation below can use it.
     $user_dict_path = dirname(dirname(__FILE__)) . '/src/dict/user_dict.txt';
     Jieba::loadUserDict($user_dict_path);

     $actual_words = Jieba::cut("李小福是创新办主任也是云计算方面的专家");
     $expected_words = array("李小福", "是", "创新办", "主任", "也", "是", "云计算", "方面", "的", "专家");

     $this->assertEquals($expected_words, $actual_words);
 }
Exemplo n.º 2
0
 /**
  * Runs Posseg::cut() on the given text, stores the result on
  * $this->cixing and returns it.
  *
  * Jieba, Finalseg and Posseg are (re-)initialised on every call.
  *
  * @param string $string # text handed to Posseg::cut()
  *
  * @return mixed whatever Posseg::cut() returns
  */
 public function getPartofspeech($string)
 {
     // Initialisation order is kept as-is: Jieba, then Finalseg, then Posseg.
     Jieba::init();
     Finalseg::init();
     Posseg::init();

     $tagged = Posseg::cut($string);

     // Keep a copy on the instance; presumably "cixing" means part of speech — confirm.
     $this->cixing = $tagged;

     return $tagged;
 }
Exemplo n.º 3
0
 /**
  * Static method cut
  *
  * Splits the sentence into blocks that are either runs of characters in
  * the range U+4E00..U+9FA5 or runs of [a-zA-Z0-9+#] and newlines. Text
  * matching neither pattern is not part of any block and therefore does
  * not appear in the result. Han blocks are segmented further with
  * Jieba::__cutAll() (when $cut_all is truthy) or Jieba::__cutDAG();
  * every other block is emitted unchanged.
  *
  * @param string  $sentence # input sentence
  * @param boolean $cut_all  # true: use __cutAll, false: use __cutDAG
  * @param array   $options  # merged over array('mode' => 'default');
  *                          # not otherwise read by this method
  *
  * @return array $seg_list list of segments in input order
  */
 public static function cut($sentence, $cut_all = false, $options = array())
 {
     $options = array_merge(array('mode' => 'default'), $options);

     $han_pattern = '([\\x{4E00}-\\x{9FA5}]+)';
     $skip_pattern = '([a-zA-Z0-9+#\\n]+)';
     $block_regex = '/(' . $han_pattern . '|' . $skip_pattern . ')/u';
     $han_regex = '/' . $han_pattern . '/u';

     preg_match_all($block_regex, $sentence, $found, PREG_PATTERN_ORDER);

     $result = array();
     foreach ($found[0] as $block) {
         // Blocks without Han characters pass through unchanged.
         if (!preg_match($han_regex, $block)) {
             $result[] = $block;
             continue;
         }

         $pieces = $cut_all ? Jieba::__cutAll($block) : Jieba::__cutDAG($block);
         foreach ($pieces as $piece) {
             $result[] = $piece;
         }
     }

     return $result;
 }
Exemplo n.º 4
0
 /**
  * Static method cutForSearch
  *
  * Segments the sentence with Jieba::cut() and, for every resulting word,
  * additionally emits the 2-character (words longer than 2 characters) and
  * 3-character (words longer than 3 characters) substrings that exist as
  * keys in self::$FREQ. For each word the order is: all matching 2-grams,
  * then all matching 3-grams, then the word itself.
  *
  * @param string  $sentence # input sentence
  * @param array   $options  # merged over array('mode' => 'default');
  *                          # not otherwise read by this method
  *
  * @return array $seg_list
  */
 public static function cutForSearch($sentence, $options = array())
 {
     $options = array_merge(array('mode' => 'default'), $options);

     $result = array();
     foreach (Jieba::cut($sentence) as $word) {
         $word_len = mb_strlen($word, 'UTF-8');

         // Sub-grams are only taken from words strictly longer than the gram.
         foreach (array(2, 3) as $gram_size) {
             if ($word_len <= $gram_size) {
                 continue;
             }
             $last_start = $word_len - $gram_size;
             for ($start = 0; $start <= $last_start; $start++) {
                 $gram = mb_substr($word, $start, $gram_size, 'UTF-8');
                 if (isset(self::$FREQ[$gram])) {
                     $result[] = $gram;
                 }
             }
         }

         $result[] = $word;
     }

     return $result;
 }
Exemplo n.º 5
0
#!/usr/bin/php
<?php 
/**
 * demo_extract_tags.php
 *
 * PHP version 5
 *
 * @category PHP
 * @package  /src/cmd/
 * @author   Fukuball Lin <*****@*****.**>
 * @license  MIT Licence
 * @version  GIT: <fukuball/jieba-php>
 * @link     https://github.com/fukuball/jieba-php
 */
ini_set('memory_limit', '600M');
require_once dirname(dirname(__FILE__)) . "/vendor/multi-array/MultiArray.php";
require_once dirname(dirname(__FILE__)) . "/vendor/multi-array/Factory/MultiArrayFactory.php";
require_once dirname(dirname(__FILE__)) . "/class/Jieba.php";
require_once dirname(dirname(__FILE__)) . "/class/Finalseg.php";
require_once dirname(dirname(__FILE__)) . "/class/JiebaAnalyse.php";
use Fukuball\Jieba\Jieba;
use Fukuball\Jieba\Finalseg;
use Fukuball\Jieba\JiebaAnalyse;
// NOTE(review): 'samll' looks like a misspelling of 'small'. It is left
// unchanged because the dict names accepted by Jieba::init() are not visible
// here — confirm before renaming.
Jieba::init(array('mode' => 'test', 'dict' => 'samll'));
Finalseg::init();
JiebaAnalyse::init();

// Number of tags requested from JiebaAnalyse::extractTags().
$top_k = 10;

// The second parameter of file_get_contents() is the boolean
// $use_include_path, not an fopen()-style mode, so the former "r" argument
// is dropped; the path is built from __FILE__ and needs no include_path lookup.
$lyric_path = dirname(dirname(__FILE__)) . "/dict/lyric.txt";
$content = file_get_contents($lyric_path);
if ($content === false) {
    // Stop instead of passing false on as if it were text.
    file_put_contents('php://stderr', "Unable to read " . $lyric_path . "\n");
    exit(1);
}

$tags = JiebaAnalyse::extractTags($content, $top_k);
var_dump($tags);
Exemplo n.º 6
0
#!/usr/bin/php
<?php 
/**
 * demo_posseg.php
 *
 * PHP version 5
 *
 * @category PHP
 * @package  /src/cmd/
 * @author   Fukuball Lin <*****@*****.**>
 * @license  MIT Licence
 * @version  GIT: <fukuball/jieba-php>
 * @link     https://github.com/fukuball/jieba-php
 */
ini_set('memory_limit', '600M');
require_once dirname(dirname(__FILE__)) . "/vendor/multi-array/MultiArray.php";
require_once dirname(dirname(__FILE__)) . "/vendor/multi-array/Factory/MultiArrayFactory.php";
require_once dirname(dirname(__FILE__)) . "/class/Jieba.php";
require_once dirname(dirname(__FILE__)) . "/class/Finalseg.php";
require_once dirname(dirname(__FILE__)) . "/class/Posseg.php";
use Fukuball\Jieba\Jieba;
use Fukuball\Jieba\Finalseg;
use Fukuball\Jieba\Posseg;
// Initialise Jieba, Finalseg and Posseg before tagging.
Jieba::init();
Finalseg::init();
Posseg::init();

// Dump the raw result of Posseg::cut() for the sample sentence.
$tagged_words = Posseg::cut("这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Python和C++。");
var_dump($tagged_words);

// Dump the same result after it has gone through Posseg::posTagReadable().
$readable_words = Posseg::posTagReadable($tagged_words);
var_dump($readable_words);
Exemplo n.º 7
0
#!/usr/bin/php
<?php 
/**
 * demo_extract_tags.php
 *
 * PHP version 5
 *
 * @category PHP
 * @package  /src/cmd/
 * @author   Fukuball Lin <*****@*****.**>
 * @license  MIT Licence
 * @version  GIT: <fukuball/jieba-php>
 * @link     https://github.com/fukuball/jieba-php
 */
ini_set('memory_limit', '1024M');
require_once dirname(dirname(__FILE__)) . "/vendor/multi-array/MultiArray.php";
require_once dirname(dirname(__FILE__)) . "/vendor/multi-array/Factory/MultiArrayFactory.php";
require_once dirname(dirname(__FILE__)) . "/class/Jieba.php";
require_once dirname(dirname(__FILE__)) . "/class/Finalseg.php";
require_once dirname(dirname(__FILE__)) . "/class/JiebaAnalyse.php";
use Fukuball\Jieba\Jieba;
use Fukuball\Jieba\Finalseg;
use Fukuball\Jieba\JiebaAnalyse;
// Initialise with the 'big' dictionary option in 'test' mode.
Jieba::init(array('mode' => 'test', 'dict' => 'big'));
Finalseg::init();
JiebaAnalyse::init();

// Number of tags requested from JiebaAnalyse::extractTags().
$top_k = 10;

// The second parameter of file_get_contents() is the boolean
// $use_include_path, not an fopen()-style mode, so the former "r" argument
// is dropped; the path is built from __FILE__ and needs no include_path lookup.
$lyric_path = dirname(dirname(__FILE__)) . "/dict/lyric.txt";
$content = file_get_contents($lyric_path);
if ($content === false) {
    // Stop instead of passing false on as if it were text.
    file_put_contents('php://stderr', "Unable to read " . $lyric_path . "\n");
    exit(1);
}

$tags = JiebaAnalyse::extractTags($content, $top_k);
var_dump($tags);
Exemplo n.º 8
0
 *
 * PHP version 5
 *
 * @category PHP
 * @package  /src/cmd/
 * @author   Fukuball Lin <*****@*****.**>
 * @license  MIT Licence
 * @version  GIT: <fukuball/jieba-php>
 * @link     https://github.com/fukuball/jieba-php
 */
ini_set('memory_limit', '600M');
require_once dirname(dirname(__FILE__)) . "/vendor/multi-array/MultiArray.php";
require_once dirname(dirname(__FILE__)) . "/vendor/multi-array/Factory/MultiArrayFactory.php";
require_once dirname(dirname(__FILE__)) . "/class/Jieba.php";
require_once dirname(dirname(__FILE__)) . "/class/Finalseg.php";
use Fukuball\Jieba\Jieba;
use Fukuball\Jieba\Finalseg;
// NOTE(review): 'samll' looks like a misspelling of 'small'; kept as-is
// because the dict names accepted by Jieba::init() are not visible here.
Jieba::init(array('mode' => 'test', 'dict' => 'samll'));
Finalseg::init();

// cut() with its default arguments.
var_dump(Jieba::cut("怜香惜玉也得要看对象啊!"));

// Same sentence with $cut_all = true, then $cut_all = false.
echo "Full Mode: \n";
var_dump(Jieba::cut("我来到北京清华大学", true));

echo "Default Mode: \n";
var_dump(Jieba::cut("我来到北京清华大学", false));

var_dump(Jieba::cut("他来到了网易杭研大厦"));

// cutForSearch() on a longer sentence.
var_dump(Jieba::cutForSearch("小明硕士毕业于中国科学院计算所,后在日本京都大学深造"));
Exemplo n.º 9
0
#!/usr/bin/php
<?php 
/**
 * demo_user_dict.php
 *
 * PHP version 5
 *
 * @category PHP
 * @package  /src/cmd/
 * @author   Fukuball Lin <*****@*****.**>
 * @license  MIT Licence
 * @version  GIT: <fukuball/jieba-php>
 * @link     https://github.com/fukuball/jieba-php
 */
ini_set('memory_limit', '600M');
require_once dirname(dirname(__FILE__)) . "/vendor/multi-array/MultiArray.php";
require_once dirname(dirname(__FILE__)) . "/vendor/multi-array/Factory/MultiArrayFactory.php";
require_once dirname(dirname(__FILE__)) . "/class/Jieba.php";
require_once dirname(dirname(__FILE__)) . "/class/Finalseg.php";
use Fukuball\Jieba\Jieba;
use Fukuball\Jieba\Finalseg;
// NOTE(review): 'samll' looks like a misspelling of 'small'; kept as-is
// because the dict names accepted by Jieba::init() are not visible here.
Jieba::init(array('mode' => 'test', 'dict' => 'samll'));
Finalseg::init();

$sentence = "李小福是创新办主任也是云计算方面的专家";

// Segmentation before the user dictionary is loaded.
var_dump(Jieba::cut($sentence));

Jieba::loadUserDict(dirname(dirname(__FILE__)) . '/dict/user_dict.txt');

// Segmentation of the same sentence after loading the user dictionary.
var_dump(Jieba::cut($sentence));
Exemplo n.º 10
0
require_once dirname(dirname(__FILE__)) . "/vendor/multi-array/Factory/MultiArrayFactory.php";
require_once dirname(dirname(__FILE__)) . "/class/Jieba.php";
require_once dirname(dirname(__FILE__)) . "/class/Finalseg.php";
use Fukuball\Jieba\Jieba;
use Fukuball\Jieba\Finalseg;
// Initialise with the 'big' dictionary option in 'test' mode.
Jieba::init(array('mode' => 'test', 'dict' => 'big'));
Finalseg::init();

// Every sample is run twice: simplified form first, traditional form second.

// cut() with its default arguments.
foreach (array("怜香惜玉也得要看对象啊!", "憐香惜玉也得要看對象啊!") as $sentence) {
    var_dump(Jieba::cut($sentence));
}

// Explicit $cut_all: both sentences with true, then both with false.
foreach (array(true, false) as $cut_all) {
    foreach (array("我来到北京清华大学", "我來到北京清華大學") as $sentence) {
        if ($cut_all) {
            echo "Full Mode: \n";
        } else {
            echo "Default Mode: \n";
        }
        var_dump(Jieba::cut($sentence, $cut_all));
    }
}

foreach (array("他来到了网易杭研大厦", "他來到了網易杭研大廈") as $sentence) {
    var_dump(Jieba::cut($sentence));
}

// cutForSearch() on the longer sentence pair.
$search_sentences = array(
    "小明硕士毕业于中国科学院计算所,后在日本京都大学深造",
    "小明碩士畢業于中國科學院計算所,後在日本京都大學深造",
);
foreach ($search_sentences as $sentence) {
    var_dump(Jieba::cutForSearch($sentence));
}
Exemplo n.º 11
0
/**
 * demo.php
 *
 * PHP version 5
 *
 * @category PHP
 * @package  /src/cmd/
 * @author   Fukuball Lin <*****@*****.**>
 * @license  MIT Licence
 * @version  GIT: <fukuball/jieba-php>
 * @link     https://github.com/fukuball/jieba-php
 */
ini_set('memory_limit', '600M');
require_once dirname(dirname(__FILE__)) . "/vendor/multi-array/MultiArray.php";
require_once dirname(dirname(__FILE__)) . "/vendor/multi-array/Factory/MultiArrayFactory.php";
require_once dirname(dirname(__FILE__)) . "/class/Jieba.php";
require_once dirname(dirname(__FILE__)) . "/class/Finalseg.php";
use Fukuball\Jieba\Jieba;
use Fukuball\Jieba\Finalseg;
// NOTE(review): 'samll' looks like a misspelling of 'small'; kept as-is
// because the dict names accepted by Jieba::init() are not visible here.
Jieba::init(array('mode' => 'test', 'dict' => 'samll'));
Finalseg::init();

// cut() with its default arguments.
var_dump(Jieba::cut("怜香惜玉也得要看对象啊!"));

// Same sentence with $cut_all = true, then $cut_all = false.
echo "Full Mode: \n";
var_dump(Jieba::cut("我来到北京清华大学", true));

echo "Default Mode: \n";
var_dump(Jieba::cut("我来到北京清华大学", false));

var_dump(Jieba::cut("他来到了网易杭研大厦"));