コード例 #1
0
ファイル: Jieba.php プロジェクト: 9466/jieba-php
 /**
  * Static method __cutDAG
  *
  * Segments a sentence with the help of a word graph (DAG) built from the
  * dictionary trie:
  *
  *  1. For every start offset $i, $DAG[$i] collects each end offset $j for
  *     which the characters $i..$j form a trie key flagged with "end".
  *  2. Offsets that received no entry fall back to a single-character word.
  *  3. self::calc() is invoked with the sentence and the DAG; afterwards the
  *     first key of self::$route[$x] is read as the end offset of the word
  *     starting at $x (presumably calc() fills self::$route -- it is not
  *     visible from this method).
  *  4. Runs of consecutive single-character words are buffered and handed to
  *     segmentBufferedChars() instead of being emitted one by one.
  *
  * @param string $sentence # input sentence (treated as UTF-8)
  * @param array  $options  # other options; merged with the defaults but not
  *                         # consulted anywhere else in this method
  *
  * @return array $words
  */
 public static function __cutDAG($sentence, $options = array())
 {
     $defaults = array('mode' => 'default');
     // Kept for interface compatibility; nothing below reads the merged options.
     $options = array_merge($defaults, $options);

     $N = mb_strlen($sentence, 'UTF-8');

     // Build the DAG: $i is the start offset of the candidate word, $j the
     // offset of the character currently being appended to it.
     $DAG = array();
     $word_c = array();
     $i = 0;
     $j = 0;
     while ($i < $N) {
         $c = mb_substr($sentence, $j, 1, 'UTF-8');
         // Trie keys are the characters of the candidate joined by dots.
         $next_word_key = (count($word_c) == 0)
             ? $c
             : implode('.', $word_c) . '.' . $c;

         if (!self::$trie->exists($next_word_key)) {
             // No dictionary entry continues this prefix: restart one
             // character further to the right.
             $word_c = array();
             $i += 1;
             $j = $i;
             continue;
         }

         array_push($word_c, $c);
         $next_word_key_value = self::$trie->get($next_word_key);
         // The loose comparison is intentional: it is kept exactly as it was so
         // that the set of nodes treated as word ends does not change.
         if ($next_word_key_value == array("end" => "")
             || isset($next_word_key_value["end"])
             || isset($next_word_key_value[0]["end"])
         ) {
             if (!isset($DAG[$i])) {
                 $DAG[$i] = array();
             }
             array_push($DAG[$i], $j);
         }

         $j += 1;
         if ($j >= $N) {
             // Ran off the end of the sentence: move on to the next start offset.
             $word_c = array();
             $i += 1;
             $j = $i;
         }
     }

     // Every offset must have at least one outgoing edge (itself).
     for ($i = 0; $i < $N; $i++) {
         if (!isset($DAG[$i])) {
             $DAG[$i] = array($i);
         }
     }

     self::calc($sentence, $DAG);

     // Walk the chosen route, buffering single characters so that they can be
     // re-segmented together.
     $words = array();
     $buf = '';
     $x = 0;
     while ($x < $N) {
         $current_route_keys = array_keys(self::$route[$x]);
         $y = $current_route_keys[0] + 1;
         $l_word = mb_substr($sentence, $x, $y - $x, 'UTF-8');

         if ($y - $x == 1) {
             $buf .= $l_word;
         } else {
             foreach (self::segmentBufferedChars($buf) as $word) {
                 $words[] = $word;
             }
             $buf = '';
             $words[] = $l_word;
         }
         $x = $y;
     }

     // Flush whatever single characters are still pending at the end.
     foreach (self::segmentBufferedChars($buf) as $word) {
         $words[] = $word;
     }

     return $words;
 }

 /**
  * Turns a buffered run of single characters into the words to emit.
  *
  * An empty buffer yields nothing, a one-character buffer is emitted as is,
  * and anything longer is delegated to Finalseg::__cut().
  *
  * @param string $buf # buffered characters (UTF-8)
  *
  * @return array words to append to the result, in order
  */
 private static function segmentBufferedChars($buf)
 {
     $length = mb_strlen($buf, 'UTF-8');
     if ($length == 0) {
         return array();
     }
     if ($length == 1) {
         return array($buf);
     }

     return Finalseg::__cut($buf);
 }
コード例 #2
0
ファイル: demo_extract_tags.php プロジェクト: 9466/jieba-php
#!/usr/bin/php
<?php 
/**
 * demo_extract_tags.php
 *
 * PHP version 5
 *
 * @category PHP
 * @package  /src/cmd/
 * @author   Fukuball Lin <*****@*****.**>
 * @license  MIT Licence
 * @version  GIT: <fukuball/jieba-php>
 * @link     https://github.com/fukuball/jieba-php
 */
// Raise the memory limit before the segmenter data is loaded (presumably the
// dictionaries read by the init() calls below are large -- not visible here).
ini_set('memory_limit', '600M');

// Project root: one directory above the one containing this script.
$base_dir = dirname(dirname(__FILE__));

require_once $base_dir . "/vendor/multi-array/MultiArray.php";
require_once $base_dir . "/vendor/multi-array/Factory/MultiArrayFactory.php";
require_once $base_dir . "/class/Jieba.php";
require_once $base_dir . "/class/Finalseg.php";
require_once $base_dir . "/class/JiebaAnalyse.php";
use Fukuball\Jieba;
use Fukuball\Finalseg;
use Fukuball\JiebaAnalyse;

// NOTE(review): 'samll' looks like a misspelling of 'small'. It is left
// unchanged because Jieba::init() is not visible from this script and
// correcting it may change how init() behaves -- confirm and fix there.
Jieba::init(array('mode' => 'test', 'dict' => 'samll'));
Finalseg::init();
JiebaAnalyse::init();

$top_k = 10;

// file_get_contents() takes no fopen-style mode; its second parameter is the
// boolean $use_include_path, so the former "r" argument was dropped.
$lyric_path = $base_dir . "/dict/lyric.txt";
$content = file_get_contents($lyric_path);
if ($content === false) {
    // Stop instead of passing `false` on to extractTags().
    fwrite(STDERR, "Unable to read " . $lyric_path . "\n");
    exit(1);
}

$tags = JiebaAnalyse::extractTags($content, $top_k);
var_dump($tags);
コード例 #3
0
ファイル: JiebaTest.php プロジェクト: 9466/jieba-php
 /**
  * Checks that Finalseg::cut() splits the sample sentence into the expected
  * list of tokens.
  *
  * @return void
  */
 public function testFinalsegCut()
 {
     $actual_tokens = Finalseg::cut("怜香惜玉也得要看对象啊!");
     $expected_tokens = array("怜香惜", "玉", "也", "得", "要", "看", "对象", "啊");

     $this->assertEquals($expected_tokens, $actual_tokens);
 }