Beispiel #1
0
<?php

/**
 * php-mecab/examples
 * dump all nodes (OO-API)
 * charset=utf-8
 */
require_once dirname(__FILE__) . '/common.inc.php';
$mecab = new MeCab_Tagger($arg);
/**
 * Summarise a MeCab node through the format() helper.
 *
 * format() is not defined in this snippet; it is presumably provided by
 * common.inc.php.
 *
 * @param object $node Node exposing getSurface(), getFeature(), getId() and getStat().
 * @return mixed Whatever format() returns for the node's four fields.
 */
function call_format($node)
{
    $surface = $node->getSurface();
    $feature = $node->getFeature();
    $id = $node->getId();
    $stat = $node->getStat();

    return format($surface, $feature, $id, $stat);
}
if ($node = $mecab->parseToNode($str)) {
    border();
    while ($node) {
        $ar = array('node' => $node->toArray(), 'prev' => '-', 'next' => '-', 'enext' => '-', 'bnext' => '-', 'rpath' => '-', 'lpath' => '-');
        if ($prev = $node->getPrev()) {
            $ar['prev'] = call_format($prev);
        }
        if ($next = $node->getNext()) {
            $ar['next'] = call_format($next);
        }
        if ($enext = $node->getENext()) {
            $ar['enext'] = call_format($enext);
        }
        if ($bnext = $node->getBNext()) {
            $ar['bnext'] = call_format($bnext);
        }
        if ($rpath = $node->getRPath()) {
            $ar['rpath'] = array('prob' => $rpath->getProb(), 'cost' => $rpath->getCost(), 'rnode' => '-', 'lnode' => '-');
Beispiel #2
0
<?php

/**
 * php-mecab/examples
 * parse N-Best (OO-API)
 * charset=utf-8
 */
require_once __DIR__ . '/common.inc.php';

// $arg, $str_long, NBEST_MAX_RESULT and border() are not defined in this
// snippet; presumably they come from common.inc.php.
$mecab = new MeCab_Tagger($arg);

// Only print candidates when parseNBestInit() returns a truthy value.
if ($mecab->parseNBestInit($str_long)) {
    border();
    // Emit at most NBEST_MAX_RESULT candidates. The limit is checked before
    // next() is called, and the loop stops as soon as next() yields a falsy
    // value.
    $i = 0;
    while ($i < NBEST_MAX_RESULT) {
        $next = $mecab->next();
        if (!$next) {
            break;
        }
        echo $next;
        border();
        $i++;
    }
}
Beispiel #3
0
                 // フィルター
                 if ($key == 0) {
                     array_push($time_line_texts, $resolved);
                 }
             }
         }
     }
 }
 foreach ($jset3['statuses'] as $result) {
     $timeee = date('H', strtotime((string) $result['user']['created_at']));
     //時間を取得
     $lists = array("time" => $timeee, "id" => $result['id_str'], "userID" => $result['user']['screen_name'], "name" => $result['user']['name'], "link" => $result['user']['profile_image_url'], "text" => $result['text']);
     // ツイートのリストをliで出力
     array_push($list, $lists);
     ini_set('mecab.default_userdic', 'onomasticon2.dic');
     $mecab = new MeCab_Tagger();
     for ($node = $mecab->parseToNode($result['text']); $node; $node = $node->getNext()) {
         if ($node->getStat() != 2 && $node->getStat() != 3) {
             $resolved = array("Surface" => $node->getSurface(), "Feature" => $node->getFeature(), "Time" => $timeee);
             $feature = '/固有名詞(.*)/u';
             // フィルターに掛ける品詞   形容詞(.*)|感動詞(.*)|
             if (preg_match($feature, $resolved["Feature"])) {
                 // フィルター
                 unset($resolved["Feature"]);
                 $search = '/http|RT|\\w+|w+|笑|bot|あ+|あー|い+|え+|お+|ない|ー/u';
                 // フィルターに掛ける文字
                 $key = preg_match($search, $resolved["Surface"]);
                 // フィルター
                 if ($key == 0) {
                     array_push($time_line_texts, $resolved);
                 }
Beispiel #4
0
<?php

/**
 * php-mecab/examples
 * dump all nodes (OO-API with SPL and Overloading)
 * charset=utf-8
 */
require_once dirname(__FILE__) . '/common.inc.php';
$mecab = new MeCab_Tagger($arg);
/**
 * Summarise a MeCab node through the format() helper, reading the node's
 * fields as properties rather than through getter methods.
 *
 * format() is not defined in this snippet; it is presumably provided by
 * common.inc.php.
 *
 * @param object $node Node exposing surface, feature, id and stat properties.
 * @return mixed Whatever format() returns for the node's four fields.
 */
function call_format($node)
{
    $surface = $node->surface;
    $feature = $node->feature;
    $id = $node->id;
    $stat = $node->stat;

    return format($surface, $feature, $id, $stat);
}
if ($iter = $mecab->parseToNode($str)) {
    border();
    foreach ($iter as $node) {
        $ar = array('node' => $node->toArray(), 'prev' => '-', 'next' => '-', 'enext' => '-', 'bnext' => '-', 'rpath' => '-', 'lpath' => '-');
        if ($node->prev) {
            $ar['prev'] = call_format($node->prev);
        }
        if ($node->next) {
            $ar['next'] = call_format($node->next);
        }
        if ($node->enext) {
            $ar['enext'] = call_format($node->enext);
        }
        if ($node->bnext) {
            $ar['bnext'] = call_format($node->bnext);
        }
        if ($rpath = $node->rpath) {
            $ar['rpath'] = array('prob' => $rpath->prob, 'cost' => $rpath->cost, 'rnode' => '-', 'lnode' => '-');
Beispiel #5
0
<?php

/**
 * php-mecab/examples
 * parse string, wakati output format (OO-API)
 * charset=utf-8
 */
require_once __DIR__ . '/common.inc.php';

// $arg_all_morphs, $str and border() are not defined in this snippet;
// presumably they come from common.inc.php.
$mecab = new MeCab_Tagger($arg_all_morphs);

// Print the tagger's output for $str between two border() calls.
border();
print $mecab->parse($str);
border();
Beispiel #6
0
/**
 * Convert a string to a full-width kana/alphanumeric rendering using MeCab.
 *
 * The input is split into nodes by MeCab. Each node's surface is classified
 * by the is*() predicates (not defined in this snippet) and then:
 *  - full-width katakana, full-width digits, full-width letters and symbols
 *    are appended unchanged;
 *  - half-width katakana, hiragana, half-width digits and half-width letters
 *    are converted with mb_convert_kana() before being appended;
 *  - anything else is replaced by field 7 of the node's comma-separated
 *    feature string (presumably the reading in an IPAdic-style dictionary —
 *    confirm against the dictionary in use).
 *
 * @param string $str Text to convert; treated as UTF-8.
 * @return string The concatenated result for all nodes.
 */
function funcGetKana($str)
{
    $mecab = new MeCab_Tagger();
    $kna = "";
    for ($node = $mecab->parseToNode($str); $node; $node = $node->getNext()) {
        $stat = $node->getStat();
        // Nodes with status 2 or 3 are skipped (these look like MeCab's
        // BOS/EOS sentinel statuses — confirm).
        if ($stat == 2 || $stat == 3) {
            continue;
        }
        $word = $node->getSurface();
        // The first predicate that matches wins, so the case order matters.
        switch (true) {
            case isZenKana($word, "UTF-8"):
                // Full-width katakana: keep as is.
                $kna .= $word;
                break;
            case isHanKana($word, "UTF-8"):
                // Half-width katakana -> full-width katakana.
                $kna .= mb_convert_kana($word, "K", "UTF-8");
                break;
            case isHiragana($word, "UTF-8"):
                // Hiragana -> full-width katakana.
                $kna .= mb_convert_kana($word, "C", "UTF-8");
                break;
            case isZenNum($word, "UTF-8"):
                // Full-width digits: keep as is.
                $kna .= $word;
                break;
            case isHanNum($word, "UTF-8"):
                // Half-width digits -> full-width digits.
                $kna .= mb_convert_kana($word, "N", "UTF-8");
                break;
            case isZenEiji($word, "UTF-8"):
                // Full-width letters: keep as is.
                $kna .= $word;
                break;
            case isHanEiji($word, "UTF-8"):
                // Half-width letters -> full-width letters.
                $kna .= mb_convert_kana($word, "R", "UTF-8");
                break;
            case isKigou($word, "UTF-8"):
                // Symbols: keep as is.
                $kna .= $word;
                break;
            default:
                $fettbl = explode(",", $node->getFeature());
                // Feature strings with fewer than eight fields have no
                // index 7; contribute nothing instead of reading an
                // undefined offset.
                if (isset($fettbl[7])) {
                    $kna .= $fettbl[7];
                }
        }
    }
    return $kna;
}
Beispiel #7
0
<?php

/**
 * php-mecab/examples
 * parse string, wakati output format (OO-API with SPL and autocast)
 * charset=utf-8
 */
require_once __DIR__ . '/common.inc.php';

// $arg_format, $str_long and border() are not defined in this snippet;
// presumably they come from common.inc.php.
$mecab = new MeCab_Tagger($arg_format);

border();

// First pass: ask the tagger to format every node explicitly.
foreach ($mecab->parseToNode($str_long) as $node) {
    print $mecab->formatNode($node);
}

border();

// Second pass: print each node directly, relying on its string conversion.
foreach ($mecab->parseToNode($str_long) as $node) {
    print $node;
}

border();
Beispiel #8
0
/**
 * php-mecab/examples
 * test like official bindings examples (OO-API)
 * charset=utf-8
 */
require_once __DIR__ . '/common.inc.php';

// writeln() and writefln() are not defined in this snippet; presumably they
// come from common.inc.php.
$sentence = '太郎はこの本を二郎を見た女性に渡した。';

// Pass every command-line argument except the first one on to the tagger;
// fall back to no options when argv is unavailable.
$options = isset($_SERVER['argv']) ? array_slice($_SERVER['argv'], 1) : array();

writeln(MeCab::VERSION);

$t = new MeCab_Tagger($options);

// Whole-sentence output first, then one "surface<TAB>feature" line per node.
writeln($t->parse($sentence));
foreach ($t->parseToNode($sentence) as $m) {
    writeln("{$m->surface}\t{$m->feature}");
}
writeln('EOS');

// Dump the listed fields of every dictionary entry, in this order, each
// with its own printf-style conversion.
$di = $t->dictionaryInfo();
$field_formats = array(
    'filename' => '%s',
    'charset' => '%s',
    'size' => '%d',
    'type' => '%d',
    'lsize' => '%d',
    'rsize' => '%d',
    'version' => '%d',
);
foreach ($di as $d) {
    foreach ($field_formats as $field => $conversion) {
        writefln($field . ': ' . $conversion, $d[$field]);
    }
}
Beispiel #9
0
<?php

/**
 * php-mecab/examples
 * show dictionary information (OO-API)
 * charset=utf-8
 */
require_once __DIR__ . '/common.inc.php';

// $arg, border(), writeln() and writefln() are not defined in this snippet;
// presumably they come from common.inc.php.
$mecab = new MeCab_Tagger($arg);

// Section 1: library version.
border();
writefln('MeCab Version: %s', MeCab::VERSION);
border();

// Section 2: raw dump of the tagger's dictionary information.
writeln('Dictionary Information:');
$dictionaries = $mecab->dictionaryInfo();
print_r($dictionaries);
border();
Beispiel #10
0
<?php

/**
 * php-mecab/examples
 * parse string (OO-API, persistent)
 * charset=utf-8
 */
require_once __DIR__ . '/common.inc.php';

// $str_long and border() are not defined in this snippet; presumably they
// come from common.inc.php.
// No tagger options are passed; the second argument is presumably the
// "persistent" flag this example is named after — confirm against the
// php-mecab documentation.
$mecab = new MeCab_Tagger(null, true);

// Print the tagger's output for $str_long between two border() calls.
border();
print $mecab->parse($str_long);
border();
Beispiel #11
0
<?php

/**
 * php-mecab/examples
 * parse string, wakati output format (OO-API)
 * charset=utf-8
 */
require_once __DIR__ . '/common.inc.php';

// $arg_format, $str_long and border() are not defined in this snippet;
// presumably they come from common.inc.php.
$mecab = new MeCab_Tagger($arg_format);

border();

// First walk: follow getNext() from the first node until it yields a falsy
// value, letting the tagger format each node.
for ($node = $mecab->parseToNode($str_long); $node; $node = $node->getNext()) {
    echo $mecab->formatNode($node);
}

border();

// Second walk: same traversal, but each node renders itself via toString().
for ($node = $mecab->parseToNode($str_long); $node; $node = $node->getNext()) {
    echo $node->toString();
}

border();
Beispiel #12
0
/**
 * Build tag-cloud data from the user timelines of several Twitter accounts.
 *
 * For every screen name in $input the user timeline is requested, each tweet
 * is recorded, and its text is analysed with MeCab. Morphemes whose feature
 * string matches the part-of-speech filter and whose surface does not match
 * the noise filter are collected. Collected occurrences are then grouped by
 * surface; a surface is added to the tag cloud only when it occurs more than
 * once and at least one occurrence comes from a different screen name than
 * the first occurrence.
 *
 * @param array $input Screen names whose timelines should be analysed.
 * @return array Two elements:
 *               [0] array("name" => "tagcloud", "children" => list of
 *                   array("Surface", "Time", "Count", "Id")), where "Time" and
 *                   "Id" hold one single-element array per occurrence;
 *               [1] one list per screen name (in input order) of tweet records
 *                   with the keys time, text, id, userID, name and link.
 */
function create($input)
{
    $lists = array();
    // Data (array) used by the tag cloud.
    $completion_data = array("name" => "tagcloud", "children" => array());
    // One entry (Surface, Time, Id) per accepted morpheme occurrence.
    $time_line_texts = array();

    // REST API endpoint (user timeline) and request method.
    $tw_rest_api = 'https://api.twitter.com/1.1/statuses/user_timeline.json';
    $request_method = 'GET';

    // Part-of-speech filter applied to the MeCab feature string.
    $feature = '/形容詞(.*)|感動詞(.*)|固有名詞(.*)/u';
    // Noise filter applied to the surface; a match rejects the morpheme.
    // NOTE(review): under the /u modifier PHP may treat \w as Unicode-aware,
    // in which case this would reject most Japanese surfaces — confirm that
    // this is intended.
    $search = '/http|RT|\\w+|w+|笑|bot|あ+|あー|い+|え+|お+|ない|ー/u';

    // The tagger is created lazily, once, when the first tweet is analysed,
    // instead of once per tweet.
    $mecab = null;

    foreach ($input as $screen_name) {
        $twObj = new TwitterOAuth(CONSUMER_KEY, CONSUMER_SECRET, $_SESSION['oauthToken'], $_SESSION['oauthTokenSecret']);
        // Query options.
        $options = array('screen_name' => $screen_name, 'count' => '100');
        // Fetch the user timeline and decode the JSON response.
        $tw_obj_request = $twObj->OAuthRequest($tw_rest_api, $request_method, $options);
        $tw_obj_request_json = json_decode($tw_obj_request, true);

        $lists_test = array();
        // A failed decode yields a non-array; treat it as "no tweets" but
        // still record an (empty) list so $lists stays aligned with $input.
        if (is_array($tw_obj_request_json)) {
            foreach ($tw_obj_request_json as $value) {
                // Skip anything that is not shaped like a tweet (for example
                // the contents of an API error payload).
                if (!is_array($value) || !isset($value['text'], $value['created_at'], $value['user'])) {
                    continue;
                }
                // Hour of day the tweet was created.
                $time = date('H', strtotime((string) $value['created_at']));
                $str = $value["text"];
                $user_id = $value['user']['screen_name'];
                array_push($lists_test, array("time" => $time, "text" => $str, "id" => $value['id_str'], "userID" => $user_id, "name" => $value['user']['name'], "link" => $value['user']['profile_image_url']));

                if ($mecab === null) {
                    ini_set('mecab.default_userdic', 'onomasticon2.dic');
                    $mecab = new MeCab_Tagger();
                }
                for ($node = $mecab->parseToNode($str); $node; $node = $node->getNext()) {
                    $stat = $node->getStat();
                    // Nodes with status 2 or 3 are skipped (these look like
                    // MeCab's BOS/EOS sentinel statuses — confirm).
                    if ($stat == 2 || $stat == 3) {
                        continue;
                    }
                    if (!preg_match($feature, $node->getFeature())) {
                        continue;
                    }
                    $surface = $node->getSurface();
                    if (preg_match($search, $surface)) {
                        continue;
                    }
                    $time_line_texts[] = array("Surface" => $surface, "Time" => $time, "Id" => $user_id);
                }
            }
        }
        array_push($lists, $lists_test);
    }

    // Group the occurrences by exact surface, keeping first-seen order. This
    // replaces a loose in_array() scan that also compared the surface against
    // each occurrence's Time and Id.
    $groups = array();
    foreach ($time_line_texts as $occurrence) {
        $groups[$occurrence["Surface"]][] = $occurrence;
    }

    // Build the tag-cloud children.
    foreach ($groups as $plane_data) {
        $count_data = count($plane_data);
        if ($count_data <= 1) {
            continue;
        }
        $first_id = $plane_data[0]["Id"];
        $times = array();
        $ids = array();
        $used_by_other_user = false;
        foreach ($plane_data as $occurrence) {
            // Each occurrence contributes a single-element array, matching
            // the shape consumers of this data already receive.
            $times[] = array($occurrence["Time"]);
            $ids[] = array($occurrence["Id"]);
            if ($occurrence["Id"] !== $first_id) {
                $used_by_other_user = true;
            }
        }
        // Keep only surfaces that appear under more than one screen name.
        if ($used_by_other_user) {
            array_push($completion_data["children"], array("Surface" => $plane_data[0]["Surface"], "Time" => $times, "Count" => $count_data, "Id" => $ids));
        }
    }

    return array($completion_data, $lists);
}