<?php /** * php-mecab/examples * dump all nodes (OO-API) * charset=utf-8 */ require_once dirname(__FILE__) . '/common.inc.php'; $mecab = new MeCab_Tagger($arg); function call_format($node) { return format($node->getSurface(), $node->getFeature(), $node->getId(), $node->getStat()); } if ($node = $mecab->parseToNode($str)) { border(); while ($node) { $ar = array('node' => $node->toArray(), 'prev' => '-', 'next' => '-', 'enext' => '-', 'bnext' => '-', 'rpath' => '-', 'lpath' => '-'); if ($prev = $node->getPrev()) { $ar['prev'] = call_format($prev); } if ($next = $node->getNext()) { $ar['next'] = call_format($next); } if ($enext = $node->getENext()) { $ar['enext'] = call_format($enext); } if ($bnext = $node->getBNext()) { $ar['bnext'] = call_format($bnext); } if ($rpath = $node->getRPath()) { $ar['rpath'] = array('prob' => $rpath->getProb(), 'cost' => $rpath->getCost(), 'rnode' => '-', 'lnode' => '-');
<?php /** * php-mecab/examples * dump all nodes (OO-API with SPL and Overloading) * charset=utf-8 */ require_once dirname(__FILE__) . '/common.inc.php'; $mecab = new MeCab_Tagger($arg); function call_format($node) { return format($node->surface, $node->feature, $node->id, $node->stat); } if ($iter = $mecab->parseToNode($str)) { border(); foreach ($iter as $node) { $ar = array('node' => $node->toArray(), 'prev' => '-', 'next' => '-', 'enext' => '-', 'bnext' => '-', 'rpath' => '-', 'lpath' => '-'); if ($node->prev) { $ar['prev'] = call_format($node->prev); } if ($node->next) { $ar['next'] = call_format($node->next); } if ($node->enext) { $ar['enext'] = call_format($node->enext); } if ($node->bnext) { $ar['bnext'] = call_format($node->bnext); } if ($rpath = $node->rpath) { $ar['rpath'] = array('prob' => $rpath->prob, 'cost' => $rpath->cost, 'rnode' => '-', 'lnode' => '-');
if ($key == 0) { array_push($time_line_texts, $resolved); } } } } } foreach ($jset3['statuses'] as $result) { $timeee = date('H', strtotime((string) $result['user']['created_at'])); //時間を取得 $lists = array("time" => $timeee, "id" => $result['id_str'], "userID" => $result['user']['screen_name'], "name" => $result['user']['name'], "link" => $result['user']['profile_image_url'], "text" => $result['text']); // ツイートのリストをliで出力 array_push($list, $lists); ini_set('mecab.default_userdic', 'onomasticon2.dic'); $mecab = new MeCab_Tagger(); for ($node = $mecab->parseToNode($result['text']); $node; $node = $node->getNext()) { if ($node->getStat() != 2 && $node->getStat() != 3) { $resolved = array("Surface" => $node->getSurface(), "Feature" => $node->getFeature(), "Time" => $timeee); $feature = '/固有名詞(.*)/u'; // フィルターに掛ける品詞 形容詞(.*)|感動詞(.*)| if (preg_match($feature, $resolved["Feature"])) { // フィルター unset($resolved["Feature"]); $search = '/http|RT|\\w+|w+|笑|bot|あ+|あー|い+|え+|お+|ない|ー/u'; // フィルターに掛ける文字 $key = preg_match($search, $resolved["Surface"]); // フィルター if ($key == 0) { array_push($time_line_texts, $resolved); } }
<?php
/**
 * php-mecab/examples
 * Parse a string and print it in wakati format
 * (OO-API, iterating the parse result with foreach and relying on
 * string auto-casting of nodes).
 * charset=utf-8
 *
 * $arg_format, $str_long and border() are not defined in this script;
 * they are expected to come from common.inc.php.
 */

require_once dirname(__FILE__) . '/common.inc.php';

$mecab = new MeCab_Tagger($arg_format);

// Pass 1: let the tagger format every node explicitly.
border();
$formattedPass = $mecab->parseToNode($str_long);
foreach ($formattedPass as $node) {
    echo $mecab->formatNode($node);
}

// Pass 2: echo each node directly, relying on its string conversion.
border();
$autocastPass = $mecab->parseToNode($str_long);
foreach ($autocastPass as $node) {
    echo $node;
}

border();
/**
 * Build a kana string for $str, one morpheme at a time.
 *
 * The text is tokenised with MeCab. Each token's surface form is classified
 * by the is*() helpers (defined outside this block) and then either copied
 * verbatim, widened/converted with mb_convert_kana(), or - when no helper
 * recognises it - replaced by field 7 of the token's comma-separated feature
 * string.
 *
 * @param string $str Text to convert; handled as UTF-8 throughout.
 * @return string The concatenated per-token results. Tokens whose feature
 *                string has no field 7 contribute an empty string.
 */
function funcGetKana($str)
{
    $mecab = new MeCab_Tagger();
    $kna = "";

    for ($node = $mecab->parseToNode($str); $node; $node = $node->getNext()) {
        // Stat 2 and 3 are MeCab's begin-of-sentence / end-of-sentence
        // marker nodes; they carry no text.
        $stat = $node->getStat();
        if ($stat == 2 || $stat == 3) {
            continue;
        }

        $word = $node->getSurface();

        // The classifiers are tried in this fixed order; the first match wins.
        if (isZenKana($word, "UTF-8")) {
            // Full-width katakana: already in the target form.
            $kna .= $word;
        } elseif (isHanKana($word, "UTF-8")) {
            // Half-width katakana -> full-width katakana.
            $kna .= mb_convert_kana($word, "K", "UTF-8");
        } elseif (isHiragana($word, "UTF-8")) {
            // Hiragana -> full-width katakana.
            $kna .= mb_convert_kana($word, "C", "UTF-8");
        } elseif (isZenNum($word, "UTF-8")) {
            // Full-width digits: keep as is.
            $kna .= $word;
        } elseif (isHanNum($word, "UTF-8")) {
            // Half-width digits -> full-width digits.
            $kna .= mb_convert_kana($word, "N", "UTF-8");
        } elseif (isZenEiji($word, "UTF-8")) {
            // Full-width latin letters: keep as is.
            $kna .= $word;
        } elseif (isHanEiji($word, "UTF-8")) {
            // Half-width latin letters -> full-width latin letters.
            $kna .= mb_convert_kana($word, "R", "UTF-8");
        } elseif (isKigou($word, "UTF-8")) {
            // Symbols: keep as is.
            $kna .= $word;
        } else {
            // Anything else (e.g. kanji): take field 7 of the feature string.
            // NOTE(review): presumably the reading column of an IPA-style
            // dictionary - confirm against the dictionary in use.
            // Unknown words may have fewer fields, so the index is guarded
            // instead of raising an undefined-offset notice.
            $fields = explode(",", $node->getFeature());
            $kna .= isset($fields[7]) ? $fields[7] : '';
        }
    }

    return $kna;
}
/**
 * php-mecab/examples
 * Mirrors the examples shipped with the official MeCab bindings (OO-API):
 * prints the library version, the parsed sentence, every node's surface and
 * feature, and finally the metadata of each loaded dictionary.
 * charset=utf-8
 *
 * writeln() and writefln() are not defined in this script; they are expected
 * to come from common.inc.php.
 */

require_once dirname(__FILE__) . '/common.inc.php';

$sentence = '太郎はこの本を二郎を見た女性に渡した。';

// Command-line arguments (minus the script name) become tagger options.
$options = array();
if (isset($_SERVER['argv'])) {
    $options = array_slice($_SERVER['argv'], 1);
}

writeln(MeCab::VERSION);

$t = new MeCab_Tagger($options);

writeln($t->parse($sentence));

foreach ($t->parseToNode($sentence) as $m) {
    writeln(implode("\t", array($m->surface, $m->feature)));
}
writeln('EOS');

// Output format and source key for every dictionary attribute, in print order.
$dictionaryFields = array(
    array('filename: %s', 'filename'),
    array('charset: %s', 'charset'),
    array('size: %d', 'size'),
    array('type: %d', 'type'),
    array('lsize: %d', 'lsize'),
    array('rsize: %d', 'rsize'),
    array('version: %d', 'version'),
);

foreach ($t->dictionaryInfo() as $d) {
    foreach ($dictionaryFields as $field) {
        writefln($field[0], $d[$field[1]]);
    }
}
<?php
/**
 * php-mecab/examples
 * Parse a string and print it in wakati format
 * (OO-API, walking the node chain manually with getNext()).
 * charset=utf-8
 *
 * $arg_format, $str_long and border() are not defined in this script;
 * they are expected to come from common.inc.php.
 */

require_once dirname(__FILE__) . '/common.inc.php';

$mecab = new MeCab_Tagger($arg_format);

// Pass 1: let the tagger format every node.
border();
for ($node = $mecab->parseToNode($str_long); $node; $node = $node->getNext()) {
    echo $mecab->formatNode($node);
}

// Pass 2: ask every node for its own string form.
border();
for ($node = $mecab->parseToNode($str_long); $node; $node = $node->getNext()) {
    echo $node->toString();
}

border();
/**
 * Build tag-cloud data from the recent tweets of several Twitter users.
 *
 * For every screen name in $input the user's timeline (up to 100 tweets) is
 * requested, each tweet is summarised into a per-user list, and its text is
 * tokenised with MeCab. Tokens whose feature string matches the
 * part-of-speech pattern (adjective, interjection or proper noun) and whose
 * surface does not match the noise pattern are grouped by surface form.
 * A surface becomes a tag-cloud child only when it occurs more than once and
 * at least one occurrence comes from a different screen name than the first.
 *
 * @param array $input List of Twitter screen names.
 * @return array Two-element array:
 *   [0] array('name' => 'tagcloud', 'children' => list of
 *         array('Surface' => string,
 *               'Time'    => list of one-element arrays holding the hour,
 *               'Count'   => int number of occurrences,
 *               'Id'      => list of one-element arrays holding the screen name));
 *   [1] one entry per requested user, each a list of
 *         array('time', 'text', 'id', 'userID', 'name', 'link') tweet summaries.
 */
function create($input)
{
    $lists = array();
    // Data structure consumed by the tag cloud.
    $completion_data = array("name" => "tagcloud", "children" => array());

    // Nothing to fetch: return the empty structures without touching the
    // Twitter client or MeCab.
    if (!is_array($input) || count($input) === 0) {
        return array($completion_data, $lists);
    }

    // REST endpoint (user timeline) and HTTP method.
    $tw_rest_api = 'https://api.twitter.com/1.1/statuses/user_timeline.json';
    $request_method = 'GET';

    // Parts of speech to keep.
    $feature = '/形容詞(.*)|感動詞(.*)|固有名詞(.*)/u';
    // Surfaces to discard.
    $search = '/http|RT|\\w+|w+|笑|bot|あ+|あー|い+|え+|お+|ない|ー/u';

    // The tagger does not depend on the tweet, so it is created once instead
    // of once per tweet.
    ini_set('mecab.default_userdic', 'onomasticon2.dic');
    $mecab = new MeCab_Tagger();

    // surface => list of array('Surface', 'Time', 'Id') occurrences, in the
    // order in which the surfaces were first seen.
    $occurrences_by_surface = array();

    foreach ($input as $screen_name) {
        $twObj = new TwitterOAuth(CONSUMER_KEY, CONSUMER_SECRET, $_SESSION['oauthToken'], $_SESSION['oauthTokenSecret']);
        $options = array('screen_name' => $screen_name, 'count' => '100');

        // Fetch the user timeline and decode the JSON response.
        $tw_obj_request = $twObj->OAuthRequest($tw_rest_api, $request_method, $options);
        $timeline = json_decode($tw_obj_request, true);
        if (!is_array($timeline)) {
            // Undecodable response: treat it as "no tweets".
            $timeline = array();
        }

        $user_tweets = array();
        foreach ($timeline as $tweet) {
            // Error payloads decode to arrays without tweet fields; skip them.
            if (!is_array($tweet)
                || !isset($tweet['text'], $tweet['created_at'], $tweet['id_str'], $tweet['user'])
            ) {
                continue;
            }

            // Hour of day the tweet was created.
            $time = date('H', strtotime((string) $tweet['created_at']));
            $str = $tweet["text"];
            $author = $tweet['user']['screen_name'];

            $user_tweets[] = array(
                "time" => $time,
                "text" => $str,
                "id" => $tweet['id_str'],
                "userID" => $author,
                "name" => $tweet['user']['name'],
                "link" => $tweet['user']['profile_image_url'],
            );

            for ($node = $mecab->parseToNode($str); $node; $node = $node->getNext()) {
                // Stat 2 and 3 are MeCab's begin/end-of-sentence marker nodes.
                $stat = $node->getStat();
                if ($stat == 2 || $stat == 3) {
                    continue;
                }
                if (!preg_match($feature, $node->getFeature())) {
                    continue;
                }
                $surface = $node->getSurface();
                if (preg_match($search, $surface)) {
                    continue;
                }
                $occurrences_by_surface[$surface][] = array(
                    "Surface" => $surface,
                    "Time" => $time,
                    "Id" => $author,
                );
            }
        }

        $lists[] = $user_tweets;
    }

    // Build the tag-cloud children. Occurrences are matched on the surface
    // only, so a surface can never be confused with an hour or a screen name.
    foreach ($occurrences_by_surface as $occurrences) {
        $count_data = count($occurrences);
        if ($count_data < 2) {
            continue;
        }

        $first_id = $occurrences[0]["Id"];
        $times = array();
        $ids = array();
        $used_by_several = false;
        foreach ($occurrences as $occurrence) {
            // Each entry is wrapped in its own one-element array.
            $times[] = array($occurrence["Time"]);
            $ids[] = array($occurrence["Id"]);
            if ($occurrence["Id"] !== $first_id) {
                $used_by_several = true;
            }
        }

        // Keep only words shared by at least two different screen names.
        if ($used_by_several) {
            $completion_data["children"][] = array(
                "Surface" => $occurrences[0]["Surface"],
                "Time" => $times,
                "Count" => $count_data,
                "Id" => $ids,
            );
        }
    }

    return array($completion_data, $lists);
}