/**
 * Derives tags from a newly inserted topic and links them to that topic.
 *
 * The topic title plus the tag-stripped, editor-parsed content is run through
 * PhpAnalysis and GetFinallyKeywords(3) is asked for a comma-separated keyword
 * string. For each keyword an existing tag row is reused (its topic_count is
 * incremented) or a new tag is created with topic_count = 1, and a TagTopic
 * row linking the tag to the topic is saved.
 *
 * @param object $tc Topic content record; this method reads `content`,
 *                   `topic->title` and `topic_id` from it.
 * @return void
 */
public static function afterTopicInsert($tc)
{
    $editor = new \app\lib\Editor(['editor' => Yii::$app->params['settings']['editor']]);
    $content = $editor->parse($tc->content);

    $pa = new PhpAnalysis();
    $pa->SetSource($tc->topic->title . strip_tags($content));
    $pa->resultType = 2;
    $pa->differMax = true;
    $pa->StartAnalysis();

    // explode() on an empty string yields [''], which used to create a tag
    // with an empty name. Normalise, drop blanks and de-duplicate first.
    $tagNames = array_map('trim', explode(',', (string) $pa->GetFinallyKeywords(3)));
    $tagNames = array_filter($tagNames, function ($name) {
        return $name !== '';
    });
    $tagNames = array_values(array_unique($tagNames));
    if (empty($tagNames)) {
        return;
    }

    $tags = static::find()
        ->select(['id', 'name'])
        ->where(['in', 'name', $tagNames])
        ->indexBy('name')
        ->all();

    foreach ($tagNames as $tn) {
        $isExisting = !empty($tags[$tn]);
        if ($isExisting) {
            $tag = $tags[$tn];
        } else {
            $tag = new static(['name' => $tn, 'topic_count' => 1]);
            $tag->save(false);
        }

        $tagTopic = new TagTopic(['tag_id' => $tag->id, 'topic_id' => $tc->topic_id]);
        $tagTopic->save(false);

        // New tags already start at topic_count = 1; only existing ones are bumped.
        if ($isExisting) {
            $tag->updateCounters(['topic_count' => 1]);
        }
    }
}
/**
 * Performs word segmentation on the given text and returns its keywords.
 *
 * @param string $content Text to analyse.
 * @param int    $num     Keyword budget; `$num - 1` is what gets passed to
 *                        PhpAnalysis::GetFinallyKeywords().
 * @return mixed Whatever GetFinallyKeywords() returns.
 */
public function run($content, $num)
{
    // The dictionary is loaded explicitly through LoadDict() below.
    PhpAnalysis::$loadInit = false;

    $analyzer = new PhpAnalysis('utf-8', 'utf-8', false);
    $analyzer->LoadDict();
    $analyzer->SetSource($content);
    $analyzer->StartAnalysis(false);

    return $analyzer->GetFinallyKeywords($num - 1);
}
/**
 * Segments the given text with PhpAnalysis and returns the final result.
 *
 * @param string $content Text to analyse.
 * @return mixed Whatever PhpAnalysis::GetFinallyResult() returns with its
 *               default arguments.
 */
function get_keywords_str($content)
{
    // require_once: a plain require re-declares the class (fatal error) when
    // this function is called more than once in the same request.
    require_once APP_ROOT . '/phpanalysis.class.php';

    // The dictionary is loaded explicitly through LoadDict() below.
    PhpAnalysis::$loadInit = false;

    $pa = new PhpAnalysis('utf-8', 'utf-8', false);
    $pa->LoadDict();
    $pa->SetSource($content);
    $pa->StartAnalysis(false);

    return $pa->GetFinallyResult();
}
/**
 * Segments a string and returns its most frequent words whose tag starts with "/n".
 *
 * The text is segmented with PhpAnalysis, the space-separated result is
 * counted per token, sorted by descending frequency, and only tokens that
 * contain a "/n" marker after the word are kept. The word is the part before
 * the first "/".
 *
 * Note: tokens only carry a "/n" marker when $do_prop makes GetFinallyResult()
 * append it, so passing $do_prop = false yields an empty list.
 *
 * @param string|null $string   Text to analyse; a single space is used when null.
 * @param int         $size     Maximum number of words to return.
 * @param bool        $do_fork  Passed to PhpAnalysis::StartAnalysis().
 * @param bool        $do_unit  Assigned to PhpAnalysis::$unitWord.
 * @param bool        $do_prop  Passed as second argument to GetFinallyResult().
 * @param bool        $do_multi Assigned to PhpAnalysis::$differMax.
 * @param bool        $pri_dict Passed as third constructor argument of PhpAnalysis.
 * @return array List of words, most frequent first, at most $size entries.
 */
public static function phpAnalysis($string, $size = 5, $do_fork = true, $do_unit = true, $do_prop = true, $do_multi = true, $pri_dict = true)
{
    Yii::import('application.components.phpanalysis.*');

    // Guarded so that calling this helper after output has started does not
    // raise a "headers already sent" warning.
    if (!headers_sent()) {
        header('Content-Type: text/html; charset=utf-8');
    }

    $source = isset($string) ? $string : " ";

    // Initialise the analyser, load the dictionary and run the segmentation.
    $pa = new PhpAnalysis('utf-8', 'utf-8', $pri_dict);
    $pa->LoadDict();
    $pa->SetSource($source);
    $pa->differMax = $do_multi;
    $pa->unitWord = $do_unit;
    $pa->StartAnalysis($do_fork);

    $tokens = explode(' ', $pa->GetFinallyResult(' ', $do_prop));

    // token => occurrences, most frequent first.
    $frequencies = array_count_values($tokens);
    array_multisort($frequencies, SORT_DESC, SORT_NUMERIC);

    $words = array();
    foreach ($frequencies as $token => $count) {
        $token = (string) $token;
        $tagPos = strpos($token, '/n');

        // Keep only tokens with a "/n" marker that follows an actual word;
        // position 0 means there is no word in front of the marker.
        if ($tagPos === false || $tagPos === 0) {
            continue;
        }

        $parts = explode('/', $token);
        if ($parts[0] === '') {
            continue;
        }
        $words[] = $parts[0];
    }

    return array_slice($words, 0, $size);
}
/**
 * Extracts up to $len tags from a piece of text.
 *
 * The text is passed through self::filter_mark(), segmented with PhpAnalysis,
 * and the keys of the first $len entries of GetFinallyIndex() are returned.
 *
 * @param string $txt Text to analyse.
 * @param int    $len Maximum number of tags to return.
 * @return array List of tags; empty when the analyser yields no index.
 */
public static function getTags($txt, $len = 10)
{
    // The dictionary is loaded explicitly through LoadDict() below.
    PhpAnalysis::$loadInit = false;

    $analyzer = new PhpAnalysis('utf-8', 'utf-8', true);
    // Load the dictionary.
    $analyzer->LoadDict();
    $analyzer->SetSource(self::filter_mark($txt));
    $analyzer->resultType = 1;
    $analyzer->notSplitLen = 5;
    $analyzer->differMax = true;
    $analyzer->unitWord = true;
    $analyzer->StartAnalysis(true);

    $index = $analyzer->GetFinallyIndex();
    if (!$index) {
        return [];
    }

    $topEntries = array_slice($index, 0, $len, true);

    return array_keys($topEntries);
}
<?php
// Demo script: segments a fixed sample text with PhpAnalysis and dumps the
// segmentation result, the per-token counts and the analyser's foundWordStr.
header('Content-Type: text/html; charset=utf-8');
require_once 'phpanalysis.class.php';

$str = "2010年1月,美国国际消费电子展 (CES)上,联想将展出一款基于ARM架构的新产品,这有可能是传统四大PC厂商首次推出的基于ARM架构的消费电子产品,也意味着在移动互联网和产业融合趋势下,传统的PC芯片霸主英特尔正在遭遇挑战。\n11月12日,联想集团副总裁兼中国区总裁夏立向本报证实,联想基于ARM架构的新产品正在筹备中。\n英特尔新闻发言人孟轶嘉表示,对第三方合作伙伴信息不便评论。\n正面交锋\nARM内部人士透露,11月5日,ARM高级副总裁lanDrew参观了联想研究院,拜访了联想负责消费产品的负责人,进一步商讨基于ARM架构的新产品。ARM是英国芯片设计厂商,全球几乎95%的手机都采用ARM设计的芯片。\n据悉,这是一款采用高通芯片(基于ARM架构)的新产品,高通产品市场总监钱志军表示,联想对此次项目很谨慎,对于产品细节不方便透露。\n夏立告诉记者,联想研究院正在考虑多种方案,此款基于ARM架构的新产品应用邻域多样化,并不是替代传统的PC,而是更丰富的满足用户的需求。目前,客户调研还没有完成,“设计、研发更前瞻一些,最终还要看市场、用户接受程度。";
echo $str;

$do_fork = $do_unit = true;
$do_multi = $do_prop = $pri_dict = false;

// Initialise the analyser.
//PhpAnalysis::$loadInit = false;
$pa = new PhpAnalysis('utf-8', 'utf-8', $pri_dict);

// Load the dictionary.
$pa->LoadDict();

// Run the segmentation.
$pa->SetSource($str);
$pa->differMax = $do_multi;
$pa->unitWord = $do_unit;
$pa->StartAnalysis($do_fork);

$okresult = $pa->GetFinallyResult(' ', $do_prop);
$pa_foundWordStr = $pa->foundWordStr;
$pa = '';

echo '<pre>';
echo "<hr />";
echo $okresult;

// The result is joined with ' ' above, so split on the same separator.
// An empty separator makes explode() throw ValueError on PHP 8 (and return
// false with a warning on older versions).
$okresult = explode(' ', $okresult);
var_dump($okresult);
var_dump(array_count_values($okresult));

echo "<hr />";
echo $pa_foundWordStr;
// The closing tag was previously a bare string statement and never printed.
echo '</pre>';
if ($CurNewMessage > 0) { break; } sleep(3); } echo json_encode(array('Status' => 1, 'NewMessage' => $CurNewMessage)); break; case 'get_tags': Auth(1); require dirname(__FILE__) . "/includes/PHPAnalysis.class.php"; $str = $_POST['Title'] . "/r/n" . $_POST['Content']; $do_fork = $do_unit = true; $do_multi = $do_prop = $pri_dict = false; //初始化类 PhpAnalysis::$loadInit = false; $pa = new PhpAnalysis('utf-8', 'utf-8', $pri_dict); //载入词典 $pa->LoadDict(); //执行分词 $pa->SetSource($str); $pa->differMax = $do_multi; $pa->unitWord = $do_unit; $pa->StartAnalysis($do_fork); $ResultString = $pa->GetFinallyResult('|', $do_prop); $tags = array(); $tags['status'] = 0; if ($ResultString) { foreach (explode('|', $ResultString) as $key => $value) { if ($value != '' && !is_numeric($value) && mb_strlen($value, "utf-8") >= 2) { $SQLParameters[] = $value; }
<?php
// Dictionary build tool.
//   ?ac=make        builds a dictionary from dict/not-build/base_dic_full.txt
//   ?ac=<any other> exports the dictionary to base_dic_source.txt
//   no "ac"         prints a small menu offering both actions
ini_set('memory_limit', '128M');
error_reporting(E_ALL);
header('Content-Type: text/html; charset=utf-8');
require_once 'phpAnalysis.php';

$dicAddon = dirname(__FILE__) . '/dict/not-build/base_dic_full.txt';

// No action requested: show the menu and stop.
if (empty($_GET['ac'])) {
    echo "<div style='line-height:28px;'>请选择要进行的操作:<br />",
        "1、<a href='?ac=make'>用原始文件(dict/not-build/base_dic_full.txt)生成一个标准词典;</a><br />",
        "2、<a href='?ac=revert'>从默认词典(dict/base_dic_full.dic),反编译出原始文件。</a></div>";
    exit;
}

switch ($_GET['ac']) {
    case 'make':
        // Build the dictionary from the source word list.
        PhpAnalysis::$loadInit = false;
        $builder = new PhpAnalysis('utf-8', 'utf-8', false);
        $builder->MakeDict($dicAddon);
        echo "完成词典创建!";
        exit;

    default:
        // Every other action value exports the dictionary back to text.
        $exporter = new PhpAnalysis('utf-8', 'utf-8', true);
        $exporter->ExportDict('base_dic_source.txt');
        echo "完成反编译词典文件,生成的文件为:base_dic_source.txt !";
        exit;
}
?>
/**
 * Segments a keyword string with PhpAnalysis.
 *
 * @param string $keyword Text to segment.
 * @return string The analyser's final result with surrounding whitespace trimmed.
 */
public function segment($keyword)
{
    $analyzer = new PhpAnalysis();
    $analyzer->SetSource($keyword);
    $analyzer->resultType = 2;
    $analyzer->differMax = true;
    $analyzer->StartAnalysis();

    $segmented = $analyzer->GetFinallyResult();

    return trim($segmented);
}
/**
 * Displays a single film page.
 *
 * Loads the film row and its file rows by the "id" request argument, updates
 * the film's `lastac` field with the current time and increments `click`,
 * segments the film name into words, and renders "<tpl>/item.html".
 *
 * View variables assigned: `result` (film row), `result1` (file rows) and
 * `arr` (segmented words of the film name).
 */
function show()
{
    $id = $this->spArgs("id", "");
    $filmCondition = array("id" => "{$id}");

    $dbfilm = spClass('dbfilm');
    $result = $dbfilm->find($filmCondition);
    $result1 = spClass('dbfile')->findALL(array("fmid" => "{$id}"));

    // Guard the lookup: find() may not return a row for an unknown id.
    $info = (is_array($result) && isset($result['name'])) ? $result['name'] : '';

    $dbfilm->updateField($filmCondition, 'lastac', time());
    $dbfilm->incrField($filmCondition, 'click');

    $this->result = $result;
    $this->result1 = $result1;

    // Split the film name into words, joined with "|" by the analyser.
    $pa = new PhpAnalysis();
    $pa->SetSource("{$info} ");
    $pa->resultType = 2;
    $pa->differMax = true;
    $pa->StartAnalysis();
    $this->arr = explode("|", $pa->GetFinallyResult("|"));

    // "tpl" comes from the request and ends up in a template path, so only
    // plain directory names (optionally nested) are accepted. Anything else,
    // e.g. "../" sequences or absolute paths, falls back to the default.
    $tpl = $this->spArgs("tpl", "template");
    if (!is_string($tpl) || !preg_match('#^[A-Za-z0-9_-]+(?:/[A-Za-z0-9_-]+)*$#', $tpl)) {
        $tpl = "template";
    }

    $this->display("{$tpl}/item.html");
}
<?php
// Timing script: segments a fixed sample text with every optional analyser
// feature switched off, prints the result and the elapsed wall-clock time.
$startedAt = microtime(true);

require_once 'phpanalysis.class.php';

$sampleText = <<<EOT
2010年1月,美国国际消费电子展 (CES)上,联想将展出一款基于ARM架构的新产品,这有可能是传统四大PC厂商首次推出的基于ARM架构的消费电子产品,也意味着在移动互联网和产业融合趋势下,传统的PC芯片霸主英特尔正在遭遇挑战。
11月12日,联想集团副总裁兼中国区总裁夏立向本报证实,联想基于ARM架构的新产品正在筹备中。
英特尔新闻发言人孟轶嘉表示,对第三方合作伙伴信息不便评论。
正面交锋
ARM内部人士透露,11月5日,ARM高级副总裁lanDrew参观了联想研究院,拜访了联想负责消费产品的负责人,进一步商讨基于ARM架构的新产品。ARM是英国芯片设计厂商,全球几乎95%的手机都采用ARM设计的芯片。
据悉,这是一款采用高通芯片(基于ARM架构)的新产品,高通产品市场总监钱志军表示,联想对此次项目很谨慎,对于产品细节不方便透露。
夏立告诉记者,联想研究院正在考虑多种方案,此款基于ARM架构的新产品应用邻域多样化,并不是替代传统的PC,而是更丰富的满足用户的需求。目前,客户调研还没有完成,“设计、研发更前瞻一些,最终还要看市场、用户接受程度。”
EOT;

// Initialise the analyser.
PhpAnalysis::$loadInit = false;
$analyzer = new PhpAnalysis('utf-8', 'utf-8', 0);

// Load the dictionary.
$analyzer->LoadDict();

// Run the segmentation.
$analyzer->SetSource($sampleText);
$analyzer->differMax = 0;
$analyzer->unitWord = 0;
$analyzer->StartAnalysis(0);

$segmented = $analyzer->GetFinallyResult(' ', 0);

echo $segmented;
echo '<br>';
echo microtime(true) - $startedAt;
$do_fork = $do_unit = true; $do_multi = $do_prop = $pri_dict = false; if ($str != '') { //岐义处理 $do_fork = empty($_POST['do_fork']) ? false : true; //新词识别 $do_unit = empty($_POST['do_unit']) ? false : true; //多元切分 $do_multi = empty($_POST['do_multi']) ? false : true; //词性标注 $do_prop = empty($_POST['do_prop']) ? false : true; //是否预载全部词条 $pri_dict = empty($_POST['pri_dict']) ? false : true; $tall = microtime(true); //初始化类 PhpAnalysis::$loadInit = false; $pa = new ruby('utf-8', 'utf-8', $pri_dict); print_memory('初始化对象', $memory_info); //载入词典 $pa->LoadDict(); print_memory('载入基本词典', $memory_info); //执行分词 $pa->SetSource($str); $pa->differMax = $do_multi; $pa->unitWord = $do_unit; $pa->StartAnalysis($do_fork); print_memory('执行分词', $memory_info); $paArray = $pa->GetFinallyResultAsArray(); $okresult = $pa->GetFinallyResult(' ', $do_prop); print_memory('输出分词结果', $memory_info); $pa_foundWordStr = $pa->foundWordStr;
/**
 * Renders the article detail page.
 *
 * Collects the article, its image gallery, the previous/next articles of the
 * same category, navigation data, related articles (matched by words of the
 * title) and any extra JSON-driven data, then renders the matching Smarty
 * template ("content-news.html" or "content-product.html").
 *
 * Responds with HTTP 404 when the article does not exist or its category has
 * an article type other than 1 (news) or 2 (product).
 *
 * @param int $id Article id
 */
public function articlePreview($id)
{
    $article = Articles::find($id);
    if (!$article) {
        App::abort(404);
    }

    // Gallery: the article's own image first, followed by its extra images.
    $a_moreimg = Moreimg::where('a_id', $id)->get()->toArray();
    array_unshift($a_moreimg, array('title' => $article->title, 'img' => $article->img));
    $images = array();
    foreach ($a_moreimg as $a_img) {
        $images[] = array(
            'title' => $a_img['title'],
            'image' => $this->source_dir . 'l/articles/' . $a_img['img'],
        );
    }

    // Previous / next article within the same category, in listing order.
    $list_id = Articles::where('c_id', $article->c_id)
        ->where($this->type . '_show', '1')
        ->orderBy('is_top', 'desc')
        ->orderBy('created_at', 'desc')
        ->select('id', 'title', 'img', 'introduction', 'created_at')
        ->lists('id');
    // Initialised up front so they are defined even when the list is empty.
    $article_prev = null;
    $article_next = null;
    foreach ($list_id as $key => $val) {
        if ($val == $id) {
            if ($key != 0) {
                $article_prev = Articles::find($list_id[$key - 1]);
            }
            if ($key < count($list_id) - 1) {
                $article_next = Articles::find($list_id[$key + 1]);
            }
            break;
        }
    }

    $result = $this->pagePublic($article->c_id);

    // Child menu of the current navigation entry, or an empty list.
    $pagenavs = [];
    foreach ($result['navs'] as $nav) {
        if ($nav['current'] == 1) {
            $pagenavs = $nav['childmenu'];
            break;
        }
    }
    $result['pagenavs'] = $pagenavs;
    $result['posnavs'] = $this->getPosNavs($article->c_id);
    $result['title'] = $article->title;
    $result['keywords'] = $article->keywords;
    $result['description'] = $article->introduction;
    $result['article']['title'] = $article->title;
    $result['article']['keywords'] = $article->keywords;
    $result['article']['description'] = $article->introduction;
    $result['article']['viewcount'] = '<em id="article-viewcount">0</em>';

    $article_type = Articles::leftJoin('classify', 'classify.id', '=', 'article.c_id')
        ->where('article.id', $id)
        ->pluck('article_type');
    if ($article_type == 1) {
        // News content
        $viewname = 'content-news';
    } elseif ($article_type == 2) {
        // Product content
        $viewname = 'content-product';
    } else {
        // Unknown type: there is no template to render.
        App::abort(404);
    }

    // Detail links carry no extension in preview mode and ".html" otherwise.
    $is_preview = ($this->showtype == 'preview');
    $detail_suffix = $is_preview ? '' : '.html';

    // Related articles: any article of this customer whose title contains
    // one of the words found in the current title.
    $related = array();
    $pa = new PhpAnalysis();
    $pa->SetSource($article->title);
    // Configure the analyser.
    $pa->resultType = 2;
    $pa->differMax = true;
    $pa->StartAnalysis();
    $keywords = $pa->GetFinallyIndex();
    if (!empty($keywords)) {
        // Words are derived from stored content, so they are bound as
        // parameters instead of being interpolated into the SQL text.
        $conditions = array();
        $bindings = array($this->cus_id);
        foreach (array_keys($keywords) as $word) {
            $word = (string) $word;
            if ($word === '') {
                continue;
            }
            $conditions[] = 'title like ?';
            $bindings[] = '%' . $word . '%';
        }

        $related_data = array();
        if (!empty($conditions)) {
            $prefix = Config::get('database.connections.mysql.prefix');
            $related_data = DB::select(
                "select id,title,img as image,introduction,created_at,c_id from {$prefix}article where cus_id = ? and (" . implode(' or ', $conditions) . ")",
                $bindings
            );
        }

        foreach ($related_data as $val) {
            $temp_arr = [];
            $temp_arr['title'] = $val->title;
            $temp_arr['description'] = $val->introduction;
            $temp_arr['image'] = $this->source_dir . 'l/articles/' . $val->image;
            $temp_arr['link'] = $this->domain . '/detail/' . $val->id . $detail_suffix;
            // NOTE(review): the category link is built from the article id,
            // not from c_id - kept as in the original, confirm this is intended.
            $temp_arr['category']['link'] = $this->domain . '/category/' . $val->id . '.html';
            $temp_arr['pubdate'] = $val->created_at;
            $temp_arr['pubtimestamp'] = strtotime($val->created_at);
            $a_c_info = Classify::where('id', $val->c_id)->first();
            $temp_arr['category']['name'] = $a_c_info->name;
            $temp_arr['category']['en_name'] = $a_c_info->en_name;
            $temp_arr['category']['icon'] = '<i class="iconfont">' . $a_c_info->icon . '</i>';
            $related[] = $temp_arr;
        }
    }

    // Next / previous navigation entries.
    if ($article_next === null) {
        $result['article']['next']['title'] = '已经是最后一篇';
        $result['article']['next']['link'] = '';
    } else {
        $result['article']['next']['title'] = $article_next->title;
        $result['article']['next']['link'] = $this->domain . '/detail/' . $article_next->id . $detail_suffix;
    }
    if ($article_prev === null) {
        $result['article']['prev']['title'] = '已经是第一篇';
        $result['article']['prev']['link'] = '';
    } else {
        $result['article']['prev']['title'] = $article_prev->title;
        $result['article']['prev']['link'] = $this->domain . '/detail/' . $article_prev->id . $detail_suffix;
    }

    $result['article']['image'] = $this->source_dir . 'l/articles/' . $article->img;
    $result['article']['images'] = $images;
    if ($is_preview) {
        $result['article']['content'] = $article->content;
    } else {
        // Strip the "/customers/<customer>" prefix from paths in the content.
        // The customer name is quoted so it is matched literally.
        $result['article']['content'] = preg_replace(
            '#/customers/' . preg_quote($this->customer, '#') . '#i',
            '',
            $article->content
        );
    }

    $result['article']['description'] = $article->introduction;
    $result['article']['pubdate'] = $article->created_at;
    $result['article']['pubtimestamp'] = strtotime($article->created_at);
    $result['article']['category'] = $result['posnavs'][count($result['posnavs']) - 1];
    $result['related'] = $related;

    // Extra data blocks requested by the template's JSON configuration.
    $json_keys = $this->getJsonKey($viewname . '.html');
    if (!empty($json_keys)) {
        foreach ($json_keys as $key) {
            $result[$key] = $this->detailList($this->pagedata($key));
        }
    }

    $smarty = new Smarty();
    $smarty->setTemplateDir(app_path('views/templates/' . $this->themename));
    $smarty->setCompileDir(app_path('storage/views/compile'));
    $smarty->registerPlugin('function', 'mapExt', array('PrintController', 'createMap'));
    $smarty->registerPlugin('function', 'shareExt', array('PrintController', 'createShare'));
    $smarty->assign($result);
    $smarty->display($viewname . '.html');
}
$pa->StartAnalysis( $do_fork ); print_memory('执行分词', $memory_info); $okresult = $pa->GetFinallyResult(' ', $do_prop); print_memory('输出分词结果', $memory_info); $pa_foundWordStr = $pa->foundWordStr; function print_memory($rc, &$infostr) { global $ntime; $cutime = microtime(true); $etime = sprintf('%0.4f', $cutime - $ntime); $m = sprintf('%0.2f', memory_get_usage()/1024/1024); $infostr .= "{$rc}: {$m} MB 用时:{$etime} 秒<br />\n"; $ntime = $cutime; } $ntime = '';*/ $teststr = '我们是共产党我们'; $pa = new PhpAnalysis(); $pa->SetSource($teststr); //设置分词属性 $pa->resultType = 2; $pa->differMax = true; $pa->StartAnalysis(); echo '<pre>'; //获取你想要的结果 print_r($pa->GetFinallyIndex());