Ejemplo n.º 1
0
 /**
  * Removes stop words from a space-separated token string.
  *
  * Does nothing except trim when $this->ignore is falsy. Otherwise the rule
  * file at $this->rule is scanned line by line:
  *   - "[nostats]" starts stop-word collection,
  *   - "[noname]" makes every later collected line be split into single
  *     characters (via Segmentor::utf8_str_split) instead of on spaces,
  *   - "[symbol]" lines and lines starting with ":" or ";" are skipped,
  *   - "[pubname]" ends the scan.
  *
  * Each token of $output is compared against the collected stop words after
  * stripping everything from the last Segmentor::$speech_separator onward;
  * non-matching tokens are kept unchanged (suffix included). Empty tokens
  * (e.g. produced by consecutive spaces) are always dropped.
  *
  * @param string $output Space-separated tokens.
  *
  * @return string The remaining tokens joined by single spaces, trimmed.
  */
 private function _ignore_stopword($output)
 {
     if (!$this->ignore) {
         return trim($output);
     }

     // An unreadable rule file is treated as "no stop words" rather than
     // feeding a boolean false into explode().
     $contents = file_get_contents($this->rule);
     if ($contents === false) {
         $contents = '';
     }

     // Stop words are stored as array keys for O(1) exact-match lookup.
     // The empty string is always present so that empty tokens are removed,
     // as the original list-based implementation did.
     $stop_set = array('' => true);
     $start_record = false;
     $start_split = false;

     foreach (explode("\n", $contents) as $line) {
         $l = trim($line);

         if ($l === '[nostats]') {
             $start_record = true;
             continue;
         }
         if ($l === '[noname]') {
             $start_split = true;
             continue;
         }
         if ($l === '[symbol]') {
             continue;
         }
         $first = substr($l, 0, 1);
         if ($first === ':' || $first === ';') {
             continue;
         }
         if ($l === '[pubname]') {
             break;
         }
         if (!$start_record || $l === '') {
             continue;
         }

         // Use the trimmed line so a trailing "\r" (CRLF rule files) does not
         // end up glued to the last stop word of the line.
         $pieces = $start_split
             ? Segmentor::utf8_str_split($l)
             : explode(' ', $l);
         foreach ($pieces as $piece) {
             $stop_set[$piece] = true;
         }
     }

     // Check every token against the stop-word set.
     $separator = Segmentor::$speech_separator;
     $kept = array();
     foreach (explode(' ', $output) as $token) {
         // Compare only the part before the last speech separator.
         $word = $token;
         $index = strrpos($word, $separator);
         if ($index !== false) {
             $word = substr($word, 0, $index);
         }

         // Key lookup is an exact string match; the former `==` comparison
         // treated numeric strings such as "1" and "1.0" as equal.
         if (!isset($stop_set[$word])) {
             $kept[] = $token;
         }
     }

     return trim(implode(' ', $kept));
 }