Example #1
0
 /**
  * Tokenizes the given text with Sastrawi's default tokenizer.
  *
  * Note: despite its name, this method performs no stemming; it only
  * returns the result of the tokenizer's tokenize() call.
  *
  * @param mixed $words Text handed unchanged to the tokenizer.
  * @return mixed Whatever the default tokenizer's tokenize() returns.
  */
 public function steam($words)
 {
     $factory = new \Sastrawi\Tokenizer\TokenizerFactory();

     return $factory->createDefaultTokenizer()->tokenize($words);
 }
Example #2
0
 /**
  * Demo: tokenizes a fixed Indonesian sample sentence with Sastrawi's
  * default tokenizer and dumps the resulting tokens via var_dump().
  *
  * @return void
  */
 public function tokenizer()
 {
     // Sastrawi is loaded from the autoloader bundled next to this file.
     require_once __DIR__ . '/sastrawi/vendor/autoload.php';

     $sample = 'Saya membeli barang seharga Rp 5.000 di Jl. Prof. Soepomo no. 67.';
     $factory = new \Sastrawi\Tokenizer\TokenizerFactory();

     var_dump($factory->createDefaultTokenizer()->tokenize($sample));
 }
Example #3
0
 /**
  * Rebuilds the term-frequency / inverse-document-frequency tables from the
  * titles (`judul`) stored in `stki_data_kp`.
  *
  * Steps, in order:
  *  1. Empties `stki_tf` and `stki_terms`.
  *  2. For every document row: stems the title, passes it through
  *     $this->stopword(), tokenizes it, and keeps one `stki_tf` row per
  *     (term, document) pair whose `tf` column counts the occurrences.
  *  3. For every term: stores `df` (number of `stki_tf` rows for that term)
  *     and `idf = log(n / df)`, where n is the row count of `stki_data_kp`.
  *  4. For every document: stores `normalized_tf = tf / r`, where r is the
  *     number of `stki_tf` rows of that document.
  *
  * Progress messages are echoed as HTML fragments while the method runs.
  *
  * @return void
  */
 public function tfidf()
 {
     // The rebuild issues several queries per token, so allow a long run.
     ini_set('max_execution_time', 3600);
     require_once __DIR__ . '/sastrawi/vendor/autoload.php';
     $tokenizerFactory = new \Sastrawi\Tokenizer\TokenizerFactory();
     $tokenizer = $tokenizerFactory->createDefaultTokenizer();
     $stemmerFactory = new \Sastrawi\Stemmer\StemmerFactory();
     $stemmer = $stemmerFactory->createStemmer();

     echo "Mulai...mengosongkan table<br>";
     $this->db->empty_table('stki_tf');
     $this->db->empty_table('stki_terms');

     // Phase 1: term frequency per (term, document) pair.
     $documents = $this->db->get('stki_data_kp');
     foreach ($documents->result() as $row) {
         $id_doc = $row->id_doc;
         $judul = $row->judul;
         $judul_baru = $this->stopword($stemmer->stem($judul));
         $tokens = $tokenizer->tokenize($judul_baru);

         foreach ($tokens as $token) {
             if (strlen($token) === 0) {
                 continue;
             }

             // Look the term up once; register it first when it is unknown.
             $termFilter = array('term' => $token);
             $termQuery = $this->db->get_where('stki_terms', $termFilter);
             $isNewTerm = ($termQuery->num_rows() == 0);
             if ($isNewTerm) {
                 $this->db->insert('stki_terms', $termFilter);
                 $termQuery = $this->db->get_where('stki_terms', $termFilter);
             }

             // As before, the last matching row supplies the term id.
             $id_term = null;
             foreach ($termQuery->result() as $termRow) {
                 $id_term = $termRow->id_term;
             }
             if ($id_term === null) {
                 // The term could not be read back (e.g. the insert failed);
                 // skip it instead of reusing an id left over from an
                 // earlier token.
                 continue;
             }

             // A term that was just created cannot have a tf row yet, so the
             // lookup is only needed for terms that already existed.
             $tfRow = null;
             if (!$isNewTerm) {
                 $tfQuery = $this->db->get_where('stki_tf', array('id_term' => $id_term, 'id_doc' => $id_doc));
                 foreach ($tfQuery->result() as $candidate) {
                     $tfRow = $candidate;
                 }
             }

             if ($tfRow === null) {
                 $this->db->insert('stki_tf', array('id_term' => $id_term, 'id_doc' => $id_doc, 'tf' => 1));
             } else {
                 $data = array('id_term' => $id_term, 'id_doc' => $id_doc, 'tf' => $tfRow->tf + 1);
                 $this->db->where('id', $tfRow->id);
                 $this->db->update('stki_tf', $data);
             }
         }
         echo 'Selesai mengolah : "' . $judul . '"(id_doc : ' . $id_doc . ')<br>';
     }

     // Phase 2: document frequency and inverse document frequency.
     // The document count does not change inside the loop, so read it once.
     $n = $this->db->get('stki_data_kp')->num_rows();
     $terms = $this->db->get('stki_terms');
     foreach ($terms->result() as $row) {
         $id_term = $row->id_term;
         $df = $this->db->get_where('stki_tf', array('id_term' => $id_term))->num_rows();
         echo "id_term : " . $id_term . "<br>";
         // Guard against a term without any tf row: dividing by zero would
         // raise an error; such a term gets idf 0.
         $idf = ($df > 0) ? log($n / $df) : 0;
         $data = array('df' => $df, 'idf' => $idf);
         $this->db->where('id_term', $id_term);
         $this->db->update('stki_terms', $data);
     }

     // Phase 3: normalized term frequency per document.
     // NOTE(review): the divisor is the number of stki_tf rows of the
     // document, not the sum of their tf values - confirm this is intended.
     $documents = $this->db->get('stki_data_kp');
     foreach ($documents->result() as $row) {
         echo "<h1>update normalized_tf for id_doc : " . $row->id_doc . "</h1><br>";
         $docTerms = $this->db->get_where('stki_tf', array('id_doc' => $row->id_doc));
         $n_terms = $docTerms->num_rows();
         foreach ($docTerms->result() as $row2) {
             $data = array('normalized_tf' => $row2->tf / $n_terms);
             $this->db->where('id', $row2->id);
             $this->db->update('stki_tf', $data);
             echo "id_term : " . $row2->id_term . "<br>";
         }
     }
 }