/**
 * Split a text into tokens using Sastrawi's default tokenizer.
 *
 * Despite its name, this method only tokenizes; it performs no stemming.
 *
 * @param string $words Text to tokenize.
 * @return mixed The value returned by the Sastrawi tokenizer's tokenize()
 *               (tfidf() iterates over the same kind of result as a list of tokens).
 */
public function steam($words)
{
    // Load the bundled Sastrawi autoloader here as well, exactly as tokenizer()
    // and tfidf() do, so this method does not depend on one of them having run
    // first. require_once makes repeated calls harmless.
    require_once __DIR__ . '/sastrawi/vendor/autoload.php';

    $tokenizerFactory = new \Sastrawi\Tokenizer\TokenizerFactory();
    $tokenizer = $tokenizerFactory->createDefaultTokenizer();

    return $tokenizer->tokenize($words);
}
/**
 * Debug helper: tokenizes a fixed sample sentence with Sastrawi's default
 * tokenizer and prints the resulting tokens with var_dump().
 */
public function tokenizer()
{
    require_once __DIR__ . '/sastrawi/vendor/autoload.php';

    // Hard-coded sample containing a currency amount and abbreviations.
    $sample = 'Saya membeli barang seharga Rp 5.000 di Jl. Prof. Soepomo no. 67.';

    $factory = new \Sastrawi\Tokenizer\TokenizerFactory();
    $defaultTokenizer = $factory->createDefaultTokenizer();

    var_dump($defaultTokenizer->tokenize($sample));
}
/**
 * Rebuild the term index tables (stki_terms, stki_tf) from the titles stored
 * in stki_data_kp, printing progress as HTML while it runs.
 *
 * Phases:
 *  1. Empty stki_tf and stki_terms.
 *  2. For every row of stki_data_kp: stem the title, pass it through
 *     $this->stopword(), tokenize it, and store one stki_tf row per
 *     (term, document) holding the number of occurrences.
 *  3. For every term: df = number of stki_tf rows for the term,
 *     idf = log(N / df), where N is the number of rows in stki_data_kp.
 *  4. For every stki_tf row: normalized_tf = tf / number of stki_tf rows
 *     of the same document.
 *
 * @return void
 */
public function tfidf()
{
    ini_set('max_execution_time', 3600);
    require_once __DIR__ . '/sastrawi/vendor/autoload.php';

    $tokenizerFactory = new \Sastrawi\Tokenizer\TokenizerFactory();
    $tokenizer = $tokenizerFactory->createDefaultTokenizer();
    $stemmerFactory = new \Sastrawi\Stemmer\StemmerFactory();
    $stemmer = $stemmerFactory->createStemmer();

    echo "Mulai...mengosongkan table<br>";
    $this->db->empty_table('stki_tf');
    $this->db->empty_table('stki_terms');

    // Token => id_term for terms already resolved during this run, so the
    // database is asked about each distinct token only once.
    $term_ids = array();

    $documents = $this->db->get('stki_data_kp')->result();

    // Compute term frequency
    foreach ($documents as $row) {
        $id_doc = $row->id_doc;
        $judul = $row->judul;

        $judul_baru = $stemmer->stem($judul);
        $judul_baru = $this->stopword($judul_baru);
        $tokens = $tokenizer->tokenize($judul_baru);

        // Count occurrences in memory first (keeping first-occurrence order)
        // instead of issuing several queries per token occurrence.
        $counts = array();
        foreach ($tokens as $token) {
            if (strlen($token) === 0) {
                continue;
            }
            if (isset($counts[$token])) {
                $counts[$token]++;
            } else {
                $counts[$token] = 1;
            }
        }

        foreach ($counts as $token => $count) {
            // PHP turns numeric-string array keys into integers; restore the string.
            $token = (string) $token;

            if (!isset($term_ids[$token])) {
                $resolved = $this->tfidf_resolve_term_id($token);
                if ($resolved === null) {
                    // Without a readable id_term there is nothing valid to link
                    // the frequency to; report it rather than writing a stale id.
                    echo 'Skipped term "' . htmlspecialchars($token, ENT_QUOTES, 'UTF-8')
                        . '" (could not resolve id_term)<br>';
                    continue;
                }
                $term_ids[$token] = $resolved;
            }

            $this->tfidf_add_term_frequency($term_ids[$token], $id_doc, $count);
        }

        // The title comes from the database, so escape it for the HTML output.
        echo 'Selesai mengolah : "' . htmlspecialchars((string) $judul, ENT_QUOTES, 'UTF-8')
            . '"(id_doc : ' . $id_doc . ')<br>';
    }

    // Compute df and idf
    $n = count($documents); // the document count is the same for every term
    $query = $this->db->get('stki_terms');
    foreach ($query->result() as $row) {
        $id_term = $row->id_term;
        $query2 = $this->db->get_where('stki_tf', array('id_term' => $id_term));
        echo "id_term : " . $id_term . "<br>";
        $df = $query2->num_rows();

        // A term without any stki_tf row would otherwise divide by zero.
        $idf = ($df > 0) ? log($n / $df) : 0;

        $data = array('df' => $df, 'idf' => $idf);
        $this->db->where('id_term', $id_term);
        $this->db->update('stki_terms', $data);
    }

    // Compute normalized tf
    foreach ($documents as $row) {
        echo "<h1>update normalized_tf for id_doc : " . $row->id_doc . "</h1><br>";
        $tf_rows = $this->db->get_where('stki_tf', array('id_doc' => $row->id_doc))->result();

        // NOTE(review): this divides by the number of DISTINCT terms of the
        // document, not by its total token count - confirm this is intended.
        $n_terms = count($tf_rows);

        foreach ($tf_rows as $row2) {
            $normalized_tf = $row2->tf / $n_terms;
            $data = array('normalized_tf' => $normalized_tf);
            $this->db->where('id', $row2->id);
            $this->db->update('stki_tf', $data);
            echo "id_term : " . $row2->id_term . "<br>";
        }
    }
}

/**
 * Return the id_term of $token in stki_terms, inserting the term first when
 * no matching row exists.
 *
 * @param string $token Term text.
 * @return mixed The id_term of the last matching row, or null when the term
 *               cannot be read back after the insert.
 */
private function tfidf_resolve_term_id($token)
{
    $rows = $this->db->get_where('stki_terms', array('term' => $token))->result();

    if (count($rows) === 0) {
        $this->db->insert('stki_terms', array('term' => $token));
        // Read the row back to learn the id the database assigned to it.
        $rows = $this->db->get_where('stki_terms', array('term' => $token))->result();
    }

    if (count($rows) === 0) {
        return null;
    }

    $last = end($rows);

    return $last->id_term;
}

/**
 * Add $count occurrences of a term to a document's stki_tf row, creating the
 * row when the (id_term, id_doc) pair has none yet.
 *
 * @param mixed $id_term Term id from stki_terms.
 * @param mixed $id_doc  Document id from stki_data_kp.
 * @param int   $count   Number of occurrences to add.
 * @return void
 */
private function tfidf_add_term_frequency($id_term, $id_doc, $count)
{
    $rows = $this->db->get_where('stki_tf', array('id_term' => $id_term, 'id_doc' => $id_doc))->result();

    if (count($rows) === 0) {
        $this->db->insert('stki_tf', array('id_term' => $id_term, 'id_doc' => $id_doc, 'tf' => $count));
        return;
    }

    // If several rows match, update the last one.
    $existing = end($rows);
    $this->db->where('id', $existing->id);
    $this->db->update('stki_tf', array('tf' => $existing->tf + $count));
}