/**
 * Constructor.
 *
 * Creates the text-processing collaborators through their factories and
 * stores them on the instance: a stemmer, a stop-word remover, a sentence
 * detector and the default tokenizer.
 */
public function __construct()
{
    // Stemmer.
    $stemmers = new \Sastrawi\Stemmer\StemmerFactory();
    $this->stemmer = $stemmers->createStemmer();

    // Stop-word remover (property name keeps its original capitalisation).
    $removers = new \Sastrawi\StopWordRemover\StopWordRemoverFactory();
    $this->StopWordRemover = $removers->createStopWordRemover();

    // Sentence detector.
    $detectors = new SentenceDetectorFactory();
    $this->sentenceDetector = $detectors->createSentenceDetector();

    // Default tokenizer.
    $tokenizers = new TokenizerFactory();
    $this->tokenizer = $tokenizers->createDefaultTokenizer();
}
/**
 * Stemmer demonstration: stems two hard-coded Indonesian sample sentences
 * and prints each result followed by a newline.
 *
 * @return void
 */
public function stemmer()
{
    require_once __DIR__ . '/sastrawi/vendor/autoload.php';

    // Creating the stemmer only needs to happen once; it is usually
    // registered in a service container.
    $factory = new \Sastrawi\Stemmer\StemmerFactory();
    $indonesianStemmer = $factory->createStemmer();

    $samples = array(
        // Per the original author's note, this stems to:
        // "ekonomi indonesia sedang dalam tumbuh yang bangga"
        'Perekonomian Indonesia sedang dalam pertumbuhan yang membanggakan',
        // Per the original author's note, this stems to: "mereka tiru"
        'Mereka meniru-nirukannya',
    );

    foreach ($samples as $sample) {
        echo $indonesianStemmer->stem($sample) . "\n";
    }
}
/**
 * Stems every document in the `scraping` collection, classifies the stemmed
 * text with PHPInsight and stores one new Classifying record per document.
 *
 * For each document the stemmed text and the resulting category are also
 * printed as HTML. Both values are HTML-escaped before being written
 * because the article text originates from scraped, untrusted content.
 *
 * @return void
 */
public function get_stem()
{
    require_once app_path() . '/library/PHPInsight/autoload.php';

    $stemmerFactory = new \Sastrawi\Stemmer\StemmerFactory();
    $stemmer = $stemmerFactory->createStemmer();
    $sentiment = new \PHPInsight\Sentiment();

    $articles = DB::collection('scraping')->orderBy('id', 'asc')->get();

    foreach ($articles as $article) {
        $stemmed = $stemmer->stem($article['article']);

        // Only the category is needed; the separate score() call whose
        // result was never read has been removed.
        $category = $sentiment->categorise($stemmed);

        $record = new Classifying();
        $record->category = $category;
        $record->article = $article['article'];
        $record->user_validation = "";
        $record->title = "";
        $record->scraping_id = $article['_id'];
        $record->classifier = "new";
        $record->save();

        // Escape before embedding in markup so scraped content can never
        // be interpreted as HTML by the browser.
        echo "<pre>" . htmlspecialchars((string) $stemmed, ENT_QUOTES, 'UTF-8') . "</pre>";
        echo "<blockquote>" . htmlspecialchars((string) $category, ENT_QUOTES, 'UTF-8') . "</blockquote>";
        echo "<br><hr>";
    }
}
<?php
// demo.php
//
// Stems the text passed in the `input` query-string parameter and prints
// the stemmed result.

// Include the Composer autoloader.
require_once __DIR__ . '/vendor/autoload.php';

// Create the stemmer. This only needs to happen once; it is usually
// registered in a service container.
$stemmerFactory = new \Sastrawi\Stemmer\StemmerFactory();
$stemmer = $stemmerFactory->createStemmer();

// Read the request parameter defensively: it may be absent, or it may be an
// array when the client sends `input[]=...`. Anything that is not a string
// is treated as empty input.
$sentence = (isset($_GET['input']) && is_string($_GET['input'])) ? $_GET['input'] : '';

// Stem, then escape the result before writing it into the response so that
// request-supplied text can never be interpreted as markup.
$output = $stemmer->stem($sentence);
echo htmlspecialchars((string) $output, ENT_QUOTES, 'UTF-8');
/**
 * Rebuilds the term index for all documents in `stki_data_kp`.
 *
 * Steps:
 *  1. Empties `stki_tf` and `stki_terms`.
 *  2. For every document, stems its title (`judul`), passes it through
 *     $this->stopword(), tokenizes it, and records how often each term
 *     occurs in that document (`stki_tf.tf`), creating `stki_terms` rows
 *     for terms seen for the first time.
 *  3. For every term, stores its document frequency (`df`) and
 *     `idf = log(N / df)`, where N is the number of documents.
 *  4. For every term/document row, stores `normalized_tf`.
 *
 * Progress messages are printed as HTML while the method runs.
 *
 * @return void
 *
 * @throws \RuntimeException When a term row cannot be read back after
 *                           being inserted.
 */
public function tfidf()
{
    ini_set('max_execution_time', 3600);
    require_once __DIR__ . '/sastrawi/vendor/autoload.php';

    $tokenizerFactory = new \Sastrawi\Tokenizer\TokenizerFactory();
    $tokenizer = $tokenizerFactory->createDefaultTokenizer();
    $stemmerFactory = new \Sastrawi\Stemmer\StemmerFactory();
    $stemmer = $stemmerFactory->createStemmer();

    echo "Mulai...mengosongkan table<br>";
    $this->db->empty_table('stki_tf');
    $this->db->empty_table('stki_terms');

    // The document set does not change while the index is rebuilt, so it is
    // fetched once and reused for the document count and the final pass.
    $documents = $this->db->get('stki_data_kp');
    $documentCount = $documents->num_rows();

    // Pass 1: term frequencies.
    foreach ($documents->result() as $document) {
        $id_doc = $document->id_doc;
        $judul = $document->judul;

        $processedTitle = $this->stopword($stemmer->stem($judul));
        $tokens = $tokenizer->tokenize($processedTitle);

        // Count occurrences in memory first so each distinct term costs one
        // lookup instead of one per occurrence. Insertion order follows the
        // first occurrence of each term, as before.
        $occurrences = array();
        foreach ($tokens as $token) {
            if (strlen($token) === 0) {
                continue;
            }
            $occurrences[$token] = isset($occurrences[$token]) ? $occurrences[$token] + 1 : 1;
        }

        foreach ($occurrences as $term => $count) {
            // PHP turns numeric-looking array keys into integers; cast back
            // so the term is always sent to the database as a string.
            $id_term = $this->tfidfFindOrCreateTermId((string) $term);
            $this->tfidfAddTermFrequency($id_term, $id_doc, $count);
        }

        // The title comes from stored data, so escape it before it is
        // embedded in the HTML progress output.
        echo 'Selesai mengolah : "' . htmlspecialchars((string) $judul, ENT_QUOTES, 'UTF-8')
            . '"(id_doc : ' . $id_doc . ')<br>';
    }

    // Pass 2: document frequency and inverse document frequency per term.
    $terms = $this->db->get('stki_terms');
    foreach ($terms->result() as $termRow) {
        $id_term = $termRow->id_term;
        $df = $this->db->get_where('stki_tf', array('id_term' => $id_term))->num_rows();
        echo "id_term : " . $id_term . "<br>";

        // A term without any frequency rows would otherwise divide by zero
        // (a fatal DivisionByZeroError on PHP 8); store a neutral idf of 0.
        $idf = ($df > 0) ? log($documentCount / $df) : 0.0;

        $this->db->where('id_term', $id_term);
        $this->db->update('stki_terms', array('df' => $df, 'idf' => $idf));
    }

    // Pass 3: normalized term frequency per document.
    foreach ($documents->result() as $document) {
        echo "<h1>update normalized_tf for id_doc : " . $document->id_doc . "</h1><br>";

        $frequencies = $this->db->get_where('stki_tf', array('id_doc' => $document->id_doc));

        // NOTE(review): the divisor is the number of distinct terms in the
        // document, not the total number of tokens - confirm this is the
        // intended normalisation. Behaviour is kept as it was.
        $n_terms = $frequencies->num_rows();

        foreach ($frequencies->result() as $frequencyRow) {
            $normalized_tf = $frequencyRow->tf / $n_terms;
            $this->db->where('id', $frequencyRow->id);
            $this->db->update('stki_tf', array('normalized_tf' => $normalized_tf));
            echo "id_term : " . $frequencyRow->id_term . "<br>";
        }
    }
}

/**
 * Returns the `id_term` of the given term, inserting a new `stki_terms`
 * row first when the term is not stored yet.
 *
 * @param string $term Term text to look up.
 *
 * @return mixed The `id_term` value of the (last) matching row.
 *
 * @throws \RuntimeException When no row can be found even after inserting.
 */
private function tfidfFindOrCreateTermId($term)
{
    $criteria = array('term' => $term);

    $matches = $this->db->get_where('stki_terms', $criteria);
    if ($matches->num_rows() == 0) {
        $this->db->insert('stki_terms', $criteria);
        $matches = $this->db->get_where('stki_terms', $criteria);
    }

    $rows = $matches->result();
    if (empty($rows)) {
        // Previously this case silently reused the id of whatever term was
        // handled before, attributing the frequency to the wrong term.
        throw new \RuntimeException('Unable to resolve id_term for term "' . $term . '".');
    }

    // When several rows match, the last one wins, as before.
    $last = end($rows);

    return $last->id_term;
}

/**
 * Adds $count occurrences of a term to a document's `stki_tf` row, creating
 * the row when the term/document pair has not been recorded yet.
 *
 * @param mixed $id_term Term identifier from `stki_terms`.
 * @param mixed $id_doc  Document identifier from `stki_data_kp`.
 * @param int   $count   Number of occurrences to add.
 *
 * @return void
 */
private function tfidfAddTermFrequency($id_term, $id_doc, $count)
{
    $pair = array('id_term' => $id_term, 'id_doc' => $id_doc);

    $existing = $this->db->get_where('stki_tf', $pair);
    if ($existing->num_rows() == 0) {
        $this->db->insert('stki_tf', array(
            'id_term' => $id_term,
            'id_doc' => $id_doc,
            'tf' => $count,
        ));

        return;
    }

    // When several rows match, the last one is updated, as before.
    $rows = $existing->result();
    $last = end($rows);

    $this->db->where('id', $last->id);
    $this->db->update('stki_tf', array(
        'id_term' => $id_term,
        'id_doc' => $id_doc,
        'tf' => $last->tf + $count,
    ));
}