Пример #1
0
 /**
  * Build the text-processing collaborators used by this object.
  *
  * Creates, in order, a Sastrawi stemmer, a Sastrawi stop-word remover,
  * a sentence detector and a default tokenizer, and stores each of them
  * on the instance.
  */
 public function __construct()
 {
     // Sastrawi components are referenced by fully-qualified class name.
     $this->stemmer = (new \Sastrawi\Stemmer\StemmerFactory())->createStemmer();
     // NOTE: this property name starts with a capital "S", unlike its siblings.
     $this->StopWordRemover = (new \Sastrawi\StopWordRemover\StopWordRemoverFactory())->createStopWordRemover();

     // These factories are referenced by unqualified name; how they are
     // resolved (namespace / use statements) is not visible from this method.
     $this->sentenceDetector = (new SentenceDetectorFactory())->createSentenceDetector();
     $this->tokenizer = (new TokenizerFactory())->createDefaultTokenizer();
 }
Пример #2
0
	<center>
	<h3>Kategorisasi Teks Bahasa Indonesia Menggunakan Modified TF IDF-KNN</h3>
	<form action="KNN.php" method="post" enctype="multipart/form-data">
		<input type="hidden" name="status" value="">
		<input id="query" name="query" type="file">
		<input type="submit">
	</form>
	</center>
	</div>
	<?php 
if (isset($_POST['status'])) {
    $path_parts = pathinfo($_FILES['query']['name']);
    $query = $path_parts['basename'];
    include 'autoloader.php';
    // create stopword remover
    $stopWordRemoverFactory = new \Sastrawi\StopWordRemover\StopWordRemoverFactory();
    $stopWordRemover = $stopWordRemoverFactory->createStopWordRemover();
    // create stemmer
    $stemmerFactory = new \Sastrawi\Stemmer\StemmerFactory();
    $stemmer = $stemmerFactory->createStemmer();
    // ambil kata pada Document 1
    $sentence = file_get_contents('./dok1.txt');
    $output = $stopWordRemover->remove($sentence);
    $output = $stemmer->stem($output);
    $tfraw1 = array_count_values(str_word_count($output, 1));
    // ambil kata pada Document 2
    $sentence_2 = file_get_contents('./dok2.txt');
    $output_2 = $stopWordRemover->remove($sentence_2);
    $output_2 = $stemmer->stem($output_2);
    $tfraw2 = array_count_values(str_word_count($output_2, 1));
    // ambil kata pada Document 3