/**
 * Build an XML word list for a text and save it as FILE_DIR . 'xml/' . $filename . '.xml'.
 *
 * The generated document has the shape
 *   <Document><Detail Author="..."><Title/><Year/><ID/><Words><Word/>...</Words></Detail></Document>
 * where Author/Title/Year/ID are read from $this->author, $this->title, $this->date and
 * $this->ID after $this->__element() has been called with the metadata in $vals
 * (presumably __element() populates those properties -- confirm against its definition).
 *
 * Each token of $file is passed through $this->casefolding(); tokens that are not listed in
 * $vals['meta_keyword'] are additionally passed through stemmer::Root(). The result is
 * trimmed and lower-cased, and skipped when it is empty or present in $this->stoplist.
 *
 * NOTE(review): the optional $filename precedes the required $vals, which PHP 8 reports as
 * deprecated; the signature is left untouched so existing callers keep working.
 *
 * @param string      $file     Text to tokenise ('.' and ' ' act as separators).
 * @param string|null $filename Base name of the output file; '.xml' is appended.
 * @param array       $vals     Metadata; the keys 'penulis', 'title', 'th_buat', 'ID' and
 *                              'meta_keyword' (comma-separated list) are read.
 *
 * @return string The output file name (not the full path). It is returned even when the
 *                file could not be written, because the write is best-effort.
 */
public function createXML($file, $filename = null, $vals)
{
    $stemmer = new stemmer();

    // Full stops are treated as separators, then the text is split on single spaces.
    $words = explode(' ', str_replace('.', ' ', $file));

    $xmlDoc = new DOMDocument();
    $root = $xmlDoc->appendChild($xmlDoc->createElement('Document'));

    // Return value is not needed here; the metadata is read back from $this below.
    $this->__element($vals['penulis'], $vals['title'], $vals['th_buat'], $vals['ID']);
    $arrkey = explode(',', $vals['meta_keyword']);

    $detail = $root->appendChild($xmlDoc->createElement('Detail'));
    $detail->setAttribute('Author', (string) $this->author);

    // Values are added as text nodes: createElement($name, $value) does not escape '&',
    // so a title such as "R&D" would raise a warning and lose its content.
    $fields = array(
        'Title' => $this->title,
        'Year'  => $this->date,
        'ID'    => $this->ID,
    );
    foreach ($fields as $name => $value) {
        $node = $detail->appendChild($xmlDoc->createElement($name));
        $node->appendChild($xmlDoc->createTextNode((string) $value));
    }

    $wordsTag = $detail->appendChild($xmlDoc->createElement('Words'));
    foreach ($words as $cat) {
        $w = $this->casefolding($cat);

        // Declared keywords are kept verbatim; every other token is stemmed.
        $kata = in_array($w, $arrkey) ? $w : $stemmer->Root($w);

        // Normalise before filtering so whitespace-only tokens cannot produce an
        // empty <Word/> and the stop list is matched against the emitted form.
        // Unlike empty(), the comparison with '' keeps a literal "0" token.
        $kata = trim(strtolower((string) $kata));
        if ($kata === '' || in_array($kata, $this->stoplist)) {
            continue;
        }

        $wordNode = $wordsTag->appendChild($xmlDoc->createElement('Word'));
        $wordNode->appendChild($xmlDoc->createTextNode($kata));
    }

    $file_save = $filename . '.xml';

    // Pretty-print the serialised document.
    $xmlDoc->formatOutput = true;
    $output = $xmlDoc->saveXML();

    // Best-effort write: failures stay silent, but the handle is always closed.
    $fd = @fopen(FILE_DIR . 'xml/' . $file_save, 'w');
    if ($fd !== false) {
        @fwrite($fd, $output);
        fclose($fd);
    }

    return $file_save;
}
/**
 * Score the indexed documents against a free-text query.
 *
 * Steps performed:
 *  1. Append the raw query to FILE_DIR . 'cache/temp/cache.txt'.
 *  2. Lower-case the query, split it on spaces, store the tokens in $this->globals and
 *     pass each token through stemmer::Root().
 *  3. Load the index with vsm::getIndex($collections) and, for every dictionary posting,
 *     compute weight = tf * log10((docCount + 1) / df). Per document the squared weights
 *     are summed; the plain weights are kept per (lower-cased) dictionary word.
 *  4. For every query term found in the dictionary, accumulate per document
 *     weight * vsm::getW($term, $index)[docID].
 *  5. For every document with a non-zero accumulated product, call
 *     vsm::coSim($sumOfSquares, $product, vsm::getTfidf(...)) and keep non-zero results
 *     (presumably a cosine similarity -- confirm against vsm::coSim()).
 *
 * @param string     $_getTerm    Query text.
 * @param mixed|null $collections Passed through unchanged to vsm::getIndex().
 *
 * @return array array('total' => elapsed seconds rounded to 3 decimals,
 *                     'result' => array(docID => score) sorted by score descending,
 *                                 or null when nothing scored).
 */
function gethasil($_getTerm, $collections = null)
{
    $start = microtime(true);

    // Query log; the write result is intentionally not checked (best-effort).
    file_put_contents(FILE_DIR . 'cache/temp/cache.txt', (string) $_getTerm, FILE_APPEND | LOCK_EX);

    $stemmer = new stemmer();
    $vsm = new vsm();

    $params = explode(' ', strtolower($_getTerm));
    $this->globals = $params;

    // raw token => result of Root(); duplicate raw tokens collapse onto one key.
    $queryafter = array();
    foreach ($params as $q) {
        $queryafter[$q] = $stemmer->Root($q);
    }

    $index = $vsm->getIndex($collections);

    $docCount = count($index['docCount']);
    if ($docCount) {
        // NOTE(review): any non-zero document count is replaced by the constant 2, which
        // looks like a debugging leftover. It is preserved because changing it alters every
        // score and may disagree with what vsm::getW()/getTfidf() assume -- confirm.
        $docCount = 2;
    }

    // docID => sum of squared weights, in order of first appearance in the dictionary.
    $docNorms = array();
    // lower-cased dictionary word => list of array(docID, weight).
    // Separate accumulators replace the former "'_q' key suffix" scheme, which
    // misclassified dictionary words that themselves contain an underscore.
    $weightsByTerm = array();

    foreach ($index['dictionary'] as $word => $entry) {
        if (empty($entry)) {
            continue;
        }
        $termKey = strtolower((string) $word);
        foreach ($entry['postings'] as $docID => $posting) {
            $weight = $posting['tf'] * log10(($docCount + 1) / $entry['df']);

            if (!isset($docNorms[$docID])) {
                $docNorms[$docID] = 0;
            }
            $docNorms[$docID] += pow($weight, 2);
            $weightsByTerm[$termKey][] = array($docID, $weight);
        }
    }

    // docID => accumulated (document weight * query weight) over all matching query terms.
    $dotProducts = array();
    $hasMatch = false;

    foreach (array_values($queryafter) as $term) {
        // NOTE(review): the term was already passed through Root() above and is stemmed a
        // second time here; kept as-is because removing it could change which terms match.
        $term = $stemmer->Root(strtolower($term));
        $termKey = (string) $term;

        if (empty($weightsByTerm[$termKey])) {
            continue;
        }
        $hasMatch = true;

        // One lookup per query term instead of one per matching posting.
        $queryWeights = $vsm->getW($term, $index);

        foreach ($weightsByTerm[$termKey] as $pair) {
            list($docID, $weight) = $pair;
            $queryWeight = isset($queryWeights[$docID]) ? $queryWeights[$docID] : 0;

            if (!isset($dotProducts[$docID])) {
                $dotProducts[$docID] = 0;
            }
            $dotProducts[$docID] += $weight * $queryWeight;
        }
    }

    // Stays null unless at least one document receives a non-zero score.
    $hasil = null;

    if ($hasMatch) {
        $keyword = $vsm->getTfidf($queryafter, $index);

        $scores = array();
        foreach ($docNorms as $docID => $norm) {
            if (!isset($dotProducts[$docID]) || $dotProducts[$docID] == 0) {
                continue;
            }
            $score = $vsm->coSim($norm, $dotProducts[$docID], $keyword);
            if ($score != 0) {
                $scores[$docID] = $score;
            }
        }

        if ($scores) {
            arsort($scores);
            $hasil = $scores;
        }
    }

    return array('total' => round(microtime(true) - $start, 3), 'result' => $hasil);
}