/**
 * Build an XML description of a document and save it under FILE_DIR . 'xml/'.
 *
 * The generated document has the shape:
 *   <Document><Detail Author="..."><Title/><Year/><ID/><Words><Word/>...</Words></Detail></Document>
 *
 * Every token of $file is case-folded and, unless it is listed verbatim in
 * $vals['meta_keyword'], reduced with stemmer::Root(). Tokens found in
 * $this->stoplist and empty tokens are skipped.
 *
 * NOTE: the optional $filename precedes the required $vals. The order is kept
 * because callers pass arguments positionally.
 *
 * @param string      $file     Raw document text to tokenise.
 * @param string|null $filename Base name of the output file ('.xml' is appended).
 * @param array       $vals     Reads the keys 'penulis', 'title', 'th_buat', 'ID'
 *                              and 'meta_keyword' (comma-separated list).
 *
 * @return string The output file name ($filename . '.xml'). It is returned even
 *                when writing fails; the failure is only logged.
 */
public function createXML($file, $filename = null, $vals)
{
    $stemmer = new stemmer();

    // Full stops count as word separators; tokens are then split on single spaces.
    $words = explode(' ', str_replace('.', ' ', $file));

    $xmlDoc = new DOMDocument();
    $root = $xmlDoc->appendChild($xmlDoc->createElement("Document"));

    // NOTE(review): __element() presumably fills $this->author, $this->title,
    // $this->date and $this->ID, which are read below - confirm against its definition.
    $this->__element($vals['penulis'], $vals['title'], $vals['th_buat'], $vals['ID']);

    // Keywords that must be kept verbatim instead of being stemmed.
    $arrkey = explode(",", $vals['meta_keyword']);

    $detail = $root->appendChild($xmlDoc->createElement("Detail"));
    $detail->setAttribute("Author", (string) $this->author);

    // Values are attached as text nodes: createElement($name, $value) does not
    // escape '&', which raises a warning and truncates the content.
    $fields = array(
        "Title" => $this->title,
        "Year"  => $this->date,
        "ID"    => $this->ID,
    );
    foreach ($fields as $tag => $value) {
        $node = $detail->appendChild($xmlDoc->createElement($tag));
        $text = (string) $value;
        if ($text !== '') {
            $node->appendChild($xmlDoc->createTextNode($text));
        }
    }

    $wordsTag = $detail->appendChild($xmlDoc->createElement("Words"));
    foreach ($words as $rawWord) {
        $folded = $this->casefolding($rawWord);
        $kata = in_array($folded, $arrkey) ? $folded : $stemmer->Root($folded);

        if (empty($kata) || in_array($kata, $this->stoplist)) {
            continue;
        }

        // Skip tokens that consist of whitespace only, so no empty <Word/> is emitted.
        $normalized = trim(strtolower($kata));
        if ($normalized === '') {
            continue;
        }

        $wordNode = $wordsTag->appendChild($xmlDoc->createElement("Word"));
        $wordNode->appendChild($xmlDoc->createTextNode($normalized));
    }

    $file_save = $filename . '.xml';

    // Pretty-print the serialised document.
    $xmlDoc->formatOutput = true;
    $output = $xmlDoc->saveXML();

    $destination = FILE_DIR . 'xml/';

    // file_put_contents() opens, truncates, writes and closes in one call, so no
    // handle is leaked. Writing stays best effort, but a failure is now logged.
    if (@file_put_contents($destination . $file_save, $output) === false) {
        error_log('createXML: unable to write ' . $destination . $file_save);
    }

    return $file_save;
}
<?php header('Content-type: application/json; charset=UTF-8'); require "classes/stemmer.class.php"; require_once "../../lib/db.class.php"; $database = db::singleton("localhost", "dictionary-ar", "ejIJY285JIfhrjkt039IJH983tndo", "services-dictionary-ar"); $database->set_charset("utf8"); $stemmer = new stemmer(); $vars = $_GET; $lightStem = isset($vars['ls']) ? $vars['ls'] : false; if (!empty($vars['r'])) { $root = $vars['r']; $root = $stemmer->normalize($root); $result = $database->single("SELECT * FROM dictionaryTraditional \n\t\t\t\t\t\t\t\tWHERE root = '" . $root . "' OR REPLACE(REPLACE(REPLACE(dictionaryTraditional.root, 'آ','ا'), 'إ' ,'ا'), 'أ' ,'ا')\t = '" . $root . "'"); } else { if (!empty($vars['w'])) { $word = $vars['w']; $result = $database->single("SELECT * FROM dictionaryWestern WHERE word = '" . $word . "'"); if (empty($result) && $lightStem) { $queryWord; $wordDa = $stemmer->removeDefiniteArticle($word); if (empty($result)) { $word = $stemmer->normalize($word, true); $result = $database->single("SELECT * FROM dictionaryWestern WHERE normalised COLLATE utf8_unicode_ci = '" . $word . "' COLLATE utf8_unicode_ci"); if (strlen($wordDa) > 6) { $result = $database->single("SELECT * FROM dictionaryWestern WHERE normalised like '%" . $wordDa . "%'"); } else { $result = $database->single("SELECT * FROM dictionaryWestern WHERE normalised COLLATE utf8_unicode_ci = '" . $wordDa . "' COLLATE utf8_unicode_ci"); } if (empty($result)) { if ($wordDa != $word) {
/**
 * Score the indexed documents against a free-text query.
 *
 * Steps, as implemented here:
 *  1. The raw query is appended to FILE_DIR . 'cache/temp/cache.txt'.
 *  2. The query is lower-cased, split on single spaces and each token is
 *     reduced with stemmer::Root(); the raw tokens are stored in $this->globals.
 *  3. For every dictionary term and every posting, the weight
 *     tf * log10((docCount + 1) / df) is computed. Per document the squared
 *     weights are summed (the document norm term) and the plain weights are
 *     kept per term.
 *  4. For every query term that equals a lower-cased dictionary term, the
 *     document weight is multiplied by the value vsm::getW() returns for that
 *     document, and the products are summed per document.
 *  5. vsm::coSim() turns both sums plus vsm::getTfidf() of the query into the
 *     final score; zero scores are dropped and the rest sorted descending.
 *
 * @param string     $_getTerm    The query as typed by the user.
 * @param mixed|null $collections Passed through unchanged to vsm::getIndex().
 *
 * @return array 'total'  => elapsed seconds rounded to 3 decimals,
 *               'result' => array docID => score sorted descending, or null
 *                           when no document scored.
 */
function gethasil($_getTerm, $collections = null)
{
    $start = microtime(true);

    // Append the raw query to the cache log.
    $the_cache = FILE_DIR . 'cache/temp/cache.txt';
    file_put_contents($the_cache, (string) $_getTerm, FILE_APPEND | LOCK_EX);

    $params = explode(' ', strtolower($_getTerm));
    $stemmer = new stemmer();
    $vsm = new vsm();
    $this->globals = $params;

    // Raw token => stemmed token (duplicate tokens collapse onto one key).
    $queryafter = array();
    foreach ($this->globals as $q) {
        $queryafter[$q] = $stemmer->Root($q);
    }
    $query = array_values($queryafter);

    $index = $vsm->getIndex($collections);

    $dictionary = array();
    if (isset($index['dictionary']) && is_array($index['dictionary'])) {
        $dictionary = $index['dictionary'];
    }

    $docCount = 0;
    if (isset($index['docCount'])
        && (is_array($index['docCount']) || $index['docCount'] instanceof Countable)
    ) {
        $docCount = count($index['docCount']);
    }
    // NOTE(review): any non-zero document count is replaced by the constant 2.
    // This looks like a debugging leftover, but it determines every weight below,
    // so it is preserved here - confirm the intent before changing it.
    if ($docCount) {
        $docCount = 2;
    }

    // docID => (term => tf*idf weight), and docID => sum of squared weights.
    // Kept in two arrays rather than one array with a '_q' key suffix, so that
    // terms containing '_' are neither misclassified nor collide with each other.
    $linearWeights = array();
    $squaredNorms = array();
    foreach ($dictionary as $word => $entry) {
        // Entries without postings or with a zero/missing df cannot be weighted
        // (df == 0 would be a division by zero).
        if (empty($entry) || empty($entry['postings']) || empty($entry['df'])) {
            continue;
        }

        $idf = log10(($docCount + 1) / $entry['df']);
        foreach ($entry['postings'] as $docID => $posting) {
            $weight = $posting['tf'] * $idf;
            if (!isset($squaredNorms[$docID])) {
                $squaredNorms[$docID] = 0;
                $linearWeights[$docID] = array();
            }
            $squaredNorms[$docID] += pow($weight, 2);
            $linearWeights[$docID][$word] = $weight;
        }
    }

    // Per document: sum over matching query terms of (document weight * query weight).
    $dotProducts = array();
    $hasMatch = false;
    foreach ($query as $term) {
        $term = $stemmer->Root(strtolower($term));
        $needle = (string) $term;

        // Fetched lazily, and only once per term, on the first matching entry.
        $queryWeights = null;

        foreach ($linearWeights as $docID => $weights) {
            foreach ($weights as $word => $weight) {
                if (strtolower((string) $word) !== $needle) {
                    continue;
                }

                if ($queryWeights === null) {
                    $queryWeights = $vsm->getW($term, $index);
                }
                $hasMatch = true;

                // A document missing from the getW() result contributes nothing.
                $k = isset($queryWeights[$docID]) ? $queryWeights[$docID] : 0;
                if (!isset($dotProducts[$docID])) {
                    $dotProducts[$docID] = 0;
                }
                $dotProducts[$docID] += $weight * $k;
            }
        }
    }

    $scores = array();
    if ($hasMatch) {
        $keyword = $vsm->getTfidf($queryafter, $index);

        // Iterate documents in the order they first appeared in the postings.
        foreach ($squaredNorms as $docID => $sqrtDi) {
            $kkdotDi = isset($dotProducts[$docID]) ? $dotProducts[$docID] : null;
            // Loose comparison on purpose: both "no match" and a zero sum are skipped.
            if ($kkdotDi == null) {
                continue;
            }

            $score = $vsm->coSim($sqrtDi, $kkdotDi, $keyword);
            if ($score != 0) {
                $scores[$docID] = $score;
            }
        }
    }

    // Callers receive null, not an empty array, when nothing scored.
    $hasil = null;
    if (!empty($scores)) {
        arsort($scores);
        $hasil = $scores;
    }

    return array('total' => round(microtime(true) - $start, 3), 'result' => $hasil);
}