コード例 #1
0
ファイル: xml.php プロジェクト: novayadi85/CBR
 /**
  * Build an XML description of a document and write it under FILE_DIR . 'xml/'.
  *
  * The generated tree is:
  *   <Document>
  *     <Detail Author="...">
  *       <Title/> <Year/> <ID/>
  *       <Words><Word/>...</Words>
  *     </Detail>
  *   </Document>
  *
  * @param string      $file     Text to index; it is split into words on spaces and dots.
  * @param string|null $filename Base name of the output file ('.xml' is appended).
  * @param array       $vals     Must provide the keys 'penulis', 'title', 'th_buat',
  *                               'ID' and 'meta_keyword' (comma-separated keywords).
  *
  * @return string The output file name ($filename . '.xml'). It is returned even
  *                when the file could not be written (best-effort write).
  */
 public function createXML($file, $filename = null, $vals)
 {
     $stemmer = new stemmer();

     // Dots are treated as word separators, then the text is split on spaces.
     $words = explode(' ', str_replace('.', ' ', $file));

     $xmlDoc = new DOMDocument();
     $root = $xmlDoc->appendChild($xmlDoc->createElement("Document"));

     // The return value is not needed: the element values are read from
     // $this->author / title / date / ID below.
     // NOTE(review): presumably __element() is what populates those
     // properties - confirm against its definition.
     $this->__element($vals['penulis'], $vals['title'], $vals['th_buat'], $vals['ID']);

     // Words listed as meta keywords are kept verbatim instead of being stemmed.
     $arrkey = explode(",", $vals['meta_keyword']);

     // createElement($name, $value) does NOT escape $value, so a bare '&' would
     // raise a warning and truncate the text. Go through a text node instead.
     $appendTextElement = function ($parent, $name, $value) use ($xmlDoc) {
         $element = $parent->appendChild($xmlDoc->createElement($name));
         $value = (string) $value;
         if ($value !== '') {
             $element->appendChild($xmlDoc->createTextNode($value));
         }
         return $element;
     };

     $tutTag = $root->appendChild($xmlDoc->createElement("Detail"));
     $tutTag->appendChild($xmlDoc->createAttribute("Author"))
         ->appendChild($xmlDoc->createTextNode((string) $this->author));
     $appendTextElement($tutTag, "Title", $this->title);
     $appendTextElement($tutTag, "Year", $this->date);
     $appendTextElement($tutTag, "ID", $this->ID);

     $catTag = $tutTag->appendChild($xmlDoc->createElement("Words"));
     foreach ($words as $cat) {
         $w = $this->casefolding($cat);
         $kata = in_array($w, $arrkey) ? $w : $stemmer->Root($w);

         if (empty($kata) || in_array($kata, $this->stoplist)) {
             continue;
         }

         // Skip tokens that are only whitespace (e.g. a stray newline) so no
         // empty <Word/> element is emitted.
         $word = trim(strtolower($kata));
         if ($word === '') {
             continue;
         }
         $appendTextElement($catTag, "Word", $word);
     }

     $file_save = $filename . '.xml';
     $xmlDoc->formatOutput = true;
     $output = $xmlDoc->saveXML();
     $destination = FILE_DIR . 'xml/';

     // Best-effort write: failures stay silent, as before, but the handle is
     // now closed instead of being leaked.
     $fd = @fopen($destination . $file_save, 'w');
     if ($fd !== false) {
         @fwrite($fd, $output);
         fclose($fd);
     }

     return $file_save;
 }
コード例 #2
0
ファイル: request.php プロジェクト: mauriciogsc/ATbarPT
<?php

header('Content-type: application/json; charset=UTF-8');
require "classes/stemmer.class.php";
require_once "../../lib/db.class.php";
$database = db::singleton("localhost", "dictionary-ar", "ejIJY285JIfhrjkt039IJH983tndo", "services-dictionary-ar");
$database->set_charset("utf8");
$stemmer = new stemmer();
$vars = $_GET;
$lightStem = isset($vars['ls']) ? $vars['ls'] : false;
if (!empty($vars['r'])) {
    $root = $vars['r'];
    $root = $stemmer->normalize($root);
    $result = $database->single("SELECT * FROM dictionaryTraditional \n\t\t\t\t\t\t\t\tWHERE root = '" . $root . "' OR REPLACE(REPLACE(REPLACE(dictionaryTraditional.root, 'آ','ا'), 'إ' ,'ا'), 'أ' ,'ا')\t = '" . $root . "'");
} else {
    if (!empty($vars['w'])) {
        $word = $vars['w'];
        $result = $database->single("SELECT * FROM dictionaryWestern WHERE word = '" . $word . "'");
        if (empty($result) && $lightStem) {
            $queryWord;
            $wordDa = $stemmer->removeDefiniteArticle($word);
            if (empty($result)) {
                $word = $stemmer->normalize($word, true);
                $result = $database->single("SELECT * FROM dictionaryWestern WHERE normalised COLLATE utf8_unicode_ci = '" . $word . "' COLLATE utf8_unicode_ci");
                if (strlen($wordDa) > 6) {
                    $result = $database->single("SELECT * FROM dictionaryWestern WHERE normalised like '%" . $wordDa . "%'");
                } else {
                    $result = $database->single("SELECT * FROM dictionaryWestern WHERE normalised COLLATE utf8_unicode_ci = '" . $wordDa . "' COLLATE utf8_unicode_ci");
                }
                if (empty($result)) {
                    if ($wordDa != $word) {
コード例 #3
0
ファイル: pencarian.php プロジェクト: novayadi85/CBR
 /**
  * Score the indexed documents against a search query and return them ranked.
  *
  * Steps:
  *  1. Append the raw query to the cache log file.
  *  2. Stem every space-separated, lower-cased query word.
  *  3. For every dictionary word and every document it occurs in, compute
  *     weight = tf * log10((docCount + 1) / df) and accumulate the squared
  *     weights per document.
  *  4. For every query term, accumulate weight * $vsm->getW(...)[docID] over
  *     the documents whose dictionary word equals the term.
  *  5. Turn both sums into a score with $vsm->coSim() and sort descending.
  *
  * @param string     $_getTerm    Raw query string.
  * @param mixed|null $collections Passed through to vsm::getIndex().
  *
  * @return array 'total'  => elapsed seconds, rounded to 3 decimals;
  *               'result' => array docID => score sorted by score descending,
  *                           or null when nothing scored.
  */
 function gethasil($_getTerm, $collections = null)
 {
     $start = microtime(true);

     // Log the query (no separator is written between entries).
     $the_cache = FILE_DIR . 'cache/temp/cache.txt';
     file_put_contents($the_cache, (string) $_getTerm, FILE_APPEND | LOCK_EX);

     $stemmer = new stemmer();
     $vsm = new vsm();

     // Map of original query word => stemmed word.
     $this->globals = explode(' ', strtolower($_getTerm));
     $queryafter = array();
     foreach ($this->globals as $q) {
         $queryafter[$q] = $stemmer->Root($q);
     }

     $index = $vsm->getIndex($collections);
     $dictionary = $index['dictionary'];

     $docCount = count($index['docCount']);
     // NOTE(review): any non-zero document count is replaced by the constant 2,
     // so the idf numerator is always 3 (or 1 when empty). Kept as-is because
     // changing it alters every score - confirm whether this is intentional.
     if ($docCount) {
         $docCount = 2;
     }

     // $weights[docID][word] = tf * idf
     // $docNorm[docID]       = sum of squared weights
     // The two are kept in separate arrays so that dictionary words containing
     // '_' can no longer be mistaken for the linear-weight entries.
     $weights = array();
     $docNorm = array();
     foreach ($dictionary as $word => $entry) {
         if (empty($entry)) {
             continue;
         }
         foreach ($entry['postings'] as $docID => $posting) {
             $weight = $posting['tf'] * log10(($docCount + 1) / $entry['df']);
             $weights[$docID][$word] = $weight;
             if (!isset($docNorm[$docID])) {
                 $docNorm[$docID] = 0;
             }
             $docNorm[$docID] += pow($weight, 2);
         }
     }

     // Dot product between query weights and document weights, per document.
     $dot = array();
     $matched = false;
     foreach (array_values($queryafter) as $term) {
         // The already-stemmed term is lower-cased and stemmed once more,
         // exactly as before.
         $term = $stemmer->Root(strtolower($term));
         foreach ($weights as $docID => $docWeights) {
             foreach ($docWeights as $word => $weight) {
                 if (strtolower((string) $word) !== (string) $term) {
                     continue;
                 }
                 $k = $vsm->getW($term, $index);
                 $matched = true;
                 if (!isset($dot[$docID])) {
                     $dot[$docID] = 0;
                 }
                 $dot[$docID] += $weight * $k[$docID];
             }
         }
     }

     $scores = array();
     if ($matched) {
         $keyword = $vsm->getTfidf($queryafter, $index);
         // $docNorm keys are in first-seen document order.
         foreach ($docNorm as $docID => $sqrtDi) {
             $kkdotDi = isset($dot[$docID]) ? $dot[$docID] : null;
             // Loose comparison on purpose: a missing or zero dot product is skipped.
             if ($kkdotDi != null) {
                 // Evaluated once and reused instead of calling coSim() twice.
                 $score = $vsm->coSim($sqrtDi, $kkdotDi, $keyword);
                 if ($score != 0) {
                     $scores[$docID] = $score;
                 }
             }
         }
     }

     // Callers receive null (not an empty array) when nothing scored.
     $hasil = null;
     if (!empty($scores)) {
         arsort($scores);
         $hasil = $scores;
     }

     return array('total' => round(microtime(true) - $start, 3), 'result' => $hasil);
 }