Ejemplo n.º 1
0
 /**
  * Reports how often a keyword occurs in each section of the parsed index.
  *
  * Arrays are processed element by element and yield an array of results with
  * the same keys. A scalar keyword is first passed through stemming() (unless
  * $bStemmed is set or $this->bSearch is falsy); if stemming() returns an
  * array, every stem in it is checked in turn.
  *
  * @param string|array $keyword  Keyword, or list of keywords, to look up.
  * @param bool         $bStemmed True when $keyword is already stemmed; for an
  *                               array the keys (not the values) are then used
  *                               as the keywords.
  * @return array|false Per-section counters plus 'CONTRAST', an array of such
  *                     results for array input, or false when no index is loaded.
  */
 function CheckKeyword($keyword, $bStemmed = false)
 {
     if (!is_array($this->__index)) {
         return false;
     }
     if (is_array($keyword)) {
         $arResult = array();
         foreach ($keyword as $key => $word) {
             $arResult[$key] = $this->CheckKeyword($bStemmed ? $key : $word, $bStemmed);
         }
         return $arResult;
     }
     if (!$bStemmed && $this->bSearch) {
         $keyword = stemming($keyword, $this->__lang);
     }
     if (is_array($keyword)) {
         return $this->CheckKeyword($keyword, true);
     }
     $arSections = array('TOTAL', 'BOLD', 'ITALIC', 'LINK', 'LINK_EXTERNAL', 'DESCRIPTION', 'KEYWORDS', 'TITLE', 'H1');
     $arResult = array();
     foreach ($arSections as $section) {
         // A keyword that never occurred in a section has no entry there;
         // report 0 without triggering an undefined-index notice.
         $arResult[$section] = isset($this->__index[$section][$keyword]) ? intval($this->__index[$section][$keyword]) : 0;
     }
     $arResult['CONTRAST'] = $this->_GetContrast($keyword);
     return $arResult;
 }
Ejemplo n.º 2
0
 /**
  * Writes the stem rows of one search document into b_search_content_stem.
  *
  * The content is stemmed once per distinct language of the given sites and
  * the rows are sent in batched "insert ignore" statements.
  *
  * @param array  $arLID    Array keyed by site ID (the values are not used
  *                         here); anything that is not an array is treated as empty.
  * @param int    $ID       Value stored in SEARCH_CONTENT_ID.
  * @param string $sContent Text handed to stemming().
  * @return void
  */
 function StemIndex($arLID, $ID, $sContent)
 {
     $DB = CDatabase::GetModuleConnection('search');
     // Site ID => site attributes, or false when the site was not found.
     // Static, so lookups are remembered across calls within the request.
     static $CACHE_SITE_LANGS = array();
     $ID = intval($ID);
     $arLang = array();
     if (!is_array($arLID)) {
         $arLID = array();
     }
     foreach ($arLID as $site => $url) {
         if (!array_key_exists($site, $CACHE_SITE_LANGS)) {
             $db_site_tmp = CSite::GetByID($site);
             if ($ar_site_tmp = $db_site_tmp->Fetch()) {
                 $CACHE_SITE_LANGS[$site] = array("LANGUAGE_ID" => $ar_site_tmp["LANGUAGE_ID"], "CHARSET" => $ar_site_tmp["CHARSET"], "SERVER_NAME" => $ar_site_tmp["SERVER_NAME"]);
             } else {
                 $CACHE_SITE_LANGS[$site] = false;
             }
         }
         if (is_array($CACHE_SITE_LANGS[$site])) {
             // Used as a set: several sites may share one language.
             $arLang[$CACHE_SITE_LANGS[$site]["LANGUAGE_ID"]] = true;
         }
     }
     // With BX_SEARCH_VERSION > 1 stems are stored by registered ID together
     // with a PS column; otherwise the stem text itself is stored.
     $bWithPositions = BX_SEARCH_VERSION > 1;
     $strSqlPrefix = "\n\t\t\t\t\t\tinsert ignore into b_search_content_stem\n\t\t\t\t\t\t(SEARCH_CONTENT_ID, LANGUAGE_ID, STEM, TF" . ($bWithPositions ? ",PS" : "") . ")\n\t\t\t\t\t\tvalues\n\t\t\t\t";
     // A batch is flushed as soon as its values text grows past this many bytes.
     $maxValuesLen = 2048;
     foreach ($arLang as $lang => $value) {
         $sql_lang = $DB->ForSql($lang);
         $arDoc = stemming($sContent, $lang);
         $docLength = array_sum($arDoc);
         if ($bWithPositions) {
             // NOTE(review): presumably per-stem position totals - confirm against stemming().
             $arPos = stemming($sContent, $lang, false, true);
             // NOTE(review): called with the whole stem array; looks like a bulk
             // pre-registration of all stems - confirm against CSearch::RegisterStem().
             CSearch::RegisterStem($arDoc);
         }
         if ($docLength <= 0) {
             continue;
         }
         // Documents shorter than 20 are normalised as if they had length 20.
         $logDocLength = log($docLength < 20 ? 20 : $docLength);
         $strSqlValues = "";
         foreach ($arDoc as $word => $count) {
             $tf = number_format(log($count + 1) / $logDocLength, 4, ".", "");
             if ($bWithPositions) {
                 $stem_id = CSearch::RegisterStem($word);
                 //This is almost impossible, but happens
                 if ($stem_id > 0) {
                     // Reuse the ID that was just validated instead of registering the stem a second time.
                     $strSqlValues .= ",\n(" . $ID . ", '" . $sql_lang . "'" . ", " . $stem_id . ", " . $tf . ", " . number_format($arPos[$word] / $count, 4, ".", "") . ")";
                 }
             } else {
                 $strSqlValues .= ",\n(" . $ID . ", '" . $sql_lang . "'" . ", '" . $DB->ForSQL($word) . "'" . ", " . $tf . ")";
             }
             if (strlen($strSqlValues) > $maxValuesLen) {
                 // substr(..., 2) drops the leading ",\n" of the first row.
                 $DB->Query($strSqlPrefix . substr($strSqlValues, 2), false, "File: " . __FILE__ . "<br>Line: " . __LINE__);
                 $strSqlValues = "";
             }
         }
         if (strlen($strSqlValues) > 0) {
             $DB->Query($strSqlPrefix . substr($strSqlValues, 2), false, "File: " . __FILE__ . "<br>Line: " . __LINE__);
         }
     }
 }
Ejemplo n.º 3
0
 /**
  * Creates or updates a media library item, optionally storing a new source
  * file for it, and re-links the item to the requested collections.
  *
  * @param array $Params Reads the following keys:
  *   - 'arFields'      item fields; a positive 'ID' means "update", otherwise a new item is added;
  *   - 'path'          optional path (appended to the site document root) of an existing file to use as source;
  *   - 'file'          optional file array with at least 'name' and 'size' (presumably a $_FILES entry - confirm);
  *   - 'arCollections' optional list of collection IDs to attach the item to.
  * @return array|false The item fields (merged over the stored file's row when a
  *                     file was saved and its row could be fetched), or false on failure.
  */
 public static function Edit($Params)
 {
     global $DB;
     $source_id = false;
     $arFile = false;
     $arFields = $Params['arFields'];
     $bNew = !isset($arFields['ID']) || $arFields['ID'] <= 0;
     // empty() keeps the original truthiness test but tolerates absent keys.
     $bFile_FD = !empty($Params['path']) && strlen($Params['path']) > 0;
     $bFile_PC = !empty($Params['file']) && strlen($Params['file']['name']) > 0 && $Params['file']['size'] > 0;
     $io = CBXVirtualIo::GetInstance();
     // 1. Store the source file when one was supplied
     if ($bFile_FD || $bFile_PC) {
         if ($bFile_FD) {
             $DocRoot = CSite::GetSiteDocRoot(false);
             $tmp_name = $DocRoot . $Params['path'];
             if (!$io->FileExists($tmp_name)) {
                 // Previously $arFile stayed undefined here and was still used below.
                 return false;
             }
             $flTmp = $io->GetFile($tmp_name);
             $file_name = substr($Params['path'], strrpos($Params['path'], '/') + 1);
             $arFile = array("name" => $file_name, "size" => $flTmp->GetFileSize(), "tmp_name" => $tmp_name, "type" => CFile::IsImage($file_name) ? 'image' : 'file');
         } else {
             // Only reachable when $bFile_PC is true.
             $arFile = $Params['file'];
         }
         if (!CMedialib::CheckFileExtention($arFile["name"])) {
             return false;
         }
         if (!$bNew) {
             $arFile["old_file"] = CMedialibItem::GetSourceId($arFields['ID']);
             $arFile["del"] = "Y";
         }
         // Resizing Image
         if (CFile::IsImage($arFile["name"])) {
             $arSize = array('width' => COption::GetOptionInt('fileman', "ml_max_width", 1024), 'height' => COption::GetOptionInt('fileman', "ml_max_height", 1024));
             CFile::ResizeImage($arFile, $arSize);
         }
         $arFile["MODULE_ID"] = "fileman";
         $source_id = CFile::SaveFile($arFile, "medialibrary");
         if ($source_id) {
             $r = CFile::GetByID($source_id);
             // From here on $arFile holds the stored file's row (or a falsy value if it could not be fetched).
             if ($arFile = $r->Fetch()) {
                 if (CFile::IsImage($arFile['FILE_NAME'])) {
                     CMedialibItem::GenerateThumbnail($arFile, array('width' => COption::GetOptionInt('fileman', "ml_thumb_width", 140), 'height' => COption::GetOptionInt('fileman', "ml_thumb_height", 105)));
                 }
                 $arFile['PATH'] = CMedialibItem::GetFullPath($arFile);
             }
         }
     }
     // TODO: Add error handling
     // A new item cannot exist without a stored source file.
     if ($bNew && !$source_id) {
         return false;
     }
     // 2. Add to b_medialib_item
     if (!isset($arFields['~DATE_UPDATE'])) {
         $arFields['~DATE_UPDATE'] = $DB->CurrentTimeFunction();
     }
     if (!CMedialibItem::CheckFields($arFields)) {
         return false;
     }
     if (CModule::IncludeModule("search")) {
         $arStem = stemming($arFields['NAME'] . ' ' . $arFields['DESCRIPTION'] . ' ' . $arFields['KEYWORDS'], LANGUAGE_ID);
         if (count($arStem) > 0) {
             // Stored as "{STEM1}{STEM2}..." built from the stem keys.
             $arFields['SEARCHABLE_CONTENT'] = '{' . implode('}{', array_keys($arStem)) . '}';
         } else {
             $arFields['SEARCHABLE_CONTENT'] = '';
         }
     }
     if ($bNew) {
         unset($arFields['ID']);
         $arFields['SOURCE_ID'] = $source_id;
         $arFields['~DATE_CREATE'] = $arFields['~DATE_UPDATE'];
         $arFields['ITEM_TYPE'] = '';
         $ID = CDatabase::Add("b_medialib_item", $arFields, array("DESCRIPTION", "SEARCHABLE_CONTENT"));
     } else {
         if ($source_id) {
             $arFields['SOURCE_ID'] = $source_id;
         }
         $ID = $arFields['ID'];
         unset($arFields['ID']);
         $strUpdate = $DB->PrepareUpdate("b_medialib_item", $arFields);
         $strSql = "UPDATE b_medialib_item SET " . $strUpdate . " WHERE ID=" . intval($ID);
         $DB->QueryBind($strSql, array("DESCRIPTION" => $arFields["DESCRIPTION"], "SEARCHABLE_CONTENT" => $arFields["SEARCHABLE_CONTENT"]), false, "File: " . __FILE__ . "<br>Line: " . __LINE__);
     }
     // 3. Set fields to b_medialib_collection_item
     if (!$bNew) {
         // Existing links are dropped and rebuilt from $Params['arCollections'].
         $strSql = "DELETE FROM b_medialib_collection_item WHERE ITEM_ID=" . intval($ID);
         $DB->Query($strSql, false, "FILE: " . __FILE__ . "<br> LINE: " . __LINE__);
     }
     // "0" keeps the IN (...) list syntactically valid when no collection is given.
     $strCollections = "0";
     if (isset($Params['arCollections']) && is_array($Params['arCollections'])) {
         foreach ($Params['arCollections'] as $collectionId) {
             $strCollections .= "," . intval($collectionId);
         }
     }
     $strSql = "INSERT INTO b_medialib_collection_item(ITEM_ID, COLLECTION_ID) " . "SELECT " . intval($ID) . ", ID " . "FROM b_medialib_collection " . "WHERE ID in (" . $strCollections . ")";
     $DB->Query($strSql, false, "FILE: " . __FILE__ . "<br> LINE: " . __LINE__);
     // 'ID' was unset above in both branches; restore it for the caller.
     if (empty($arFields['ID'])) {
         $arFields['ID'] = $ID;
     }
     // Merge only when the stored file's row is available; array_merge() rejects non-arrays.
     if ($source_id && is_array($arFile)) {
         $arFields = array_merge($arFile, $arFields);
     }
     return $arFields;
 }
Ejemplo n.º 4
0
 /**
  * Writes the stem rows of one search document into b_search_content_stem.
  *
  * The content is stemmed once per distinct language of the given sites. The
  * stems are passed to the SQL function f_stem() as a space-separated list of
  * "STEM;TF" pairs, in batches.
  *
  * @param array  $arLID    Array keyed by site ID (the values are not used
  *                         here); anything that is not an array is treated as empty.
  * @param int    $ID       Value stored in SEARCH_CONTENT_ID.
  * @param string $sContent Text handed to stemming().
  * @return void
  */
 function StemIndex($arLID, $ID, $sContent)
 {
     global $DB;
     // $ID is concatenated into SQL below, so force it to an integer.
     $ID = intval($ID);
     $arLang = array();
     if (!is_array($arLID)) {
         $arLID = array();
     }
     foreach ($arLID as $site => $url) {
         if (!isset($GLOBALS["CACHE_SEARCH_SITE_LANGS"][$site])) {
             $db_site_tmp = CSite::GetByID($site);
             if ($ar_site_tmp = $db_site_tmp->Fetch()) {
                 $GLOBALS["CACHE_SEARCH_SITE_LANGS"][$site] = array("LANGUAGE_ID" => $ar_site_tmp["LANGUAGE_ID"], "CHARSET" => $ar_site_tmp["CHARSET"], "SERVER_NAME" => $ar_site_tmp["SERVER_NAME"]);
             }
         }
         if (isset($GLOBALS["CACHE_SEARCH_SITE_LANGS"][$site])) {
             // Only the keys are used afterwards, so a plain flag replaces the
             // former "++" on a key that did not exist yet.
             $arLang[$GLOBALS["CACHE_SEARCH_SITE_LANGS"][$site]["LANGUAGE_ID"]] = true;
         }
     }
     // A batch is flushed as soon as its values text grows past this many bytes.
     $maxValuesLen = 1024;
     $strSqlSuffix = "') as tt_stem)) t";
     foreach ($arLang as $lang => $value) {
         $arDoc = stemming($sContent, $lang);
         $docLength = array_sum($arDoc);
         if ($docLength <= 0) {
             continue;
         }
         // Documents shorter than 20 are normalised as if they had length 20.
         $logDocLength = log($docLength < 20 ? 20 : $docLength);
         $strSqlPrefix = "\n\t\t\t\t\t\tinsert into b_search_content_stem\n\t\t\t\t\t\t(SEARCH_CONTENT_ID, LANGUAGE_ID, STEM, TF)\n\t\t\t\t\t\tSELECT " . $ID . ", '" . $DB->ForSql($lang) . "', T.STEM, T.TF\n\t\t\t\t\t\tFROM table(cast(f_stem('\n\t\t\t\t";
         $strSqlValues = "";
         foreach ($arDoc as $word => $count) {
             // Normalise by the logarithm of the document length. The clamped
             // logarithm was computed but the raw length was used as divisor.
             // NOTE(review): rows written before this fix carry the old scale;
             // a reindex may be needed for consistent ranking - confirm.
             $strSqlValues .= " " . $word . ";" . number_format(log($count + 1) / $logDocLength, 4, ".", "");
             if (strlen($strSqlValues) > $maxValuesLen) {
                 // substr(..., 1) drops the leading separator space.
                 $DB->Query($strSqlPrefix . substr($strSqlValues, 1) . $strSqlSuffix, false, "File: " . __FILE__ . "<br>Line: " . __LINE__);
                 $strSqlValues = "";
             }
         }
         if (strlen($strSqlValues) > 0) {
             $DB->Query($strSqlPrefix . substr($strSqlValues, 1) . $strSqlSuffix, false, "File: " . __FILE__ . "<br>Line: " . __LINE__);
         }
     }
 }
Ejemplo n.º 5
0
	/**
	 * Replaces one query token with its first stem.
	 *
	 * Tokens are returned unchanged when they are boolean operators (the
	 * English ones, or the localized ones when $this->rus_bool_lang is 'yes';
	 * both checks are skipped if $this->no_bool_lang is set) or when they
	 * contain "cut5"/"cut6". If stemming() yields no stem, a single space is
	 * returned; otherwise $this->bStemming is switched on.
	 *
	 * @param string|array $w Token, or an array whose element 0 is the token.
	 * @return string
	 */
	function StemWord($w)
	{
		// Built once and reused: the pattern of localized boolean operators.
		static $localizedOperators = false;

		if (is_array($w))
		{
			$w = $w[0];
		}
		$upperToken = ToUpper($w);

		if (!$this->no_bool_lang)
		{
			if (preg_match("/^(OR|AND|NOT|WITHOUT)$/", $upperToken))
			{
				return $w;
			}

			if ($this->rus_bool_lang == 'yes')
			{
				if ($localizedOperators === false)
				{
					$terms = GetMessage("SEARCH_TERM_OR")."|".GetMessage("SEARCH_TERM_AND")."|".GetMessage("SEARCH_TERM_NOT_1")."|".GetMessage("SEARCH_TERM_NOT_2");
					$localizedOperators = "/^(".ToUpper($terms).")$/".BX_UTF_PCRE_MODIFIER;
				}
				if (preg_match($localizedOperators, $upperToken))
				{
					return $w;
				}
			}
		}

		if (preg_match("/cut[56]/i", $w))
		{
			return $w;
		}

		$stems = array_keys(stemming($w, $this->m_lang));
		if (count($stems) >= 1)
		{
			$this->bStemming = true;
			return $stems[0];
		}

		return " ";
	}
Ejemplo n.º 6
0
/**
 * Counts how often each stemmed term occurs in a list of words.
 *
 * @param array $arr_kata Words to stem (via stemming()) and count.
 * @return array Map of stemmed term => number of occurrences; an empty array
 *               when no words were given.
 */
function calculate_tf($arr_kata)
{
    // Start from an empty map so that empty input returns array() rather than
    // an undefined variable.
    $doc_terms = array();
    // Iterate by value: the words do not have to be keyed 0..n-1.
    foreach ($arr_kata as $kata) {
        $term = stemming($kata);
        // calculate tf
        if (!isset($doc_terms[$term])) {
            $doc_terms[$term] = 0;
        }
        $doc_terms[$term]++;
    }
    return $doc_terms;
}
Ejemplo n.º 7
0
 /**
  * Rebuilds the search-word rows of one message: deletes the message's rows
  * from the search table and inserts one row per stem of the given text.
  *
  * Does nothing when self::CheckModule() fails.
  *
  * @param int    $M_ID    Message ID (cast to int before use in SQL).
  * @param string $SITE_ID Site whose LANGUAGE_ID selects the stemming language.
  * @param string $s       HTML text; converted with HTMLToTxt() before stemming.
  * @return void
  *
  * @deprecated
  */
 static function WriteWordsInTable($M_ID, $SITE_ID, $s)
 {
     global $DB;
     if (!self::CheckModule()) {
         return;
     }
     $err_mess = self::err_mess() . "<br>Function: writeWordsInTable<br>Line: ";
     $M_ID = intval($M_ID);
     $ticketSearch = self::TABLE_NAME;
     $rsSite = CSite::GetByID($SITE_ID);
     $arrSite = $rsSite->Fetch();
     // Guard against Fetch() not returning a row; the language then stays
     // null, which is what the unchecked array access evaluated to before.
     $langID = is_array($arrSite) ? $arrSite["LANGUAGE_ID"] : null;
     $DB->Query("DELETE FROM {$ticketSearch} WHERE MESSAGE_ID = {$M_ID}", false, $err_mess . __LINE__);
     $arStems = stemming(HTMLToTxt($s), $langID);
     // The stems are the array keys; the values are not needed here.
     foreach ($arStems as $stem => $unused) {
         $strSql = "INSERT INTO " . $ticketSearch . "(MESSAGE_ID, SEARCH_WORD) VALUES ({$M_ID}, '" . $DB->ForSql($stem) . "')";
         // The query result is intentionally not stored: it used to overwrite
         // the very array being iterated.
         $DB->Query($strSql, false, $err_mess . __LINE__);
     }
 }
Ejemplo n.º 8
0
<?php

// Text loaded from home_text.txt (looked up on the include path as well).
// NOTE(review): presumably the default content of the form below - confirm.
$kalimat = file_get_contents('home_text.txt', true);
if (!empty($_POST['submit'])) {
    // Accept only a plain string; anything else is treated as empty input.
    $kalimat = isset($_REQUEST['kalimat']) && is_string($_REQUEST['kalimat']) ? $_REQUEST['kalimat'] : '';
    $kata = tokenising($kalimat);
    $hasil = filtering($kata, "id");
    echo '<strong>Kalimat :</strong>' . '<br>';
    // User input is escaped before being written into the HTML response.
    // A single-byte charset (the one the page declares) is passed so that no
    // input is rejected as an invalid multi-byte sequence.
    echo htmlspecialchars($kalimat, ENT_QUOTES, 'Windows-1252') . '<br><br>';
    // --------- stemming results ---------
    echo '<font color=blue> Hasil Stemming </font>' . '<br>';
    $n = count($hasil);
    for ($i = 0; $i < $n; $i++) {
        // Stem each word once and reuse the result in the output line.
        $term = stemming($hasil[$i]);
        echo 'kata ke ' . ($i + 1) . ' ' . htmlspecialchars((string) $hasil[$i], ENT_QUOTES, 'Windows-1252') . ' ==> hasil : <strong>' . htmlspecialchars((string) $term, ENT_QUOTES, 'Windows-1252') . '</strong><br>';
    }
    echo '<strong>Jumlah kata ada : ' . $n . '</strong><br>';
    // Stop here so the input form below is not rendered after the results.
    exit;
}
?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
	<head>
		<meta http-equiv="Content-Type" content="text/html; charset=windows-1252" />
		<title>Stemming</title>
	</head>
	
	<body>
		<p><strong>SIMULASI PROSES STEMMING </strong></p>
		<p>Masukkan kata/kalimat:</p>
		<form method="post">
Ejemplo n.º 9
0
 /**
  * Prepares the cleaned texts and word lists of the new OPL and of all old OPLs.
  *
  * For every text this appends the cleaned string to $this->array_string_opl
  * and its stemmed word list to $this->array_word_opl (new OPL first, then
  * the old ones in order). Finally all words are flattened into
  * $this->word_opl, de-duplicated, and falsy entries are removed.
  *
  * @return void
  */
 public function init()
 {
     // Splits a cleaned text on single spaces and stems every word once.
     // A word is kept unchanged when stemming() gives a result loosely equal
     // to null (the comparison is deliberately non-strict, as before).
     $stem_words = function ($clean_text) {
         $stemmed_words = [];
         foreach (explode(" ", $clean_text) as $word) {
             $stem = stemming($word);
             $stemmed_words[] = $stem == null ? $word : $stem;
         }
         return $stemmed_words;
     };
     /*
      * Clean the new OPL
      */
     $clean_opl_baru = $this->clean($this->opl_baru);
     $this->array_string_opl[] = $clean_opl_baru;
     $this->array_word_opl[] = $stem_words($clean_opl_baru);
     /*
      * Clean the old OPLs
      */
     foreach ($this->array_opl_lama as $opl_lama) {
         $clean_opl_lama = $this->clean($opl_lama);
         $this->array_string_opl[] = $clean_opl_lama;
         $this->array_word_opl[] = $stem_words($clean_opl_lama);
     }
     // Flatten every word list into the overall vocabulary.
     foreach ($this->array_word_opl as $word_opl) {
         foreach ($word_opl as $word) {
             $this->word_opl[] = $word;
         }
     }
     // Both calls preserve the original keys, so the result may have gaps.
     $this->word_opl = array_unique($this->word_opl);
     $this->word_opl = array_filter($this->word_opl);
 }