/**
 * Returns per-section occurrence counters for a keyword (or a set of keywords)
 * taken from the already built page index in $this->__index.
 *
 * @param string|array $keyword  A single keyword, or an array of keywords. For an
 *                               array the result is an array with the same keys,
 *                               each holding the result for that element.
 * @param bool         $bStemmed When true the keyword is used as-is; for an array
 *                               argument the array KEYS are then taken as the
 *                               keywords. When false (and $this->bSearch is set)
 *                               the keyword is passed through stemming() first.
 *
 * @return array|false false when the index is not an array; otherwise an array
 *                     with the integer counters TOTAL, BOLD, ITALIC, LINK,
 *                     LINK_EXTERNAL, DESCRIPTION, KEYWORDS, TITLE, H1 and the
 *                     CONTRAST value from _GetContrast() (or an array of such
 *                     arrays for array input).
 */
function CheckKeyword($keyword, $bStemmed = false)
{
    if (!is_array($this->__index)) {
        return false;
    }

    if (is_array($keyword)) {
        $arResult = array();
        foreach ($keyword as $key => $word) {
            // Stemmed input carries the stems in the array keys.
            $arResult[$key] = $this->CheckKeyword($bStemmed ? $key : $word, $bStemmed);
        }
        return $arResult;
    }

    if (!$bStemmed && $this->bSearch) {
        $keyword = stemming($keyword, $this->__lang);
    }

    // stemming() may hand back an array of stems; report each stem separately.
    if (is_array($keyword)) {
        return $this->CheckKeyword($keyword, true);
    }

    $arSections = array(
        'TOTAL',
        'BOLD',
        'ITALIC',
        'LINK',
        'LINK_EXTERNAL',
        'DESCRIPTION',
        'KEYWORDS',
        'TITLE',
        'H1',
    );

    $arResult = array();
    foreach ($arSections as $section) {
        // A keyword that is absent from a section counts as 0; isset() avoids the
        // undefined-index notice that a direct read would raise.
        $arResult[$section] = isset($this->__index[$section][$keyword])
            ? intval($this->__index[$section][$keyword])
            : 0;
    }
    $arResult['CONTRAST'] = $this->_GetContrast($keyword);

    return $arResult;
}
/**
 * Writes the stem rows of one search document into b_search_content_stem.
 *
 * The content is stemmed once for every distinct language used by the sites
 * listed in $arLID, and the rows are inserted in batches.
 *
 * @param array  $arLID    Map whose keys are site identifiers (values are not
 *                         used here). Anything that is not an array is treated
 *                         as an empty list.
 * @param int    $ID       Search content identifier (cast with intval()).
 * @param string $sContent Text to be stemmed.
 *
 * @return void
 */
function StemIndex($arLID, $ID, $sContent)
{
    $DB = CDatabase::GetModuleConnection('search');
    // Per-request cache of site settings, keyed by site id (false = site not found).
    static $CACHE_SITE_LANGS = array();

    $ID = intval($ID);
    $arLang = array();
    if (!is_array($arLID)) {
        $arLID = array();
    }

    foreach ($arLID as $site => $url) {
        if (!array_key_exists($site, $CACHE_SITE_LANGS)) {
            $db_site_tmp = CSite::GetByID($site);
            if ($ar_site_tmp = $db_site_tmp->Fetch()) {
                $CACHE_SITE_LANGS[$site] = array(
                    "LANGUAGE_ID" => $ar_site_tmp["LANGUAGE_ID"],
                    "CHARSET" => $ar_site_tmp["CHARSET"],
                    "SERVER_NAME" => $ar_site_tmp["SERVER_NAME"],
                );
            } else {
                $CACHE_SITE_LANGS[$site] = false;
            }
        }

        if (is_array($CACHE_SITE_LANGS[$site])) {
            // Used as a set: one entry per distinct language.
            $arLang[$CACHE_SITE_LANGS[$site]["LANGUAGE_ID"]] = true;
        }
    }

    foreach ($arLang as $lang => $value) {
        $sql_lang = $DB->ForSql($lang);
        $arDoc = stemming($sContent, $lang);
        $docLength = array_sum($arDoc);

        if (BX_SEARCH_VERSION > 1) {
            $arPos = stemming($sContent, $lang, false, true);
            CSearch::RegisterStem($arDoc);
        }

        if ($docLength <= 0) {
            continue;
        }

        // Documents shorter than 20 words are normalized as if they had 20.
        $logDocLength = log($docLength < 20 ? 20 : $docLength);
        $strSqlPrefix = "\n\t\t\t\t\t\tinsert ignore into b_search_content_stem\n\t\t\t\t\t\t(SEARCH_CONTENT_ID, LANGUAGE_ID, STEM, TF" . (BX_SEARCH_VERSION > 1 ? ",PS" : "") . ")\n\t\t\t\t\t\tvalues\n\t\t\t\t";
        // A batch is flushed as soon as the accumulated VALUES text exceeds this many bytes.
        $maxValuesLen = 2048;
        $strSqlValues = "";

        if (BX_SEARCH_VERSION > 1) {
            foreach ($arDoc as $word => $count) {
                $stem_id = CSearch::RegisterStem($word);
                // A non-positive id is almost impossible, but it happens; such stems are skipped.
                if ($stem_id > 0) {
                    $tf = number_format(log($count + 1) / $logDocLength, 4, ".", "");
                    $ps = number_format($arPos[$word] / $count, 4, ".", "");
                    // Reuse the id obtained above instead of registering the stem a second time.
                    $strSqlValues .= ",\n(" . $ID . ", '" . $sql_lang . "', " . $stem_id . ", " . $tf . ", " . $ps . ")";
                }

                if (strlen($strSqlValues) > $maxValuesLen) {
                    // substr(.., 2) drops the leading ",\n" of the first row.
                    $DB->Query($strSqlPrefix . substr($strSqlValues, 2), false, "File: " . __FILE__ . "<br>Line: " . __LINE__);
                    $strSqlValues = "";
                }
            }
        } else {
            foreach ($arDoc as $word => $count) {
                $tf = number_format(log($count + 1) / $logDocLength, 4, ".", "");
                $strSqlValues .= ",\n(" . $ID . ", '" . $sql_lang . "', '" . $DB->ForSQL($word) . "', " . $tf . ")";

                if (strlen($strSqlValues) > $maxValuesLen) {
                    $DB->Query($strSqlPrefix . substr($strSqlValues, 2), false, "File: " . __FILE__ . "<br>Line: " . __LINE__);
                    $strSqlValues = "";
                }
            }
        }

        // Flush whatever is left of the last, partially filled batch.
        if (strlen($strSqlValues) > 0) {
            $DB->Query($strSqlPrefix . substr($strSqlValues, 2), false, "File: " . __FILE__ . "<br>Line: " . __LINE__);
            $strSqlValues = "";
        }
    }
}
/**
 * Creates or updates a media library item, optionally storing a new source file,
 * and re-links the item to the given collections.
 *
 * Recognised keys of $Params:
 *  - 'arFields'      (array) item fields; an 'ID' greater than 0 means "update".
 *  - 'path'          (string, optional) path relative to the site document root
 *                    of a file to be used as the source.
 *  - 'file'          (array, optional) uploaded-file array ('name', 'size', ...);
 *                    only used when 'path' is not given.
 *  - 'arCollections' (array, optional) ids of collections to attach the item to.
 *
 * @param array $Params See above.
 *
 * @return array|false The item fields (merged with the stored file's data when a
 *                     file was saved), or false when the file is missing or has a
 *                     disallowed extension, when a new item has no stored file,
 *                     or when CMedialibItem::CheckFields() rejects the fields.
 */
public static function Edit($Params)
{
    global $DB;

    $source_id = false;
    $arFile = null;
    $arFields = $Params['arFields'];
    $bNew = !isset($arFields['ID']) || $arFields['ID'] <= 0;
    // empty() tolerates missing keys, which a direct read would not.
    $bFile_FD = !empty($Params['path']) && strlen($Params['path']) > 0;
    $bFile_PC = !empty($Params['file']) && strlen($Params['file']['name']) > 0 && $Params['file']['size'] > 0;

    $io = CBXVirtualIo::GetInstance();

    // 1. Store the source file
    if ($bFile_FD || $bFile_PC) {
        if ($bFile_FD) {
            $docRoot = CSite::GetSiteDocRoot(false);
            $tmp_name = $docRoot . $Params['path'];

            if ($io->FileExists($tmp_name)) {
                $flTmp = $io->GetFile($tmp_name);
                $file_name = substr($Params['path'], strrpos($Params['path'], '/') + 1);
                $arFile = array(
                    "name" => $file_name,
                    "size" => $flTmp->GetFileSize(),
                    "tmp_name" => $tmp_name,
                    "type" => CFile::IsImage($file_name) ? 'image' : 'file',
                );
            }
        } else {
            // $bFile_FD is false here, so $bFile_PC must be true.
            $arFile = $Params['file'];
        }

        // Without a file (the given path does not exist) there is nothing to
        // validate or save; bail out instead of working on an undefined variable.
        if (!is_array($arFile) || !CMedialib::CheckFileExtention($arFile["name"])) {
            return false;
        }

        if (!$bNew) {
            // Ask CFile::SaveFile() to replace the file currently attached to the item.
            $arFile["old_file"] = CMedialibItem::GetSourceId($arFields['ID']);
            $arFile["del"] = "Y";
        }

        // Resizing Image
        if (CFile::IsImage($arFile["name"])) {
            $arSize = array(
                'width' => COption::GetOptionInt('fileman', "ml_max_width", 1024),
                'height' => COption::GetOptionInt('fileman', "ml_max_height", 1024),
            );
            CFile::ResizeImage($arFile, $arSize);
        }

        $arFile["MODULE_ID"] = "fileman";
        $source_id = CFile::SaveFile($arFile, "medialibrary");

        if ($source_id) {
            $r = CFile::GetByID($source_id);
            if ($arFile = $r->Fetch()) {
                if (CFile::IsImage($arFile['FILE_NAME'])) {
                    CMedialibItem::GenerateThumbnail($arFile, array(
                        'width' => COption::GetOptionInt('fileman', "ml_thumb_width", 140),
                        'height' => COption::GetOptionInt('fileman', "ml_thumb_height", 105),
                    ));
                }
                $arFile['PATH'] = CMedialibItem::GetFullPath($arFile);
            }
        }
    }

    // TODO: Add error handling
    if ($bNew && !$source_id) {
        return false;
    }

    // 2. Add to b_medialib_item
    if (!isset($arFields['~DATE_UPDATE'])) {
        $arFields['~DATE_UPDATE'] = $DB->CurrentTimeFunction();
    }

    if (!CMedialibItem::CheckFields($arFields)) {
        return false;
    }

    if (CModule::IncludeModule("search")) {
        $arStem = stemming($arFields['NAME'] . ' ' . $arFields['DESCRIPTION'] . ' ' . $arFields['KEYWORDS'], LANGUAGE_ID);
        if (count($arStem) > 0) {
            // Stems are stored as "{STEM1}{STEM2}..." in SEARCHABLE_CONTENT.
            $arFields['SEARCHABLE_CONTENT'] = '{' . implode('}{', array_keys($arStem)) . '}';
        } else {
            $arFields['SEARCHABLE_CONTENT'] = '';
        }
    }

    if ($bNew) {
        unset($arFields['ID']);
        $arFields['SOURCE_ID'] = $source_id;
        $arFields['~DATE_CREATE'] = $arFields['~DATE_UPDATE'];
        $arFields['ITEM_TYPE'] = '';
        $ID = CDatabase::Add("b_medialib_item", $arFields, array("DESCRIPTION", "SEARCHABLE_CONTENT"));
    } else {
        if ($source_id) {
            $arFields['SOURCE_ID'] = $source_id;
        }
        $ID = $arFields['ID'];
        unset($arFields['ID']);

        $strUpdate = $DB->PrepareUpdate("b_medialib_item", $arFields);
        $strSql = "UPDATE b_medialib_item SET " . $strUpdate . " WHERE ID=" . intval($ID);
        $DB->QueryBind(
            $strSql,
            array(
                "DESCRIPTION" => $arFields["DESCRIPTION"],
                "SEARCHABLE_CONTENT" => $arFields["SEARCHABLE_CONTENT"],
            ),
            false,
            "File: " . __FILE__ . "<br>Line: " . __LINE__
        );
    }

    // 3. Set fields to b_medialib_collection_item
    if (!$bNew) {
        $strSql = "DELETE FROM b_medialib_collection_item WHERE ITEM_ID=" . intval($ID);
        $DB->Query($strSql, false, "FILE: " . __FILE__ . "<br> LINE: " . __LINE__);
    }

    // The leading "0" keeps the IN (...) list valid even without any collection.
    $strCollections = "0";
    if (isset($Params['arCollections']) && is_array($Params['arCollections'])) {
        foreach ($Params['arCollections'] as $collectionId) {
            $strCollections .= "," . intval($collectionId);
        }
    }

    $strSql = "INSERT INTO b_medialib_collection_item(ITEM_ID, COLLECTION_ID) " .
        "SELECT " . intval($ID) . ", ID " .
        "FROM b_medialib_collection " .
        "WHERE ID in (" . $strCollections . ")";
    $DB->Query($strSql, false, "FILE: " . __FILE__ . "<br> LINE: " . __LINE__);

    // 'ID' was unset above; empty() restores it without an undefined-index notice.
    if (empty($arFields['ID'])) {
        $arFields['ID'] = $ID;
    }

    // $arFile is false when the stored file row could not be fetched.
    if ($source_id && is_array($arFile)) {
        $arFields = array_merge($arFile, $arFields);
    }

    return $arFields;
}
/**
 * Writes the stem rows of one search document into b_search_content_stem,
 * passing "STEM;TF" pairs to the f_stem() table function in batches.
 *
 * The content is stemmed once for every distinct language used by the sites
 * listed in $arLID.
 *
 * @param array  $arLID    Map whose keys are site identifiers (values are not
 *                         used here). Anything that is not an array is treated
 *                         as an empty list.
 * @param int    $ID       Search content identifier (cast with intval()).
 * @param string $sContent Text to be stemmed.
 *
 * @return void
 */
function StemIndex($arLID, $ID, $sContent)
{
    global $DB;

    // The id goes straight into the SQL text, so force it to an integer.
    $ID = intval($ID);
    $arLang = array();
    if (!is_array($arLID)) {
        $arLID = array();
    }

    foreach ($arLID as $site => $url) {
        if (!isset($GLOBALS["CACHE_SEARCH_SITE_LANGS"][$site])) {
            $db_site_tmp = CSite::GetByID($site);
            if ($ar_site_tmp = $db_site_tmp->Fetch()) {
                $GLOBALS["CACHE_SEARCH_SITE_LANGS"][$site] = array(
                    "LANGUAGE_ID" => $ar_site_tmp["LANGUAGE_ID"],
                    "CHARSET" => $ar_site_tmp["CHARSET"],
                    "SERVER_NAME" => $ar_site_tmp["SERVER_NAME"],
                );
            }
        }

        if (isset($GLOBALS["CACHE_SEARCH_SITE_LANGS"][$site])) {
            // Used as a set of languages; only the keys are read below.
            $arLang[$GLOBALS["CACHE_SEARCH_SITE_LANGS"][$site]["LANGUAGE_ID"]] = true;
        }
    }

    foreach ($arLang as $lang => $value) {
        $arDoc = stemming($sContent, $lang);
        $docLength = array_sum($arDoc);
        if ($docLength <= 0) {
            continue;
        }

        // Documents shorter than 20 words are normalized as if they had 20.
        $logDocLength = log($docLength < 20 ? 20 : $docLength);
        $strSqlPrefix = "\n\t\t\t\t\t\tinsert into b_search_content_stem\n\t\t\t\t\t\t(SEARCH_CONTENT_ID, LANGUAGE_ID, STEM, TF)\n\t\t\t\t\t\tSELECT " . $ID . ", '" . $DB->ForSql($lang) . "', T.STEM, T.TF\n\t\t\t\t\t\tFROM table(cast(f_stem('\n\t\t\t\t";
        $strSqlSuffix = "') as tt_stem)) t";
        // A batch is flushed as soon as the accumulated pair list exceeds this many bytes.
        $maxValuesLen = 1024;
        $strSqlValues = "";

        foreach ($arDoc as $word => $count) {
            // TF is normalized by the LOG of the document length. Dividing by the
            // raw length made the 4-decimal value collapse to 0.0000 on long texts.
            $tf = number_format(log($count + 1) / $logDocLength, 4, ".", "");
            // The pair list lives inside a quoted SQL literal, so the stem is escaped.
            $strSqlValues .= " " . $DB->ForSql($word) . ";" . $tf;

            if (strlen($strSqlValues) > $maxValuesLen) {
                // substr(.., 1) drops the leading space of the first pair.
                $DB->Query($strSqlPrefix . substr($strSqlValues, 1) . $strSqlSuffix, false, "File: " . __FILE__ . "<br>Line: " . __LINE__);
                $strSqlValues = "";
            }
        }

        // Flush whatever is left of the last, partially filled batch.
        if (strlen($strSqlValues) > 0) {
            $DB->Query($strSqlPrefix . substr($strSqlValues, 1) . $strSqlSuffix, false, "File: " . __FILE__ . "<br>Line: " . __LINE__);
            $strSqlValues = "";
        }
    }
}
/**
 * Replaces a query word with its first stem, leaving boolean operators alone.
 *
 * The word is returned unchanged when
 *  - boolean operators are enabled ($this->no_bool_lang is falsy) and the
 *    upper-cased word is OR / AND / NOT / WITHOUT, or, with
 *    $this->rus_bool_lang == 'yes', one of the localized SEARCH_TERM_* messages;
 *  - it matches /cut[56]/i.
 *
 * Otherwise the first key returned by stemming() is used and
 * $this->bStemming is set to true; a single space is returned when stemming()
 * yields nothing.
 *
 * @param string|array $w Word, or an array whose element 0 is the word.
 *
 * @return string
 */
function StemWord($w)
{
    // Pattern for the localized operators; built once and then reused.
    static $localizedOperatorsRe = false;

    if (is_array($w)) {
        $w = $w[0];
    }
    $upperWord = ToUpper($w);

    if (!$this->no_bool_lang) {
        if (preg_match("/^(OR|AND|NOT|WITHOUT)$/", $upperWord)) {
            return $w;
        }

        if ($this->rus_bool_lang == 'yes') {
            if ($localizedOperatorsRe === false) {
                $operators = array(
                    GetMessage("SEARCH_TERM_OR"),
                    GetMessage("SEARCH_TERM_AND"),
                    GetMessage("SEARCH_TERM_NOT_1"),
                    GetMessage("SEARCH_TERM_NOT_2"),
                );
                $localizedOperatorsRe = "/^(" . ToUpper(implode("|", $operators)) . ")$/" . BX_UTF_PCRE_MODIFIER;
            }

            if (preg_match($localizedOperatorsRe, $upperWord)) {
                return $w;
            }
        }
    }

    if (preg_match("/cut[56]/i", $w)) {
        return $w;
    }

    $stems = array_keys(stemming($w, $this->m_lang));
    if (count($stems) < 1) {
        return " ";
    }

    $this->bStemming = true;

    return $stems[0];
}
/**
 * Counts how often every stemmed term occurs in a list of words.
 *
 * @param array $arr_kata Words to count; each one is passed through stemming().
 *
 * @return array Map of stemmed term => number of occurrences, in order of first
 *               appearance. Empty input yields an empty array.
 */
function calculate_tf($arr_kata)
{
    // Start from an empty map so that empty input returns array() rather than
    // an undefined variable.
    $doc_terms = array();

    // foreach visits every element, even when the keys are not 0..n-1.
    foreach ($arr_kata as $kata) {
        $term = stemming($kata);

        // calculate tf
        if (!isset($doc_terms[$term])) {
            $doc_terms[$term] = 0;
        }
        $doc_terms[$term]++;
    }

    return $doc_terms;
}
/**
 * Rebuilds the search-word rows of one message: deletes the rows stored for the
 * message and inserts one row per stem of the given text.
 *
 * Does nothing when self::CheckModule() fails.
 *
 * @param int    $M_ID    Message identifier (cast with intval()).
 * @param string $SITE_ID Site whose LANGUAGE_ID selects the stemming language.
 * @param string $s       Message text; converted with HTMLToTxt() before stemming.
 *
 * @return void
 *
 * @deprecated
 */
static function WriteWordsInTable($M_ID, $SITE_ID, $s)
{
    global $DB;

    if (!self::CheckModule()) {
        return;
    }

    $err_mess = self::err_mess() . "<br>Function: writeWordsInTable<br>Line: ";
    $M_ID = intval($M_ID);
    $ticketSearch = self::TABLE_NAME;

    $rsSite = CSite::GetByID($SITE_ID);
    $arrSite = $rsSite->Fetch();
    // Fetch() gives a non-array when the site is unknown; pass null as the language then.
    $langID = is_array($arrSite) ? $arrSite["LANGUAGE_ID"] : null;

    $DB->Query("DELETE FROM {$ticketSearch} WHERE MESSAGE_ID = {$M_ID}", false, $err_mess . __LINE__);

    $arStems = stemming(HTMLToTxt($s), $langID);
    // Only the keys (the stems) are stored. The query result is deliberately not
    // assigned to the variable that is being iterated.
    foreach ($arStems as $stem => $count) {
        $strSql = "INSERT INTO " . $ticketSearch . "(MESSAGE_ID, SEARCH_WORD) VALUES ({$M_ID}, '" . $DB->ForSql($stem) . "')";
        $DB->Query($strSql, false, $err_mess . __LINE__);
    }
}
<?php
// Stemming demo page: on POST it prints the stem of every filtered word of the
// submitted sentence and stops; otherwise it falls through to the HTML form.

// Initial text for the page; the second argument also searches the include_path.
$kalimat = file_get_contents('home_text.txt', true);

if (!empty($_POST['submit'])) {
    $kalimat = isset($_REQUEST['kalimat']) ? $_REQUEST['kalimat'] : '';
    $kata = tokenising($kalimat);
    $hasil = filtering($kata, "id");

    // Everything derived from the request is HTML-escaped before being echoed.
    // A single-byte charset is named explicitly so that htmlspecialchars() never
    // returns an empty string for bytes that are not valid UTF-8.
    echo '<strong>Kalimat :</strong>' . '<br>';
    echo htmlspecialchars($kalimat, ENT_QUOTES, 'cp1252') . '<br><br>';

    //--------- hasil Stemming --------------------
    echo '<font color=blue> Hasil Stemming </font>' . '<br>';
    $n = count($hasil);
    for ($i = 0; $i < $n; $i++) {
        // Stem each word once and reuse the result.
        $term = stemming($hasil[$i]);
        echo 'kata ke ' . ($i + 1) . ' ' . htmlspecialchars($hasil[$i], ENT_QUOTES, 'cp1252')
            . ' ==> hasil : <strong>' . htmlspecialchars($term, ENT_QUOTES, 'cp1252') . '</strong><br>';
    }
    echo '<strong>Jumlah kata ada : ' . $n . '</strong><br>';
    exit;
}
?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=windows-1252" />
<title>Stemming</title>
</head>
<body>
<p><strong>SIMULASI PROSES STEMMING </strong></p>
<p>Masukkan kata/kalimat:</p>
<form method="post">
/**
 * Prepares the word lists used for comparing the new OPL text with the old ones.
 *
 * For the new text ($this->opl_baru) and then every old text
 * ($this->array_opl_lama), in that order:
 *  - the cleaned string is appended to $this->array_string_opl;
 *  - its space-separated words, stemmed, are appended as one list to
 *    $this->array_word_opl.
 * Finally all words of $this->array_word_opl are appended to $this->word_opl,
 * which is then de-duplicated and stripped of empty values.
 *
 * @return void
 */
public function init()
{
    // Splits a cleaned sentence on spaces and stems each word, calling
    // stemming() once per word.
    $stemWords = static function ($sentence) {
        $stemmed = [];
        foreach (explode(" ", $sentence) as $word) {
            $stem = stemming($word);
            // Loose comparison on purpose: any result that == null (null, '', ...)
            // means "no stem", in which case the original word is kept.
            $stemmed[] = $stem == null ? $word : $stem;
        }

        return $stemmed;
    };

    /*
     * Clean OPL Baru
     */
    $clean_opl_baru = $this->clean($this->opl_baru);
    $this->array_string_opl[] = $clean_opl_baru;
    $this->array_word_opl[] = $stemWords($clean_opl_baru);

    /*
     * Clean OPL Lama
     */
    foreach ($this->array_opl_lama as $opl_lama) {
        $clean_opl_lama = $this->clean($opl_lama);
        $this->array_string_opl[] = $clean_opl_lama;
        $this->array_word_opl[] = $stemWords($clean_opl_lama);
    }

    // Flatten the per-text word lists into one vocabulary.
    foreach ($this->array_word_opl as $word_opl) {
        foreach ($word_opl as $word) {
            $this->word_opl[] = $word;
        }
    }

    $this->word_opl = array_unique($this->word_opl);
    // array_filter() without a callback drops every falsy entry (e.g. '').
    $this->word_opl = array_filter($this->word_opl);
}