/**
 * Splits a raw search string into usable search terms and ignored words.
 *
 * The text is stripped of markup codes via clear_codes(), tokenised via
 * extract_words(), de-duplicated, and then every word is classified:
 *   - words found in the stopword list (lib/stopwords.php) are ignored;
 *     the comparison is case-insensitive so that "The" is treated like "the",
 *     matching how update_index() lower-cases text before its stopword check;
 *   - words shorter than 3 or longer than 50 bytes (strlen) are ignored;
 *   - every remaining word becomes a lower-cased search term.
 *
 * @param string $text Raw search input.
 * @return array Two-element array: index 0 holds the unique lower-cased
 *               search terms, index 1 the unique ignored words in the
 *               spelling the user entered.
 */
function searchstring_to_array($text)
{
    // Strip codes
    $text = clear_codes($text);

    // Split into words
    $words = array_unique(extract_words($text));

    // Load the stopword list; the include is expected to define $stopwords
    // in this function's scope.
    include 'lib/stopwords.php';
    if (!isset($stopwords) || !is_array($stopwords)) {
        // A missing or broken stopword file must not break the search.
        $stopwords = array();
    }

    // Lower-cased lookup table: constant-time, case-insensitive membership test.
    $stopLookup = array();
    foreach ($stopwords as $stopword) {
        $stopLookup[strtolower($stopword)] = true;
    }

    // Classify the words
    $filteredwords = array();
    $ignored = array();
    $rejected = array();
    foreach ($words as $key => $word) {
        $word = trim($word);
        if (!$word) {
            // Empty tokens are dropped silently and never reported as ignored.
            continue;
        }

        $lower = strtolower($word);
        if (isset($stopLookup[$lower])) {
            $ignored[$key] = $word;
            continue;
        }

        $length = strlen($word);
        if ($length < 3 || $length > 50) {
            $rejected[] = $word;
            continue;
        }

        // Search terms are always lower case.
        $filteredwords[$key] = $lower;
    }

    // Stopword hits come first, length rejects are appended afterwards.
    foreach ($rejected as $word) {
        $ignored[] = $word;
    }

    return array(array_unique($filteredwords), array_unique($ignored));
}
/**
 * Adds the words of a post (or thread title) to the forum search index.
 *
 * Paired [tag]...[/tag] codes are unwrapped repeatedly until none remain,
 * the text is lower-cased and split with extract_words(), and every distinct
 * word that is 3 to 50 bytes long and not a stopword is inserted into the
 * PRE . "_forum_index" table with a single multi-row INSERT.
 *
 * @param string   $text     Text to index.
 * @param int      $threadid Thread id; cast to int, must be non-zero.
 * @param int      $postid   Post id; cast to int, must be non-zero.
 * @param bool|int $title    Truthy when $text is a title; stored as 0/1 in "istitle".
 * @return bool False when either id is missing, true otherwise (also when
 *              no word qualified and nothing was inserted).
 */
function update_index($text, $threadid, $postid, $title = false)
{
    // Only $db is used directly below. $set and $apx stay declared because
    // the included stopword file runs in this scope and may read them.
    global $set, $db, $apx;

    $threadid = (int) $threadid;
    $postid = (int) $postid;
    $title = (int) $title;
    if (!$threadid || !$postid) {
        return false;
    }

    // Strip codes: each pass unwraps one nesting level of paired tags.
    $tagPattern = '#\\[([a-z0-9]+)(=.*?)?\\](.*?)\\[/\\1\\]#si';
    while (preg_match($tagPattern, $text)) {
        $text = preg_replace($tagPattern, '\\3', $text);
    }

    // Split into distinct lower-case words
    $words = array_unique(extract_words(strtolower($text)));

    // Build the SQL value tuples
    include '../forum/lib/stopwords.php';
    $rows = array();
    foreach ($words as $candidate) {
        $candidate = trim($candidate);
        $length = strlen($candidate);

        // Skip empty words, words outside 3..50 bytes, and stopwords.
        $indexable = $candidate
            && $length >= 3
            && $length <= 50
            && !in_array($candidate, $stopwords);
        if (!$indexable) {
            continue;
        }

        // NOTE(review): escaping relies on addslashes(); confirm whether $db
        // offers a connection-aware escape or prepared statements.
        $rows[] = "('" . addslashes(strtolower($candidate)) . "','" . $threadid . "','" . $postid . "','" . $title . "')";
    }

    // Write to the database
    if ($rows) {
        $db->query("INSERT INTO " . PRE . "_forum_index (word,threadid,postid,istitle) VALUES " . implode(',', $rows));
    }

    return true;
}
/**
 * Echoes the words of a string as a comma-separated list.
 *
 * The string is tokenised with extract_words() and passed through
 * clean_string(); the keys of that result are taken as the word list.
 * Before output, each entry has any run of two or more word characters
 * that occurs again later in the same entry removed, together with the
 * non-word characters directly following it.
 *
 * @param string $string Text to split.
 * @return void Output is written directly with echo.
 */
function separate_words($string)
{
    // Return value is not used here; the call is kept in case it has side effects.
    load_ci();

    $wordList = array_keys(clean_string(extract_words($string)));

    $pieces = array();
    foreach ($wordList as $entry) {
        $pieces[] = preg_replace('/(\\w{2,})(?=.*?\\1)\\W*/', '', $entry);
    }

    // A comma between entries, none after the last one.
    echo implode(',', $pieces);
}