/**
 * Processing words in the array from split*Content -functions
 *
 * Every entry of the input array is passed through the lexer's split2Words()
 * and replaced by the result. The 'title', 'keywords' and 'description'
 * entries are afterwards reduced to their unique words.
 *
 * @param array $contentArr Array of content to index, see splitHTMLContent() and splitRegularContent()
 * @return array Content input array modified so each key now holds an array of words (without duplicates for title, keywords and description)
 */
function processWordsInArrays($contentArr)
{
    // Split all parts to words:
    foreach ($contentArr as $key => $value) {
        $contentArr[$key] = $this->lexerObj->split2Words($value);
    }

    // For title, keywords, and description we don't want duplicates.
    // A section is only deduplicated when it is present and is an array:
    // calling array_unique() on a missing key raises a notice plus a warning
    // on older PHP versions and a TypeError on PHP 8.
    // Note that array_unique() keeps the original keys, so the resulting
    // word lists may have gaps in their numeric indexes.
    foreach (array('title', 'keywords', 'description') as $field) {
        if (isset($contentArr[$field]) && is_array($contentArr[$field])) {
            $contentArr[$field] = array_unique($contentArr[$field]);
        }
    }

    // Return modified array:
    return $contentArr;
}
/**
 * Post-process the search word array so it matches the words as they were
 * indexed (including case-folding, if any).
 * If a word is split into multiple words (eg. CJK will be!), every resulting
 * word keeps the operator of the word it came from.
 *
 * @param array $SWArr Search word array; each entry holds a 'sword' and an 'oper' key
 * @return array Search word array, processed through lexer
 */
function procSearchWordsByLexer($SWArr)
{
    $processed = array();

    foreach ($SWArr as $definition) {
        // An entry containing a space might be a sentence in quotes
        // (like "there is"); such entries are passed on unchanged.
        if (strstr($definition['sword'], ' ')) {
            $processed[] = $definition;
            continue;
        }

        // Single word: let the lexer split it and re-add every piece
        // with the operator of the original entry.
        $pieces = $this->lexerObj->split2Words($definition['sword']);
        foreach ($pieces as $piece) {
            $processed[] = array(
                'sword' => $piece,
                'oper' => $definition['oper']
            );
        }
    }

    return $processed;
}