/**
 * Post-processes the search word array so that it matches the words that were indexed
 * (including case-folding, if the lexer applies any).
 * If a word is split into several words by the lexer (e.g. CJK text), every resulting
 * word keeps the operator of the word it came from.
 *
 * As a side effect the lexer object is (re-)created and stored in $this->lexerObj.
 *
 * @param array $searchWords Search word array; every entry is an array with the keys 'sword' (the word) and 'oper' (its operator)
 * @return array Search word array, processed through lexer
 */
protected function procSearchWordsByLexer($searchWords) {
    // Resolve the lexer reference: a configured lexer wins, otherwise the default one is used.
    // empty() is used on purpose: the configuration key is optional, and reading it directly
    // raises an "undefined index" notice/warning when no custom lexer has been registered.
    $lexerObjRef = 'EXT:indexed_search/Classes/Lexer.php:&TYPO3\\CMS\\IndexedSearch\\Lexer';
    if (!empty($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['lexer'])) {
        $lexerObjRef = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['lexer'];
    }
    // Init lexer (used for post-processing of the search words)
    $this->lexerObj = GeneralUtility::getUserObj($lexerObjRef);

    $newSearchWords = array();
    // Traverse the search word array
    foreach ($searchWords as $wordDef) {
        if (strpos($wordDef['sword'], ' ') === FALSE) {
            // No space in the word: split it by the lexer and add every resulting word
            // again, each one carrying the operator of the original word.
            $res = $this->lexerObj->split2Words($wordDef['sword']);
            foreach ($res as $word) {
                $newSearchWords[] = array('sword' => $word, 'oper' => $wordDef['oper']);
            }
        } else {
            // A space means it might be a sentence in quotes (like "there is"):
            // keep the entry untouched.
            $newSearchWords[] = $wordDef;
        }
    }
    return $newSearchWords;
}
/**
 * Runs the search words through the lexer so that they match the words that were
 * indexed (including case-folding, if the lexer applies any).
 * When the lexer splits a word into several words (e.g. CJK text), every resulting
 * word keeps the operator of the word it came from.
 *
 * @param array $SWArr Search word array; every entry is an array with the keys 'sword' (the word) and 'oper' (its operator)
 * @return array Search word array, processed through lexer
 */
public function procSearchWordsByLexer($SWArr) {
    $processed = array();
    foreach ($SWArr as $entry) {
        // A space means it might be a sentence in quotes (like "there is"):
        // such entries are passed through untouched.
        if (strstr($entry['sword'], ' ')) {
            $processed[] = $entry;
            continue;
        }
        // Single word: let the lexer split it and re-add every part with the
        // operator of the original word.
        $parts = $this->lexerObj->split2Words($entry['sword']);
        foreach ($parts as $part) {
            $processed[] = array('sword' => $part, 'oper' => $entry['oper']);
        }
    }
    return $processed;
}
/**
 * Splits every part of the content array into words by means of the lexer.
 *
 * @param array $contentArr Array of content to index, see splitHTMLContent() and splitRegularContent()
 * @return array The content array where every key now holds the lexer's word list; the lists for title, keywords and description have duplicates removed
 */
public function processWordsInArrays($contentArr) {
    // Replace the raw text of each part by the list of words the lexer finds in it
    foreach ($contentArr as $part => $text) {
        $contentArr[$part] = $this->lexerObj->split2Words($text);
    }
    // Duplicates are not wanted in title, keywords and description
    foreach (array('title', 'keywords', 'description') as $field) {
        $contentArr[$field] = array_unique($contentArr[$field]);
    }
    return $contentArr;
}