/**
 * Splits a string into words.
 * Used for indexing, can also be used to find words in a query.
 *
 * The string is lowercased first unless $this->lexerConf['casesensitive'] is set.
 * Words are then located one at a time with get_word() and collected through addWords().
 * When $this->debug is enabled, $this->debugString receives an HTML rendering of the
 * input in which the text skipped between words is wrapped in a red <span>.
 *
 * @param string $wordString String with UTF-8 content to process.
 * @return array Array of words in utf-8
 * @todo Define visibility
 */
public function split2Words($wordString)
{
    // Start with an empty debug trace
    $this->debugString = '';
    // Case folding is skipped only for case sensitive lexers
    if (!$this->lexerConf['casesensitive']) {
        $wordString = $this->csObj->conv_case('utf-8', $wordString, 'toLower');
    }
    $words = array();
    $cursor = 0;
    while (true) {
        list($wordStart, $wordLength) = $this->get_word($wordString, $cursor);
        // A falsy length means that no further word was found
        if (!$wordLength) {
            break;
        }
        $this->addWords($words, $wordString, $wordStart, $wordLength);
        if ($this->debug) {
            // Skipped text (between the cursor and the word) is shown in red, the word itself as plain text
            $skippedText = htmlspecialchars(substr($wordString, $cursor, $wordStart - $cursor));
            $wordText = htmlspecialchars(substr($wordString, $wordStart, $wordLength));
            $this->debugString .= '<span style="color:red">' . $skippedText . '</span>' . $wordText;
        }
        // Continue scanning right behind the word just processed
        $cursor = $wordStart + $wordLength;
    }
    return $words;
}
/**
 * Converts a given string to a string that can be used as a URL segment.
 * The result is not url-encoded.
 *
 * Processing order: lowercase the input, strip tags, replace runs of space, "-", "+"
 * and "_" with $spaceCharacter, pass the string through specCharsToASCII(), remove
 * everything that is neither a letter, a digit nor part of $spaceCharacter, collapse
 * repeated separators into one and trim separators from both ends.
 *
 * @param string $string
 * @param string $spaceCharacter
 * @return string
 */
public function convertToSafeString($string, $spaceCharacter = '-')
{
    // Quote the separator once, including the "/" delimiter used by the patterns below.
    // Without the delimiter argument a separator of "/" would produce an invalid pattern.
    $quotedSpaceCharacter = preg_quote($spaceCharacter, '/');

    $processedTitle = $this->csConvertor->conv_case('utf-8', $string, 'toLower');
    $processedTitle = strip_tags($processedTitle);
    $processedTitle = preg_replace('/[ \\-+_]+/', $spaceCharacter, $processedTitle);
    $processedTitle = $this->csConvertor->specCharsToASCII('utf-8', $processedTitle);
    $processedTitle = preg_replace('/[^\\p{L}0-9' . $quotedSpaceCharacter . ']/u', '', $processedTitle);
    // The separator is grouped so that the quantifier applies to the whole separator and
    // not only to its last byte (multi-byte separator) or last character (longer separator).
    $processedTitle = preg_replace('/(?:' . $quotedSpaceCharacter . '){2,}/', $spaceCharacter, $processedTitle);
    $processedTitle = trim($processedTitle, $spaceCharacter);
    // TODO Post-processing hook here
    $processedTitle = strtolower($processedTitle);
    return $processedTitle;
}
/**
 * Handler for the opening of a tag
 *
 * Pending character data is handed to spellCheckHandler() first. The opening tag is
 * then appended to $this->text with a lowercased tag name and all of its attributes.
 * br, img, hr and area (all-lowercase or all-uppercase spelling) are written
 * self-closed (" />"); the "spellchecker" tag produces no output at all.
 *
 * @param mixed $xml_parser Parser handle, passed through to spellCheckHandler()
 * @param string $tag Name of the tag being opened
 * @param array $attributes Attribute name => value pairs of the tag
 * @return void
 */
public function startHandler($xml_parser, $tag, $attributes)
{
    // Flush character data collected before this tag
    if ((string) $this->xmlCharacterData !== '') {
        $this->spellCheckHandler($xml_parser, $this->xmlCharacterData);
        $this->xmlCharacterData = '';
    }
    // Decide how the tag gets terminated
    switch ($tag) {
        case 'spellchecker':
            // This tag is never written to the output
            return;
        case 'br':
        case 'BR':
        case 'img':
        case 'IMG':
        case 'hr':
        case 'HR':
        case 'area':
        case 'AREA':
            $tagEnd = ' />';
            break;
        default:
            $tagEnd = '>';
    }
    $markup = '<' . $this->csConvObj->conv_case($this->parserCharset, $tag, 'toLower') . ' ';
    foreach ($attributes as $attributeName => $attributeValue) {
        $markup .= $attributeName . '="' . $attributeValue . '" ';
    }
    $this->text .= $markup . $tagEnd;
}
/**
 * Check if $value is valid. If it is not valid, needs to add an error
 * to result.
 *
 * The value is valid when it is contained in the comma separated list given in
 * $this->options['array']. With the "ignorecase" option both the value and the list
 * entries are lowercased before comparing; with the "strict" option the comparison
 * is type safe. Non-string values and the empty string are not checked at all.
 *
 * @param mixed $value
 * @return void
 */
public function isValid($value)
{
    // Compare against '' explicitly: empty() would also skip the string "0", which
    // would let a submitted "0" bypass the list check.
    if (!is_string($value) || $value === '') {
        return;
    }
    $allowedOptionsArray = GeneralUtility::trimExplode(',', $this->options['array'], true);
    if (!empty($this->options['ignorecase'])) {
        $value = $this->charsetConverter->conv_case('utf-8', $value, 'toLower');
        // Written back by key instead of by reference, so that no dangling reference
        // to the last array element survives the loop.
        foreach ($allowedOptionsArray as $optionKey => $option) {
            $allowedOptionsArray[$optionKey] = $this->charsetConverter->conv_case('utf-8', $option, 'toLower');
        }
    }
    if (!in_array($value, $allowedOptionsArray, !empty($this->options['strict']))) {
        $this->addError($this->renderMessage($this->options['errorMessage'][0], $this->options['errorMessage'][1], 'error'), 1442002594);
    }
}
/**
 * Initialize internal variables, especially selector box values for the search form and search words
 *
 * In order, this method:
 * - reads the indexer configuration and sets up external parsers and the lexer,
 * - normalizes some piVars ("sections", "freeIndexUid", "sword", "results"),
 * - builds the selector box options in $this->optValues (static entries, free index
 *   configurations, media types, section menus, languages),
 * - registers the localized search operators,
 * - calls the "initialize_postProc" hook,
 * - applies default piVars and the "blind." configuration,
 * - and finally stores the parsed search words in $this->sWArr.
 *
 * @return void
 */
public function initialize()
{
    // Indexer configuration from Extension Manager interface:
    $this->indexerConfig = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']);
    $this->enableMetaphoneSearch = (bool) $this->indexerConfig['enableMetaphoneSearch'];
    $this->storeMetaphoneInfoAsWords = !\TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed('index_words');
    $this->timeTracker = GeneralUtility::makeInstance(TimeTracker::class);
    // Initialize external document parsers for icon display and other soft operations
    if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['external_parsers'])) {
        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['external_parsers'] as $extension => $_objRef) {
            $this->external_parsers[$extension] = GeneralUtility::getUserObj($_objRef);
            // Init parser and if it returns FALSE, unset its entry again:
            if (!$this->external_parsers[$extension]->softInit($extension)) {
                unset($this->external_parsers[$extension]);
            }
        }
    }
    // Init lexer (used to post-processing of search words)
    $lexerObjRef = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['lexer'] ?: \TYPO3\CMS\IndexedSearch\Lexer::class;
    $this->lexerObj = GeneralUtility::getUserObj($lexerObjRef);
    // If "_sections" is set, this value overrides any existing value.
    if ($this->piVars['_sections']) {
        $this->piVars['sections'] = $this->piVars['_sections'];
    }
    // Unless "_freeIndexUid" is exactly "_", its value overrides any existing "freeIndexUid" value.
    if ($this->piVars['_freeIndexUid'] !== '_') {
        $this->piVars['freeIndexUid'] = $this->piVars['_freeIndexUid'];
    }
    // Add previous search words to current
    if ($this->piVars['sword_prev_include'] && $this->piVars['sword_prev']) {
        $this->piVars['sword'] = trim($this->piVars['sword_prev']) . ' ' . $this->piVars['sword'];
    }
    $this->piVars['results'] = MathUtility::forceIntegerInRange($this->piVars['results'], 1, 100, $this->defaultResultNumber);
    // Make sure that some cropping and markup constants used later are defined
    $this->loadSettings();
    // Selector-box values defined here:
    $this->optValues = array(
        'type' => array(
            '0' => $this->pi_getLL('opt_type_0'),
            '1' => $this->pi_getLL('opt_type_1'),
            '2' => $this->pi_getLL('opt_type_2'),
            '3' => $this->pi_getLL('opt_type_3'),
            '10' => $this->pi_getLL('opt_type_10'),
            '20' => $this->pi_getLL('opt_type_20')
        ),
        'defOp' => array(
            '0' => $this->pi_getLL('opt_defOp_0'),
            '1' => $this->pi_getLL('opt_defOp_1')
        ),
        'sections' => array(
            '0' => $this->pi_getLL('opt_sections_0'),
            '-1' => $this->pi_getLL('opt_sections_-1'),
            '-2' => $this->pi_getLL('opt_sections_-2'),
            '-3' => $this->pi_getLL('opt_sections_-3')
        ),
        'freeIndexUid' => array(
            '-1' => $this->pi_getLL('opt_freeIndexUid_-1'),
            '-2' => $this->pi_getLL('opt_freeIndexUid_-2'),
            '0' => $this->pi_getLL('opt_freeIndexUid_0')
        ),
        'media' => array(
            '-1' => $this->pi_getLL('opt_media_-1'),
            '0' => $this->pi_getLL('opt_media_0'),
            '-2' => $this->pi_getLL('opt_media_-2')
        ),
        'order' => array(
            'rank_flag' => $this->pi_getLL('opt_order_rank_flag'),
            'rank_freq' => $this->pi_getLL('opt_order_rank_freq'),
            'rank_first' => $this->pi_getLL('opt_order_rank_first'),
            'rank_count' => $this->pi_getLL('opt_order_rank_count'),
            'mtime' => $this->pi_getLL('opt_order_mtime'),
            'title' => $this->pi_getLL('opt_order_title'),
            'crdate' => $this->pi_getLL('opt_order_crdate')
        ),
        'group' => array(
            'sections' => $this->pi_getLL('opt_group_sections'),
            'flat' => $this->pi_getLL('opt_group_flat')
        ),
        'lang' => array(
            -1 => $this->pi_getLL('opt_lang_-1'),
            0 => $this->pi_getLL('opt_lang_0')
        ),
        'desc' => array(
            '0' => $this->pi_getLL('opt_desc_0'),
            '1' => $this->pi_getLL('opt_desc_1')
        ),
        'results' => array(
            '10' => '10',
            '20' => '20',
            '50' => '50',
            '100' => '100'
        )
    );
    // Remove this option if metaphone search is disabled
    if (!$this->enableMetaphoneSearch) {
        unset($this->optValues['type']['10']);
    }
    // Free Index Uid:
    if ($this->conf['search.']['defaultFreeIndexUidList']) {
        $uidList = GeneralUtility::intExplode(',', $this->conf['search.']['defaultFreeIndexUidList']);
        $indexCfgRecords = $this->databaseConnection->exec_SELECTgetRows('uid,title', 'index_config', 'uid IN (' . implode(',', $uidList) . ')' . $this->cObj->enableFields('index_config'), '', '', '', 'uid');
        foreach ($uidList as $uidValue) {
            if (is_array($indexCfgRecords[$uidValue])) {
                $this->optValues['freeIndexUid'][$uidValue] = $indexCfgRecords[$uidValue]['title'];
            }
        }
    }
    // Should we use join_pages instead of long lists of uids?
    if ($this->conf['search.']['skipExtendToSubpagesChecking']) {
        $this->join_pages = 1;
    }
    // Add media to search in:
    // The list stays empty (= no restriction) when "search.mediaList" is not configured.
    // It has to be initialized here because it is read unconditionally in the loop below.
    $mediaList = '';
    if (trim($this->conf['search.']['mediaList']) !== '') {
        $mediaList = implode(',', GeneralUtility::trimExplode(',', $this->conf['search.']['mediaList'], true));
    }
    foreach ($this->external_parsers as $extension => $obj) {
        // Skip unwanted extensions
        if ($mediaList && !GeneralUtility::inList($mediaList, $extension)) {
            continue;
        }
        if ($name = $obj->searchTypeMediaTitle($extension)) {
            $this->optValues['media'][$extension] = $this->pi_getLL('opt_sections_' . $extension, $name);
        }
    }
    // Add operators for various languages
    // Converts the operators to lowercase
    $this->operator_translate_table[] = array($this->charsetConverter->conv_case('utf-8', $this->pi_getLL('local_operator_AND'), 'toLower'), 'AND');
    $this->operator_translate_table[] = array($this->charsetConverter->conv_case('utf-8', $this->pi_getLL('local_operator_OR'), 'toLower'), 'OR');
    $this->operator_translate_table[] = array($this->charsetConverter->conv_case('utf-8', $this->pi_getLL('local_operator_NOT'), 'toLower'), 'AND NOT');
    // This is the id of the site root. This value may be a commalist of integer (prepared for this)
    $this->wholeSiteIdList = (int) $this->frontendController->config['rootLine'][0]['uid'];
    // Creating levels for section menu:
    // This selects the first and secondary menus for the "sections" selector - so we can search in sections and sub sections.
    if ($this->conf['show.']['L1sections']) {
        $firstLevelMenu = $this->getMenu($this->wholeSiteIdList);
        foreach ($firstLevelMenu as $optionName => $mR) {
            if (!$mR['nav_hide']) {
                $this->optValues['sections']['rl1_' . $mR['uid']] = trim($this->pi_getLL('opt_RL1') . ' ' . $mR['title']);
                if ($this->conf['show.']['L2sections']) {
                    $secondLevelMenu = $this->getMenu($mR['uid']);
                    foreach ($secondLevelMenu as $kk2 => $mR2) {
                        if (!$mR2['nav_hide']) {
                            $this->optValues['sections']['rl2_' . $mR2['uid']] = trim($this->pi_getLL('opt_RL2') . ' ' . $mR2['title']);
                        } else {
                            // Hidden pages are dropped so that they are not part of the "all" entry below
                            unset($secondLevelMenu[$kk2]);
                        }
                    }
                    $this->optValues['sections']['rl2_' . implode(',', array_keys($secondLevelMenu))] = $this->pi_getLL('opt_RL2ALL');
                }
            } else {
                // Hidden pages are dropped so that they are not part of the "all" entry below
                unset($firstLevelMenu[$optionName]);
            }
        }
        $this->optValues['sections']['rl1_' . implode(',', array_keys($firstLevelMenu))] = $this->pi_getLL('opt_RL1ALL');
    }
    // Setting the list of root IDs for the search. Notice, these page IDs MUST have a TypoScript template with root flag on them! Basically this list is used to select on the "rl0" field and page ids are registered as "rl0" only if a TypoScript template record with root flag is there.
    // This happens AFTER the use of $this->wholeSiteIdList above because the above will then fetch the menu for the CURRENT site - regardless of this kind of searching here. Thus a general search will lookup in the WHOLE database while a specific section search will take the current sections...
    if ($this->conf['search.']['rootPidList']) {
        $this->wholeSiteIdList = implode(',', GeneralUtility::intExplode(',', $this->conf['search.']['rootPidList']));
    }
    // Load the template
    $this->templateCode = $this->cObj->fileResource($this->conf['templateFile']);
    // Add search languages:
    $res = $this->databaseConnection->exec_SELECTquery('*', 'sys_language', '1=1' . $this->cObj->enableFields('sys_language'));
    while (false !== ($data = $this->databaseConnection->sql_fetch_assoc($res))) {
        $this->optValues['lang'][$data['uid']] = $data['title'];
    }
    $this->databaseConnection->sql_free_result($res);
    // Calling hook for modification of initialized content
    if ($hookObj = $this->hookRequest('initialize_postProc')) {
        $hookObj->initialize_postProc();
    }
    // Default values set:
    // Setting first values in optValues as default values IF there is not corresponding piVar value set already.
    foreach ($this->optValues as $optionName => $optionValue) {
        if (!isset($this->piVars[$optionName])) {
            reset($optionValue);
            $this->piVars[$optionName] = key($optionValue);
        }
    }
    // Blind selectors:
    if (is_array($this->conf['blind.'])) {
        foreach ($this->conf['blind.'] as $optionName => $optionValue) {
            if (is_array($optionValue)) {
                // Sub array (key with trailing dot): remove single entries of the selector
                foreach ($optionValue as $optionValueSubKey => $optionValueSubValue) {
                    if (!is_array($optionValueSubValue) && $optionValueSubValue && is_array($this->optValues[substr($optionName, 0, -1)])) {
                        unset($this->optValues[substr($optionName, 0, -1)][$optionValueSubKey]);
                    }
                }
            } elseif ($optionValue) {
                // A non-empty plain value removes the whole option array
                unset($this->optValues[$optionName]);
            }
        }
    }
    // This gets the search-words into the $sWArr:
    $this->sWArr = $this->getSearchWords($this->piVars['defOp']);
}
/**
 * Splits the search word input ($this->sword) into an array where each word is
 * represented by an array with key "sword" holding the search word and key "oper"
 * holding the SQL operator (eg. AND, OR)
 *
 * - The input is cut to 200 bytes, converted to UTF-8 and its entities are resolved.
 * - If a "getSearchWords" hook object is registered, splitting is delegated to it.
 * - For search type 20 (sentence) the whole trimmed input is returned as one word.
 * - Otherwise the input is exploded by IndexedSearchUtility::getExplodedSearchString(),
 *   where +, | and - as well as the lowercased localized operator words stand for
 *   AND, OR and AND NOT, and the result is post-processed by the lexer.
 *
 * @param bool $defaultOperator If TRUE (== 1), the default operator will be OR, not AND
 * @return array|bool Search words, or FALSE if the input could not be exploded into words
 */
protected function getSearchWords($defaultOperator)
{
    // Shorten search-word string to max 200 bytes. This does NOT take multibyte charsets
    // into account, but the limit only serves as a run-away protection.
    $input = substr($this->sword, 0, 200);
    // Convert to UTF-8 + conv. entities (was also converted during indexing!)
    $input = $this->charsetConverter->conv($input, $GLOBALS['TSFE']->metaCharset, 'utf-8');
    $input = $this->charsetConverter->entities_to_utf8($input);

    // A registered hook takes over the splitting completely
    $hookObj = $this->hookRequest('getSearchWords');
    if ($hookObj) {
        return $hookObj->getSearchWords_splitSWords($input, $defaultOperator);
    }

    // Sentence search: the complete input is one single search word
    if ($this->searchData['searchType'] == 20) {
        return array(array('sword' => trim($input), 'oper' => 'AND'));
    }

    // Words and symbols which act as operators between the search words
    $operatorTranslateTable = array(array('+', 'AND'), array('|', 'OR'), array('-', 'AND NOT'));
    $localizedOperators = array(
        'localizedOperandAnd' => 'AND',
        'localizedOperandOr' => 'OR',
        'localizedOperandNot' => 'AND NOT'
    );
    foreach ($localizedOperators as $translationKey => $sqlOperator) {
        $localizedWord = LocalizationUtility::translate($translationKey, 'IndexedSearch');
        $operatorTranslateTable[] = array($this->charsetConverter->conv_case('utf-8', $localizedWord, 'toLower'), $sqlOperator);
    }

    $explodedWords = \TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::getExplodedSearchString($input, $defaultOperator == 1 ? 'OR' : 'AND', $operatorTranslateTable);
    if (!is_array($explodedWords)) {
        return false;
    }
    return $this->procSearchWordsByLexer($explodedWords);
}