예제 #1
0
 /**
  * Splits a UTF-8 string into words.
  *
  * The string is lowercased first unless $this->lexerConf['casesensitive'] is set.
  * It is then scanned from left to right: get_word() is asked for the next
  * (start, length) pair beginning at the current offset, and every hit is handed
  * to addWords() together with the result array. Scanning stops as soon as
  * get_word() reports a length of zero.
  *
  * When $this->debug is enabled, $this->debugString receives an HTML trace in which
  * the skipped text before each word is wrapped in a red <span> and followed by the word.
  *
  * @param string $wordString String with UTF-8 content to process.
  * @return array Word list as filled by addWords() (presumably by reference - verify against addWords()).
  */
 public function split2Words($wordString)
 {
     // Start every run with an empty debug trace
     $this->debugString = '';
     // Normalise case unless the lexer is configured to be case sensitive
     if (!$this->lexerConf['casesensitive']) {
         $wordString = $this->csObj->conv_case('utf-8', $wordString, 'toLower');
     }
     $foundWords = array();
     $cursor = 0;
     for (;;) {
         list($wordStart, $wordLength) = $this->get_word($wordString, $cursor);
         if (!$wordLength) {
             // No further word found
             break;
         }
         $this->addWords($foundWords, $wordString, $wordStart, $wordLength);
         if ($this->debug) {
             // Skipped text (between the cursor and the word) in red, then the word itself
             $skippedText = htmlspecialchars(substr($wordString, $cursor, $wordStart - $cursor));
             $wordText = htmlspecialchars(substr($wordString, $wordStart, $wordLength));
             $this->debugString .= '<span style="color:red">' . $skippedText . '</span>' . $wordText;
         }
         // Continue right behind the word just processed
         $cursor = $wordStart + $wordLength;
     }
     return $foundWords;
 }
예제 #2
0
 /**
  * Converts a given string to a string that can be used as a URL segment.
  * The result is not url-encoded.
  *
  * Processing steps, in order:
  *  1. lowercase the input (UTF-8 aware) and strip HTML/PHP tags,
  *  2. replace every run of space, "-", "+" and "_" with $spaceCharacter,
  *  3. transliterate special characters via specCharsToASCII(),
  *  4. drop everything that is not a Unicode letter, an ASCII digit or part of $spaceCharacter,
  *  5. collapse repeated separators and trim them from both ends,
  *  6. lowercase once more with strtolower() (presumably because the transliteration
  *     may introduce uppercase ASCII - verify against specCharsToASCII()).
  *
  * @param string $string Text to convert.
  * @param string $spaceCharacter Separator that replaces whitespace-like characters.
  * @return string
  */
 public function convertToSafeString($string, $spaceCharacter = '-')
 {
     // Quote the separator for use inside "/"-delimited patterns; without the delimiter
     // argument a separator of "/" would terminate the pattern early.
     $quotedSeparator = preg_quote($spaceCharacter, '/');
     $processedTitle = $this->csConvertor->conv_case('utf-8', $string, 'toLower');
     $processedTitle = strip_tags($processedTitle);
     $processedTitle = preg_replace('/[ \\-+_]+/', $spaceCharacter, $processedTitle);
     $processedTitle = $this->csConvertor->specCharsToASCII('utf-8', $processedTitle);
     $processedTitle = preg_replace('/[^\\p{L}0-9' . $quotedSeparator . ']/u', '', $processedTitle);
     if ((string) $spaceCharacter !== '') {
         // Group the separator so the quantifier applies to the whole separator and not only
         // to its last byte; skipped for an empty separator, which would otherwise yield a
         // pattern consisting of a bare quantifier.
         $processedTitle = preg_replace('/(?:' . $quotedSeparator . '){2,}/u', $spaceCharacter, $processedTitle);
     }
     $processedTitle = trim($processedTitle, $spaceCharacter);
     // TODO Post-processing hook here
     $processedTitle = strtolower($processedTitle);
     return $processedTitle;
 }
예제 #3
0
 /**
  * Handler for the opening of a tag.
  *
  * Any character data collected so far is first passed to spellCheckHandler() and
  * cleared. The opening tag is then re-serialised into $this->text with a lowercased
  * tag name and its attributes. The "spellchecker" tag itself produces no output;
  * br, img, hr and area (all-lowercase or all-uppercase spelling only) are written
  * as self-closing tags.
  *
  * @param mixed $xml_parser Parser handle, forwarded unchanged to spellCheckHandler().
  * @param string $tag Name of the element being opened.
  * @param array $attributes Attribute name => value pairs of the element.
  * @return void
  */
 public function startHandler($xml_parser, $tag, $attributes)
 {
     // Flush pending character data before handling the new element
     if ((string) $this->xmlCharacterData !== '') {
         $this->spellCheckHandler($xml_parser, $this->xmlCharacterData);
         $this->xmlCharacterData = '';
     }
     // The wrapper element is never echoed back
     if ($tag == 'spellchecker') {
         return;
     }
     $markup = '<' . $this->csConvObj->conv_case($this->parserCharset, $tag, 'toLower') . ' ';
     // NOTE(review): attribute values are written as received, without escaping - confirm
     // that the incoming values cannot contain a double quote or markup.
     foreach ($attributes as $attributeName => $attributeValue) {
         $markup .= $attributeName . '="' . $attributeValue . '" ';
     }
     // Loose comparison on purpose, mirroring a switch statement over the same literals
     $isVoidElement = in_array($tag, array('br', 'BR', 'img', 'IMG', 'hr', 'HR', 'area', 'AREA'));
     $markup .= $isVoidElement ? ' />' : '>';
     $this->text .= $markup;
 }
 /**
  * Checks that $value is one of the comma-separated entries in $this->options['array'].
  * If it is not, an error is added to the result via addError().
  *
  * Values for which empty() is true (this includes the string "0") and non-string
  * values are not validated at all. With the "ignorecase" option both the value and
  * the allowed entries are lowercased before comparing; the "strict" option switches
  * in_array() to strict comparison.
  *
  * @param mixed $value
  * @return void
  */
 public function isValid($value)
 {
     if (empty($value) || !is_string($value)) {
         return;
     }
     $allowedOptionsArray = GeneralUtility::trimExplode(',', $this->options['array'], true);
     if (!empty($this->options['ignorecase'])) {
         $value = $this->charsetConverter->conv_case('utf-8', $value, 'toLower');
         // Write back by index instead of iterating by reference, so that no dangling
         // reference to the last element is left behind after the loop.
         foreach ($allowedOptionsArray as $index => $option) {
             $allowedOptionsArray[$index] = $this->charsetConverter->conv_case('utf-8', $option, 'toLower');
         }
     }
     if (!in_array($value, $allowedOptionsArray, !empty($this->options['strict']))) {
         $this->addError($this->renderMessage($this->options['errorMessage'][0], $this->options['errorMessage'][1], 'error'), 1442002594);
     }
 }
 /**
  * Initialize internal variables, especially selector box values for the search form and search words.
  *
  * In order, this method:
  *  - reads the indexed_search extension configuration and sets the metaphone flags,
  *  - instantiates the configured external parsers and the lexer,
  *  - normalises the incoming piVars ("_sections", "_freeIndexUid", previous search words, "results"),
  *  - builds $this->optValues (the selector box options) and extends it with free index
  *    configurations, media types, section menus and search languages,
  *  - registers the localized AND/OR/NOT operators,
  *  - calls the "initialize_postProc" hook,
  *  - fills missing piVars with the first option of each selector,
  *  - removes selectors/options listed in the "blind." configuration,
  *  - finally resolves the search words into $this->sWArr.
  *
  * @return void
  */
 public function initialize()
 {
     // Indexer configuration from Extension Manager interface:
     $this->indexerConfig = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']);
     $this->enableMetaphoneSearch = (bool) $this->indexerConfig['enableMetaphoneSearch'];
     $this->storeMetaphoneInfoAsWords = !\TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::isTableUsed('index_words');
     $this->timeTracker = GeneralUtility::makeInstance(TimeTracker::class);
     // Initialize external document parsers for icon display and other soft operations
     if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['external_parsers'])) {
         foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['external_parsers'] as $extension => $_objRef) {
             $this->external_parsers[$extension] = GeneralUtility::getUserObj($_objRef);
             // Init parser and if it returns FALSE, unset its entry again:
             if (!$this->external_parsers[$extension]->softInit($extension)) {
                 unset($this->external_parsers[$extension]);
             }
         }
     }
     // Init lexer (used for post-processing of search words)
     $lexerObjRef = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['lexer'] ?: \TYPO3\CMS\IndexedSearch\Lexer::class;
     $this->lexerObj = GeneralUtility::getUserObj($lexerObjRef);
     // If "_sections" is set, this value overrides any existing value.
     if ($this->piVars['_sections']) {
         $this->piVars['sections'] = $this->piVars['_sections'];
     }
     // Unless "_freeIndexUid" is exactly "_", its value overrides any existing "freeIndexUid" value.
     // NOTE(review): the strict comparison is also true when "_freeIndexUid" is not set at all,
     // in which case "freeIndexUid" is overwritten with null - confirm this is intended.
     if ($this->piVars['_freeIndexUid'] !== '_') {
         $this->piVars['freeIndexUid'] = $this->piVars['_freeIndexUid'];
     }
     // Add previous search words to current
     if ($this->piVars['sword_prev_include'] && $this->piVars['sword_prev']) {
         $this->piVars['sword'] = trim($this->piVars['sword_prev']) . ' ' . $this->piVars['sword'];
     }
     $this->piVars['results'] = MathUtility::forceIntegerInRange($this->piVars['results'], 1, 100, $this->defaultResultNumber);
     // Make sure that some cropping and markup constants used later are defined
     $this->loadSettings();
     // Selector-box values defined here:
     $this->optValues = array(
         'type' => array(
             '0' => $this->pi_getLL('opt_type_0'),
             '1' => $this->pi_getLL('opt_type_1'),
             '2' => $this->pi_getLL('opt_type_2'),
             '3' => $this->pi_getLL('opt_type_3'),
             '10' => $this->pi_getLL('opt_type_10'),
             '20' => $this->pi_getLL('opt_type_20')
         ),
         'defOp' => array(
             '0' => $this->pi_getLL('opt_defOp_0'),
             '1' => $this->pi_getLL('opt_defOp_1')
         ),
         'sections' => array(
             '0' => $this->pi_getLL('opt_sections_0'),
             '-1' => $this->pi_getLL('opt_sections_-1'),
             '-2' => $this->pi_getLL('opt_sections_-2'),
             '-3' => $this->pi_getLL('opt_sections_-3')
         ),
         'freeIndexUid' => array(
             '-1' => $this->pi_getLL('opt_freeIndexUid_-1'),
             '-2' => $this->pi_getLL('opt_freeIndexUid_-2'),
             '0' => $this->pi_getLL('opt_freeIndexUid_0')
         ),
         'media' => array(
             '-1' => $this->pi_getLL('opt_media_-1'),
             '0' => $this->pi_getLL('opt_media_0'),
             '-2' => $this->pi_getLL('opt_media_-2')
         ),
         'order' => array(
             'rank_flag' => $this->pi_getLL('opt_order_rank_flag'),
             'rank_freq' => $this->pi_getLL('opt_order_rank_freq'),
             'rank_first' => $this->pi_getLL('opt_order_rank_first'),
             'rank_count' => $this->pi_getLL('opt_order_rank_count'),
             'mtime' => $this->pi_getLL('opt_order_mtime'),
             'title' => $this->pi_getLL('opt_order_title'),
             'crdate' => $this->pi_getLL('opt_order_crdate')
         ),
         'group' => array(
             'sections' => $this->pi_getLL('opt_group_sections'),
             'flat' => $this->pi_getLL('opt_group_flat')
         ),
         'lang' => array(
             -1 => $this->pi_getLL('opt_lang_-1'),
             0 => $this->pi_getLL('opt_lang_0')
         ),
         'desc' => array(
             '0' => $this->pi_getLL('opt_desc_0'),
             '1' => $this->pi_getLL('opt_desc_1')
         ),
         'results' => array(
             '10' => '10',
             '20' => '20',
             '50' => '50',
             '100' => '100'
         )
     );
     // Remove this option if metaphone search is disabled
     if (!$this->enableMetaphoneSearch) {
         unset($this->optValues['type']['10']);
     }
     // Free Index Uid:
     if ($this->conf['search.']['defaultFreeIndexUidList']) {
         // intExplode() yields integers only, so the list can be put into the IN() clause directly
         $uidList = GeneralUtility::intExplode(',', $this->conf['search.']['defaultFreeIndexUidList']);
         $indexCfgRecords = $this->databaseConnection->exec_SELECTgetRows('uid,title', 'index_config', 'uid IN (' . implode(',', $uidList) . ')' . $this->cObj->enableFields('index_config'), '', '', '', 'uid');
         foreach ($uidList as $uidValue) {
             if (is_array($indexCfgRecords[$uidValue])) {
                 $this->optValues['freeIndexUid'][$uidValue] = $indexCfgRecords[$uidValue]['title'];
             }
         }
     }
     // Should we use join_pages instead of long lists of uids?
     if ($this->conf['search.']['skipExtendToSubpagesChecking']) {
         $this->join_pages = 1;
     }
     // Add media to search in:
     // Start with an empty list so the filter below never reads an undefined variable
     // when no "mediaList" is configured (an empty list means: no filtering).
     $mediaList = '';
     if (trim($this->conf['search.']['mediaList']) !== '') {
         $mediaList = implode(',', GeneralUtility::trimExplode(',', $this->conf['search.']['mediaList'], true));
     }
     foreach ($this->external_parsers as $extension => $obj) {
         // Skip unwanted extensions
         if ($mediaList && !GeneralUtility::inList($mediaList, $extension)) {
             continue;
         }
         if ($name = $obj->searchTypeMediaTitle($extension)) {
             $this->optValues['media'][$extension] = $this->pi_getLL('opt_sections_' . $extension, $name);
         }
     }
     // Add operators for various languages
     // Converts the operators to lowercase
     $this->operator_translate_table[] = array($this->charsetConverter->conv_case('utf-8', $this->pi_getLL('local_operator_AND'), 'toLower'), 'AND');
     $this->operator_translate_table[] = array($this->charsetConverter->conv_case('utf-8', $this->pi_getLL('local_operator_OR'), 'toLower'), 'OR');
     $this->operator_translate_table[] = array($this->charsetConverter->conv_case('utf-8', $this->pi_getLL('local_operator_NOT'), 'toLower'), 'AND NOT');
     // This is the id of the site root. This value may be a commalist of integer (prepared for this)
     $this->wholeSiteIdList = (int) $this->frontendController->config['rootLine'][0]['uid'];
     // Creating levels for section menu:
     // This selects the first and secondary menus for the "sections" selector - so we can search in sections and sub sections.
     if ($this->conf['show.']['L1sections']) {
         $firstLevelMenu = $this->getMenu($this->wholeSiteIdList);
         foreach ($firstLevelMenu as $optionName => $mR) {
             if (!$mR['nav_hide']) {
                 $this->optValues['sections']['rl1_' . $mR['uid']] = trim($this->pi_getLL('opt_RL1') . ' ' . $mR['title']);
                 if ($this->conf['show.']['L2sections']) {
                     $secondLevelMenu = $this->getMenu($mR['uid']);
                     foreach ($secondLevelMenu as $kk2 => $mR2) {
                         if (!$mR2['nav_hide']) {
                             $this->optValues['sections']['rl2_' . $mR2['uid']] = trim($this->pi_getLL('opt_RL2') . ' ' . $mR2['title']);
                         } else {
                             // Hidden pages are removed so they do not end up in the "all" entry below
                             unset($secondLevelMenu[$kk2]);
                         }
                     }
                     $this->optValues['sections']['rl2_' . implode(',', array_keys($secondLevelMenu))] = $this->pi_getLL('opt_RL2ALL');
                 }
             } else {
                 // Hidden pages are removed so they do not end up in the "all" entry below
                 unset($firstLevelMenu[$optionName]);
             }
         }
         $this->optValues['sections']['rl1_' . implode(',', array_keys($firstLevelMenu))] = $this->pi_getLL('opt_RL1ALL');
     }
     // Setting the list of root IDs for the search. Notice, these page IDs MUST have a TypoScript template with root flag on them! Basically this list is used to select on the "rl0" field and page ids are registered as "rl0" only if a TypoScript template record with root flag is there.
     // This happens AFTER the use of $this->wholeSiteIdList above because the above will then fetch the menu for the CURRENT site - regardless of this kind of searching here. Thus a general search will lookup in the WHOLE database while a specific section search will take the current sections...
     if ($this->conf['search.']['rootPidList']) {
         $this->wholeSiteIdList = implode(',', GeneralUtility::intExplode(',', $this->conf['search.']['rootPidList']));
     }
     // Load the template
     $this->templateCode = $this->cObj->fileResource($this->conf['templateFile']);
     // Add search languages:
     $res = $this->databaseConnection->exec_SELECTquery('*', 'sys_language', '1=1' . $this->cObj->enableFields('sys_language'));
     while (false !== ($data = $this->databaseConnection->sql_fetch_assoc($res))) {
         $this->optValues['lang'][$data['uid']] = $data['title'];
     }
     $this->databaseConnection->sql_free_result($res);
     // Calling hook for modification of initialized content
     if ($hookObj = $this->hookRequest('initialize_postProc')) {
         $hookObj->initialize_postProc();
     }
     // Default values set:
     // Setting first values in optValues as default values IF there is not corresponding piVar value set already.
     foreach ($this->optValues as $optionName => $optionValue) {
         if (!isset($this->piVars[$optionName])) {
             reset($optionValue);
             $this->piVars[$optionName] = key($optionValue);
         }
     }
     // Blind selectors:
     if (is_array($this->conf['blind.'])) {
         foreach ($this->conf['blind.'] as $optionName => $optionValue) {
             if (is_array($optionValue)) {
                 // Sub-array (TypoScript key with trailing dot): remove single options of a selector
                 foreach ($optionValue as $optionValueSubKey => $optionValueSubValue) {
                     if (!is_array($optionValueSubValue) && $optionValueSubValue && is_array($this->optValues[substr($optionName, 0, -1)])) {
                         unset($this->optValues[substr($optionName, 0, -1)][$optionValueSubKey]);
                     }
                 }
             } elseif ($optionValue) {
                 // A truthy plain value removes the whole selector
                 unset($this->optValues[$optionName]);
             }
         }
     }
     // This gets the search-words into the $sWArr:
     $this->sWArr = $this->getSearchWords($this->piVars['defOp']);
 }
 /**
  * Splits the search word input into an array where each word is represented by an array with key "sword"
  * holding the search word and key "oper" holding the SQL operator (eg. AND, OR)
  *
  * The input ($this->sword) is cut to 200 bytes, converted to UTF-8 and has its entities decoded.
  * After that one of three things happens:
  *  - if a "getSearchWords" hook object is registered, its getSearchWords_splitSWords() result is returned as is,
  *  - if the search type is 20 ("sentence"), the whole trimmed input becomes a single AND word,
  *  - otherwise the input is exploded using the operator table (+, |, - and the localized
  *    AND/OR/NOT words, lowercased) and the result is passed through procSearchWordsByLexer().
  *
  * @param bool $defaultOperator If it equals 1 (loose comparison), the default operator is OR, otherwise AND
  * @return array|bool Search words if any found; FALSE when exploding the input did not yield an array
  */
 protected function getSearchWords($defaultOperator)
 {
     // Runaway protection only: this is a byte-based cut which does NOT take multibyte charsets into account.
     $rawInput = substr($this->sword, 0, 200);
     // Convert to UTF-8 + conv. entities (was also converted during indexing!)
     $utf8Input = $this->charsetConverter->conv($rawInput, $GLOBALS['TSFE']->metaCharset, 'utf-8');
     $utf8Input = $this->charsetConverter->entities_to_utf8($utf8Input);

     // A registered hook takes over the splitting completely
     $hookObj = $this->hookRequest('getSearchWords');
     if ($hookObj) {
         return $hookObj->getSearchWords_splitSWords($utf8Input, $defaultOperator);
     }

     // Sentence search: the input is kept as one single search word
     if ($this->searchData['searchType'] == 20) {
         return array(array('sword' => trim($utf8Input), 'oper' => 'AND'));
     }

     // Symbols and localized words that act as operators between search words
     $operatorTranslateTable = array(
         array('+', 'AND'),
         array('|', 'OR'),
         array('-', 'AND NOT')
     );
     $localizedOperators = array(
         'localizedOperandAnd' => 'AND',
         'localizedOperandOr' => 'OR',
         'localizedOperandNot' => 'AND NOT'
     );
     foreach ($localizedOperators as $labelKey => $sqlOperator) {
         $localizedWord = LocalizationUtility::translate($labelKey, 'IndexedSearch');
         $operatorTranslateTable[] = array(
             $this->charsetConverter->conv_case('utf-8', $localizedWord, 'toLower'),
             $sqlOperator
         );
     }

     $fallbackOperator = $defaultOperator == 1 ? 'OR' : 'AND';
     $explodedWords = \TYPO3\CMS\IndexedSearch\Utility\IndexedSearchUtility::getExplodedSearchString($utf8Input, $fallbackOperator, $operatorTranslateTable);
     if (!is_array($explodedWords)) {
         return false;
     }
     return $this->procSearchWordsByLexer($explodedWords);
 }