The class takes the HTML markup of a page, extracts the content and writes
it to the database (search index). It also provides a method to query the
search index, returning the matching entries.
Usage:
Search::indexPage($objPage->row());
$result = Search::searchFor('keyword');
while ($result->next())
{
echo $result->url;
}
/**
 * Prepare the output
 *
 * Removes the requested URL from the search index, redirects to a URL with
 * a language fragment if one is required but missing, and then looks up the
 * 404 page of the matching root page (optionally forwarding to another page).
 *
 * @return \PageModel
 *
 * @throws PageNotFoundException        If the root page has no 404 page
 * @throws ForwardPageNotFoundException If the configured forward page cannot be found
 *
 * @internal
 */
protected function prepare()
{
	// Remove the requested URL from the search index (see #3761)
	\Search::removeEntry(\Environment::get('relativeRequest'));

	// Determine the root page that belongs to the requested URL
	$rootPage = $this->getRootPageFromUrl();

	// Forward if the language should be but is not set (see #4028)
	if (\Config::get('addLanguageToUrl'))
	{
		// The request string without the script name
		$request = \Environment::get('relativeRequest');

		// Only redirect if there is no language fragment (see #4669)
		if ($request != '' && !preg_match('@^[a-z]{2}(\\-[A-Z]{2})?/@', $request))
		{
			if (preg_match('@^[a-z]{2}(\\-[A-Z]{2})?$@', $request))
			{
				// A language fragment without trailing slash only lacks the slash (see #7666)
				$this->redirect(\Environment::get('request') . '/', 301);
			}
			else
			{
				// Prepend the language and, if the request differs from the
				// relative request, the script name as well
				$target = ($request == \Environment::get('request'))
					? $rootPage->language . '/' . $request
					: \Environment::get('script') . '/' . $rootPage->language . '/' . $request;

				$this->redirect($target, 301);
			}
		}
	}

	// Look for a 404 page below the root page
	$notFoundPage = \PageModel::find404ByPid($rootPage->id);

	// Give up if there is no page at all
	if (null === $notFoundPage)
	{
		throw new PageNotFoundException('Page not found');
	}

	// Forward to another page
	if ($notFoundPage->autoforward && $notFoundPage->jumpTo)
	{
		$forwardPage = \PageModel::findPublishedById($notFoundPage->jumpTo);

		if (null === $forwardPage)
		{
			$this->log('Forward page ID "' . $notFoundPage->jumpTo . '" does not exist', __METHOD__, TL_ERROR);

			throw new ForwardPageNotFoundException('Forward page not found');
		}

		$forwardUrl = $this->generateFrontendUrl($forwardPage->row(), null, $rootPage->language);
		$statusCode = $notFoundPage->redirect == 'temporary' ? 302 : 301;

		$this->redirect($forwardUrl, $statusCode);
	}

	return $notFoundPage;
}
/**
 * Generate the module
 *
 * Assigns the search form variables to the template. If keywords were
 * submitted (and are not just "*") and no redirect page is configured, the
 * search is executed: the result is read from a JSON cache file if that file
 * is younger than 1800 seconds, otherwise it is queried via \Search::searchFor()
 * and written to the cache. Protected results are filtered, the remaining
 * results are paginated and each one is rendered with the result template.
 *
 * @throws PageNotFoundException If the requested result page is outside the valid range
 */
protected function compile()
{
	// Mark the x and y parameter as used (see #4277)
	if (isset($_GET['x']))
	{
		\Input::get('x');
		\Input::get('y');
	}

	// Trigger the search module from a custom form
	if (!isset($_GET['keywords']) && \Input::post('FORM_SUBMIT') == 'tl_search')
	{
		$_GET['keywords'] = \Input::post('keywords');
		$_GET['query_type'] = \Input::post('query_type');
		$_GET['per_page'] = \Input::post('per_page');
	}

	$blnFuzzy = $this->fuzzy;
	$strQueryType = \Input::get('query_type') ?: $this->queryType;
	$strKeywords = trim(\Input::get('keywords'));

	$this->Template->uniqueId = $this->id;
	$this->Template->queryType = $strQueryType;
	$this->Template->keyword = \StringUtil::specialchars($strKeywords);
	$this->Template->keywordLabel = $GLOBALS['TL_LANG']['MSC']['keywords'];
	$this->Template->optionsLabel = $GLOBALS['TL_LANG']['MSC']['options'];
	$this->Template->search = \StringUtil::specialchars($GLOBALS['TL_LANG']['MSC']['searchLabel']);
	$this->Template->matchAll = \StringUtil::specialchars($GLOBALS['TL_LANG']['MSC']['matchAll']);
	$this->Template->matchAny = \StringUtil::specialchars($GLOBALS['TL_LANG']['MSC']['matchAny']);
	$this->Template->action = ampersand(\Environment::get('indexFreeRequest'));
	$this->Template->advanced = $this->searchType == 'advanced';

	// Redirect page: submit the form to the related target page instead
	if ($this->jumpTo && ($objTarget = $this->objModel->getRelated('jumpTo')) instanceof PageModel)
	{
		/** @var PageModel $objTarget */
		$this->Template->action = $objTarget->getFrontendUrl();
	}

	$this->Template->pagination = '';
	$this->Template->results = '';

	// Execute the search if there are keywords
	if ($strKeywords != '' && $strKeywords != '*' && !$this->jumpTo)
	{
		// Reference page
		if ($this->rootPage > 0)
		{
			$intRootId = $this->rootPage;
			$arrPages = $this->Database->getChildRecords($this->rootPage, 'tl_page');
			array_unshift($arrPages, $this->rootPage);
		}
		else
		{
			/** @var PageModel $objPage */
			global $objPage;

			$intRootId = $objPage->rootId;
			$arrPages = $this->Database->getChildRecords($objPage->rootId, 'tl_page');
		}

		// HOOK: add custom logic (see #5223)
		if (isset($GLOBALS['TL_HOOKS']['customizeSearch']) && is_array($GLOBALS['TL_HOOKS']['customizeSearch']))
		{
			foreach ($GLOBALS['TL_HOOKS']['customizeSearch'] as $callback)
			{
				$this->import($callback[0]);
				$this->{$callback[0]}->{$callback[1]}($arrPages, $strKeywords, $strQueryType, $blnFuzzy);
			}
		}

		// Return if there are no pages
		if (!is_array($arrPages) || empty($arrPages))
		{
			return;
		}

		// Cache directory relative to TL_ROOT
		$strCachePath = str_replace(TL_ROOT . DIRECTORY_SEPARATOR, '', \System::getContainer()->getParameter('kernel.cache_dir'));

		$arrResult = null;
		$strChecksum = md5($strKeywords . $strQueryType . $intRootId . $blnFuzzy);
		$query_starttime = microtime(true);
		$strCacheFile = $strCachePath . '/contao/search/' . $strChecksum . '.json';

		// Load the cached result
		if (file_exists(TL_ROOT . '/' . $strCacheFile))
		{
			$objFile = new \File($strCacheFile);

			// Only use cache files that are younger than 1800 seconds
			if ($objFile->mtime > time() - 1800)
			{
				$arrResult = json_decode($objFile->getContent(), true);
			}
			else
			{
				$objFile->delete();
			}
		}

		// Cache the result
		if ($arrResult === null)
		{
			try
			{
				$objSearch = \Search::searchFor($strKeywords, $strQueryType == 'or', $arrPages, 0, 0, $blnFuzzy);
				$arrResult = $objSearch->fetchAllAssoc();
			}
			catch (\Exception $e)
			{
				// A failed search is logged and treated as an empty result
				$this->log('Website search failed: ' . $e->getMessage(), __METHOD__, TL_ERROR);
				$arrResult = array();
			}

			\File::putContent($strCacheFile, json_encode($arrResult));
		}

		$query_endtime = microtime(true);

		// Sort out protected pages
		if (\Config::get('indexProtected') && !BE_USER_LOGGED_IN)
		{
			$this->import('FrontendUser', 'User');

			foreach ($arrResult as $k => $v)
			{
				if ($v['protected'])
				{
					if (!FE_USER_LOGGED_IN)
					{
						unset($arrResult[$k]);
					}
					else
					{
						$groups = \StringUtil::deserialize($v['groups']);

						// Drop the result unless the user shares at least one group with it
						if (!is_array($groups) || empty($groups) || !count(array_intersect($groups, $this->User->groups)))
						{
							unset($arrResult[$k]);
						}
					}
				}
			}

			// Re-index the array so the positional access below works
			$arrResult = array_values($arrResult);
		}

		$count = count($arrResult);

		$this->Template->count = $count;
		$this->Template->page = null;
		$this->Template->keywords = $strKeywords;

		// No results
		if ($count < 1)
		{
			$this->Template->header = sprintf($GLOBALS['TL_LANG']['MSC']['sEmpty'], $strKeywords);
			$this->Template->duration = substr($query_endtime - $query_starttime, 0, 6) . ' ' . $GLOBALS['TL_LANG']['MSC']['seconds'];

			return;
		}

		$from = 1;
		$to = $count;

		// Pagination
		if ($this->perPage > 0)
		{
			$id = 'page_s' . $this->id;
			$page = \Input::get($id) !== null ? \Input::get($id) : 1;

			// The page size may be overridden by the request. Only accept a
			// positive integer; anything else falls back to the module setting
			// so the arithmetic below never operates on arbitrary input.
			$per_page = (int) \Input::get('per_page');

			if ($per_page < 1)
			{
				$per_page = $this->perPage;
			}

			// Do not index or cache the page if the page number is outside the range
			if ($page < 1 || $page > max(ceil($count / $per_page), 1))
			{
				throw new PageNotFoundException('Page not found: ' . \Environment::get('uri'));
			}

			$from = ($page - 1) * $per_page + 1;
			$to = $from + $per_page > $count ? $count : $from + $per_page - 1;

			// Pagination menu
			if ($to < $count || $from > 1)
			{
				$objPagination = new \Pagination($count, $per_page, \Config::get('maxPaginationLinks'), $id);
				$this->Template->pagination = $objPagination->generate("\n ");
			}

			$this->Template->page = $page;
		}

		// Get the results
		for ($i = $from - 1; $i < $to && $i < $count; $i++)
		{
			/** @var FrontendTemplate|object $objTemplate */
			$objTemplate = new \FrontendTemplate($this->searchTpl);

			$objTemplate->url = $arrResult[$i]['url'];
			$objTemplate->link = $arrResult[$i]['title'];
			$objTemplate->href = $arrResult[$i]['url'];
			$objTemplate->title = \StringUtil::specialchars($arrResult[$i]['title']);
			$objTemplate->class = ($i == $from - 1 ? 'first ' : '') . ($i == $to - 1 || $i == $count - 1 ? 'last ' : '') . ($i % 2 == 0 ? 'even' : 'odd');

			// The relevance is expressed as a percentage of the first result's relevance
			$objTemplate->relevance = sprintf($GLOBALS['TL_LANG']['MSC']['relevance'], number_format($arrResult[$i]['relevance'] / $arrResult[0]['relevance'] * 100, 2) . '%');
			$objTemplate->filesize = $arrResult[$i]['filesize'];
			$objTemplate->matches = $arrResult[$i]['matches'];

			$arrContext = array();
			$arrQuotedMatches = array();
			$arrMatches = \StringUtil::trimsplit(',', $arrResult[$i]['matches']);

			// Get the context
			foreach ($arrMatches as $strWord)
			{
				// Escape every regex metacharacter (and the "/" delimiter), not
				// only "+", so a matched word can never corrupt the pattern
				$strQuotedWord = preg_quote($strWord, '/');
				$arrQuotedMatches[] = $strQuotedWord;

				$arrChunks = array();
				preg_match_all('/(^|\\b.{0,' . $this->contextLength . '}\\PL)' . $strQuotedWord . '(\\PL.{0,' . $this->contextLength . '}\\b|$)/ui', $arrResult[$i]['text'], $arrChunks);

				foreach ($arrChunks[0] as $strContext)
				{
					$arrContext[] = ' ' . $strContext . ' ';
				}
			}

			// Shorten the context and highlight all keywords
			if (!empty($arrContext))
			{
				$objTemplate->context = trim(\StringUtil::substrHtml(implode('…', $arrContext), $this->totalLength));
				$objTemplate->context = preg_replace('/(\\PL)(' . implode('|', $arrQuotedMatches) . ')(\\PL)/ui', '$1<mark class="highlight">$2</mark>$3', $objTemplate->context);

				$objTemplate->hasContext = true;
			}

			$this->Template->results .= $objTemplate->parse();
		}

		$this->Template->header = vsprintf($GLOBALS['TL_LANG']['MSC']['sResults'], array($from, $to, $count, $strKeywords));
		$this->Template->duration = substr($query_endtime - $query_starttime, 0, 6) . ' ' . $GLOBALS['TL_LANG']['MSC']['seconds'];
	}
}
/**
 * Index a page if applicable
 *
 * The response content is handed to \Search::indexPage() only if a current
 * page exists, searching is enabled, the page is a regular page that is not
 * excluded from the search, no back end user is logged in, the protection
 * settings allow it and none of the GET parameters forbids indexing.
 *
 * @param Response $objResponse
 */
public static function indexPageIfApplicable(Response $objResponse)
{
	global $objPage;

	if ($objPage === null)
	{
		return;
	}

	// Only index if searching is allowed and there is no back end user
	if (!\Config::get('enableSearch') || $objPage->type != 'regular' || BE_USER_LOGGED_IN || $objPage->noSearch)
	{
		return;
	}

	// Without "indexProtected", skip protected pages and requests of logged in front end users
	if (!\Config::get('indexProtected') && (FE_USER_LOGGED_IN || $objPage->protected))
	{
		return;
	}

	// Do not index the page if certain parameters are set
	foreach (array_keys($_GET) as $strParam)
	{
		if (in_array($strParam, $GLOBALS['TL_NOINDEX_KEYS']) || strncmp($strParam, 'page_', 5) === 0)
		{
			return;
		}
	}

	\Search::indexPage(array
	(
		'url' => \Environment::get('base') . \Environment::get('relativeRequest'),
		'content' => $objResponse->getContent(),
		'title' => $objPage->pageTitle ?: $objPage->title,
		'protected' => $objPage->protected ? '1' : '',
		'groups' => $objPage->groups,
		'pid' => $objPage->id,
		'language' => $objPage->language
	));
}