The class takes the HTML markup of a page, extracts the content and writes it to the database (search index). It also provides a method to query the search index, returning the matching entries. Usage: Search::indexPage($objPage->row()); $result = Search::searchFor('keyword'); while ($result->next()) { echo $result->url; }
Пример #1
0
 /**
  * Determine the 404 page to render for the current request.
  *
  * Removes the requested URL from the search index, redirects to a URL with
  * a language fragment if "addLanguageToUrl" is enabled and the fragment is
  * missing, and then looks up the 404 page below the matching root page.
  * If that 404 page is configured to forward, a redirect to the forward
  * target is issued.
  *
  * @return \PageModel The 404 page model
  *
  * @throws PageNotFoundException        If the root page has no 404 page
  * @throws ForwardPageNotFoundException If the forward target cannot be found
  *
  * @internal
  */
 protected function prepare()
 {
     // Remove the requested URL from the search index (see #3761)
     \Search::removeEntry(\Environment::get('relativeRequest'));

     // Root page that matches the current URL
     $objRoot = $this->getRootPageFromUrl();

     // The language fragment is mandatory but might be missing (see #4028)
     if (\Config::get('addLanguageToUrl')) {
         // Request string without the script name
         $strPath = \Environment::get('relativeRequest');

         // Nothing to do if the request already starts with "xx/" or "xx-XX/" (see #4669)
         if ($strPath != '' && !preg_match('@^[a-z]{2}(\\-[A-Z]{2})?/@', $strPath)) {
             if (preg_match('@^[a-z]{2}(\\-[A-Z]{2})?$@', $strPath)) {
                 // Bare language fragment: only the trailing slash is missing (see #7666)
                 $strTarget = \Environment::get('request') . '/';
             } elseif ($strPath == \Environment::get('request')) {
                 $strTarget = $objRoot->language . '/' . $strPath;
             } else {
                 // The request contains the script name, so keep it in front
                 $strTarget = \Environment::get('script') . '/' . $objRoot->language . '/' . $strPath;
             }

             $this->redirect($strTarget, 301);
         }
     }

     // Look for a 404 page below the root page
     $obj404 = \PageModel::find404ByPid($objRoot->id);

     // Without a 404 page there is nothing to show at all
     if (null === $obj404) {
         throw new PageNotFoundException('Page not found');
     }

     // No forwarding configured: the 404 page itself is the result
     if (!$obj404->autoforward || !$obj404->jumpTo) {
         return $obj404;
     }

     $objTarget = \PageModel::findPublishedById($obj404->jumpTo);

     if (null === $objTarget) {
         $this->log('Forward page ID "' . $obj404->jumpTo . '" does not exist', __METHOD__, TL_ERROR);
         throw new ForwardPageNotFoundException('Forward page not found');
     }

     $strForwardUrl = $this->generateFrontendUrl($objTarget->row(), null, $objRoot->language);
     $intStatus = $obj404->redirect == 'temporary' ? 302 : 301;

     $this->redirect($strForwardUrl, $intStatus);

     return $obj404;
 }
Пример #2
0
 /**
  * Generate the module
  *
  * Populates the search form variables of the template and, if keywords were
  * submitted and no redirect page is configured, runs the search (using a
  * JSON file cache that is considered fresh for 1800 seconds), removes
  * protected pages the visitor may not see, paginates the result and renders
  * each hit with its highlighted context.
  *
  * @throws PageNotFoundException If the requested result page is out of range
  */
 protected function compile()
 {
     // Mark the x and y parameter as used (see #4277)
     if (isset($_GET['x'])) {
         \Input::get('x');
         \Input::get('y');
     }
     // Trigger the search module from a custom form
     if (!isset($_GET['keywords']) && \Input::post('FORM_SUBMIT') == 'tl_search') {
         $_GET['keywords'] = \Input::post('keywords');
         $_GET['query_type'] = \Input::post('query_type');
         $_GET['per_page'] = \Input::post('per_page');
     }
     $blnFuzzy = $this->fuzzy;
     $strQueryType = \Input::get('query_type') ?: $this->queryType;
     $strKeywords = trim(\Input::get('keywords'));
     $this->Template->uniqueId = $this->id;
     $this->Template->queryType = $strQueryType;
     $this->Template->keyword = \StringUtil::specialchars($strKeywords);
     $this->Template->keywordLabel = $GLOBALS['TL_LANG']['MSC']['keywords'];
     $this->Template->optionsLabel = $GLOBALS['TL_LANG']['MSC']['options'];
     $this->Template->search = \StringUtil::specialchars($GLOBALS['TL_LANG']['MSC']['searchLabel']);
     $this->Template->matchAll = \StringUtil::specialchars($GLOBALS['TL_LANG']['MSC']['matchAll']);
     $this->Template->matchAny = \StringUtil::specialchars($GLOBALS['TL_LANG']['MSC']['matchAny']);
     $this->Template->action = ampersand(\Environment::get('indexFreeRequest'));
     $this->Template->advanced = $this->searchType == 'advanced';
     // Redirect page
     if ($this->jumpTo && ($objTarget = $this->objModel->getRelated('jumpTo')) instanceof PageModel) {
         /** @var PageModel $objTarget */
         $this->Template->action = $objTarget->getFrontendUrl();
     }
     $this->Template->pagination = '';
     $this->Template->results = '';
     // Execute the search if there are keywords
     if ($strKeywords != '' && $strKeywords != '*' && !$this->jumpTo) {
         // Reference page
         if ($this->rootPage > 0) {
             $intRootId = $this->rootPage;
             $arrPages = $this->Database->getChildRecords($this->rootPage, 'tl_page');
             array_unshift($arrPages, $this->rootPage);
         } else {
             /** @var PageModel $objPage */
             global $objPage;
             $intRootId = $objPage->rootId;
             $arrPages = $this->Database->getChildRecords($objPage->rootId, 'tl_page');
         }
         // HOOK: add custom logic (see #5223)
         if (isset($GLOBALS['TL_HOOKS']['customizeSearch']) && is_array($GLOBALS['TL_HOOKS']['customizeSearch'])) {
             foreach ($GLOBALS['TL_HOOKS']['customizeSearch'] as $callback) {
                 $this->import($callback[0]);
                 $this->{$callback[0]}->{$callback[1]}($arrPages, $strKeywords, $strQueryType, $blnFuzzy);
             }
         }
         // Return if there are no pages
         if (!is_array($arrPages) || empty($arrPages)) {
             return;
         }
         $strCachePath = str_replace(TL_ROOT . DIRECTORY_SEPARATOR, '', \System::getContainer()->getParameter('kernel.cache_dir'));
         $arrResult = null;
         $strChecksum = md5($strKeywords . $strQueryType . $intRootId . $blnFuzzy);
         $query_starttime = microtime(true);
         $strCacheFile = $strCachePath . '/contao/search/' . $strChecksum . '.json';
         // Load the cached result
         if (file_exists(TL_ROOT . '/' . $strCacheFile)) {
             $objFile = new \File($strCacheFile);
             if ($objFile->mtime > time() - 1800) {
                 $arrResult = json_decode($objFile->getContent(), true);
                 // A truncated or otherwise invalid cache file must not be
                 // used as a result set; fall through to a fresh search
                 if (!is_array($arrResult)) {
                     $arrResult = null;
                 }
             } else {
                 $objFile->delete();
             }
         }
         // Cache the result
         if ($arrResult === null) {
             try {
                 $objSearch = \Search::searchFor($strKeywords, $strQueryType == 'or', $arrPages, 0, 0, $blnFuzzy);
                 $arrResult = $objSearch->fetchAllAssoc();
             } catch (\Exception $e) {
                 $this->log('Website search failed: ' . $e->getMessage(), __METHOD__, TL_ERROR);
                 $arrResult = array();
             }
             \File::putContent($strCacheFile, json_encode($arrResult));
         }
         $query_endtime = microtime(true);
         // Format with fixed decimals before truncating: casting a very small
         // float to a string yields scientific notation (e.g. "5.0067E-5"),
         // which substr() alone would turn into a misleading "5.0067"
         $strDuration = substr(number_format($query_endtime - $query_starttime, 6, '.', ''), 0, 6);
         // Sort out protected pages
         if (\Config::get('indexProtected') && !BE_USER_LOGGED_IN) {
             $this->import('FrontendUser', 'User');
             foreach ($arrResult as $k => $v) {
                 if ($v['protected']) {
                     if (!FE_USER_LOGGED_IN) {
                         unset($arrResult[$k]);
                     } else {
                         $groups = \StringUtil::deserialize($v['groups']);
                         if (!is_array($groups) || empty($groups) || !count(array_intersect($groups, $this->User->groups))) {
                             unset($arrResult[$k]);
                         }
                     }
                 }
             }
             // Re-index so that offset based access below keeps working
             $arrResult = array_values($arrResult);
         }
         $count = count($arrResult);
         $this->Template->count = $count;
         $this->Template->page = null;
         $this->Template->keywords = $strKeywords;
         // No results
         if ($count < 1) {
             $this->Template->header = sprintf($GLOBALS['TL_LANG']['MSC']['sEmpty'], $strKeywords);
             $this->Template->duration = $strDuration . ' ' . $GLOBALS['TL_LANG']['MSC']['seconds'];
             return;
         }
         $from = 1;
         $to = $count;
         // Pagination
         if ($this->perPage > 0) {
             $id = 'page_s' . $this->id;
             $page = \Input::get($id) !== null ? \Input::get($id) : 1;
             // Only accept a positive number from the request; everything else
             // (empty, non-numeric, zero, negative) falls back to the module
             // setting so the range arithmetic below stays well-defined
             $varPerPage = \Input::get('per_page');
             $per_page = (is_numeric($varPerPage) && (int) $varPerPage > 0) ? (int) $varPerPage : $this->perPage;
             // Do not index or cache the page if the page number is outside the range
             if ($page < 1 || $page > max(ceil($count / $per_page), 1)) {
                 throw new PageNotFoundException('Page not found: ' . \Environment::get('uri'));
             }
             $from = ($page - 1) * $per_page + 1;
             $to = $from + $per_page > $count ? $count : $from + $per_page - 1;
             // Pagination menu
             if ($to < $count || $from > 1) {
                 $objPagination = new \Pagination($count, $per_page, \Config::get('maxPaginationLinks'), $id);
                 $this->Template->pagination = $objPagination->generate("\n  ");
             }
             $this->Template->page = $page;
         }
         // The relevance of each hit is shown relative to the first result
         $fltTopRelevance = $arrResult[0]['relevance'];
         // Get the results
         for ($i = $from - 1; $i < $to && $i < $count; $i++) {
             /** @var FrontendTemplate|object $objTemplate */
             $objTemplate = new \FrontendTemplate($this->searchTpl);
             $objTemplate->url = $arrResult[$i]['url'];
             $objTemplate->link = $arrResult[$i]['title'];
             $objTemplate->href = $arrResult[$i]['url'];
             $objTemplate->title = \StringUtil::specialchars($arrResult[$i]['title']);
             $objTemplate->class = ($i == $from - 1 ? 'first ' : '') . ($i == $to - 1 || $i == $count - 1 ? 'last ' : '') . ($i % 2 == 0 ? 'even' : 'odd');
             // Guard against a division by zero if the top result has no relevance
             $fltPercent = $fltTopRelevance > 0 ? $arrResult[$i]['relevance'] / $fltTopRelevance * 100 : 0;
             $objTemplate->relevance = sprintf($GLOBALS['TL_LANG']['MSC']['relevance'], number_format($fltPercent, 2) . '%');
             $objTemplate->filesize = $arrResult[$i]['filesize'];
             $objTemplate->matches = $arrResult[$i]['matches'];
             $arrContext = array();
             $arrQuoted = array();
             $arrMatches = \StringUtil::trimsplit(',', $arrResult[$i]['matches']);
             // Get the context
             foreach ($arrMatches as $strWord) {
                 // An empty word would make the pattern match at every boundary
                 if ((string) $strWord === '') {
                     continue;
                 }
                 // The word is literal text, so escape every regex meta
                 // character as well as the "/" delimiter
                 $strQuoted = preg_quote($strWord, '/');
                 $arrQuoted[] = $strQuoted;
                 $arrChunks = array();
                 preg_match_all('/(^|\\b.{0,' . $this->contextLength . '}\\PL)' . $strQuoted . '(\\PL.{0,' . $this->contextLength . '}\\b|$)/ui', $arrResult[$i]['text'], $arrChunks);
                 foreach ($arrChunks[0] as $strContext) {
                     $arrContext[] = ' ' . $strContext . ' ';
                 }
             }
             // Shorten the context and highlight all keywords
             if (!empty($arrContext)) {
                 $objTemplate->context = trim(\StringUtil::substrHtml(implode('…', $arrContext), $this->totalLength));
                 $objTemplate->context = preg_replace('/(\\PL)(' . implode('|', $arrQuoted) . ')(\\PL)/ui', '$1<mark class="highlight">$2</mark>$3', $objTemplate->context);
                 $objTemplate->hasContext = true;
             }
             $this->Template->results .= $objTemplate->parse();
         }
         $this->Template->header = vsprintf($GLOBALS['TL_LANG']['MSC']['sResults'], array($from, $to, $count, $strKeywords));
         $this->Template->duration = $strDuration . ' ' . $GLOBALS['TL_LANG']['MSC']['seconds'];
     }
 }
Пример #3
0
 /**
  * Add the current page to the search index if it qualifies.
  *
  * A page is indexed only if searching is enabled, it is a regular page that
  * is not excluded from the search, no back end user is logged in, and none
  * of the GET parameters is listed in TL_NOINDEX_KEYS or starts with "page_".
  * Unless "indexProtected" is enabled, the page must also be unprotected and
  * no front end user may be logged in.
  *
  * @param Response $objResponse The response whose content is indexed
  */
 public static function indexPageIfApplicable(Response $objResponse)
 {
     global $objPage;

     if ($objPage === null) {
         return;
     }

     // Searching must be allowed and there must be no back end user
     if (!\Config::get('enableSearch') || $objPage->type != 'regular' || BE_USER_LOGGED_IN || $objPage->noSearch) {
         return;
     }

     // Protected content is only indexed if explicitly enabled
     if (!\Config::get('indexProtected') && (FE_USER_LOGGED_IN || $objPage->protected)) {
         return;
     }

     // Do not index the page if certain parameters are set
     foreach (array_keys($_GET) as $strParam) {
         if (in_array($strParam, $GLOBALS['TL_NOINDEX_KEYS']) || strncmp($strParam, 'page_', 5) === 0) {
             return;
         }
     }

     $arrEntry = array(
         'url' => \Environment::get('base') . \Environment::get('relativeRequest'),
         'content' => $objResponse->getContent(),
         'title' => $objPage->pageTitle ?: $objPage->title,
         'protected' => $objPage->protected ? '1' : '',
         'groups' => $objPage->groups,
         'pid' => $objPage->id,
         'language' => $objPage->language
     );

     \Search::indexPage($arrEntry);
 }