/**
 * Crawls every configured frontend seed URL and, if a temporary index
 * directory exists afterwards, replaces the live index directory with it.
 *
 * Steps, in order:
 *  1. Abort when the core setting 'running' is TRUE.
 *  2. Remove any left-over temporary index directory.
 *  3. Run one Parser per seed URL (start + optimize).
 *  4. Mark the crawler as finished and copy the temporary index over the live one.
 *
 * @return bool|null FALSE when the core setting 'running' is TRUE; otherwise no value is returned.
 *
 * @throws \Exception any exception raised outside the per-seed crawl loop is logged and re-thrown.
 */
public static function runCrawler()
{
    $running = Configuration::getCoreSetting('running');
    if ($running === TRUE) {
        return FALSE;
    }

    $indexDir = \LuceneSearch\Plugin::getFrontendSearchIndex();
    if (!$indexDir) {
        return;
    }

    // Derive the temporary index location once, with the same '/index' -> '/tmpindex'
    // replacement used when the temporary index is opened, so that the directory
    // removed here is the directory that is checked and copied below.
    $tmpIndex = str_replace('/index', '/tmpindex', $indexDir);

    // rtrim() instead of substr(..., 0, -1): only strips a trailing slash when there is one.
    $tmpIndexPath = rtrim($tmpIndex, '/');
    $indexPath = rtrim($indexDir, '/');

    // escapeshellarg() keeps paths with spaces or shell metacharacters intact.
    exec('rm -Rf ' . escapeshellarg($tmpIndexPath));
    \Pimcore\Logger::debug('LuceneSearch: rm -Rf ' . $tmpIndexPath);
    \Pimcore\Logger::debug('LuceneSearch: Starting crawl');

    try {
        $urls = Configuration::get('frontend.urls');

        $invalidLinkRegexesSystem = Configuration::get('frontend.invalidLinkRegexes');
        $invalidLinkRegexesEditable = Configuration::get('frontend.invalidLinkRegexesEditable');

        // Editable regexes first (when present), then the system value appended as one entry.
        $invalidLinkRegexes = !empty($invalidLinkRegexesEditable) ? $invalidLinkRegexesEditable : array();
        if (!empty($invalidLinkRegexesSystem)) {
            $invalidLinkRegexes = array_merge($invalidLinkRegexes, array($invalidLinkRegexesSystem));
        }

        self::setCrawlerState('frontend', 'started', TRUE);

        try {
            foreach ($urls as $seed) {
                $parser = new Parser();

                $parser
                    ->setDepth(Configuration::get('frontend.crawler.maxLinkDepth'))
                    ->setValidLinkRegexes(Configuration::get('frontend.validLinkRegexes'))
                    ->setInvalidLinkRegexes($invalidLinkRegexes)
                    ->setSearchStartIndicator(Configuration::get('frontend.crawler.contentStartIndicator'))
                    ->setSearchEndIndicator(Configuration::get('frontend.crawler.contentEndIndicator'))
                    ->setSearchExcludeStartIndicator(Configuration::get('frontend.crawler.contentExcludeStartIndicator'))
                    ->setSearchExcludeEndIndicator(Configuration::get('frontend.crawler.contentExcludeEndIndicator'))
                    ->setAllowSubdomain(FALSE)
                    ->setAllowedSchemes(Configuration::get('frontend.allowedSchemes'))
                    ->setDownloadLimit(Configuration::get('frontend.crawler.maxDownloadLimit'))
                    ->setSeed($seed);

                if (Configuration::get('frontend.auth.useAuth') === TRUE) {
                    $parser->setAuth(
                        Configuration::get('frontend.auth.username'),
                        Configuration::get('frontend.auth.password')
                    );
                }

                $parser->startParser();
                $parser->optimizeIndex();
            }
        } catch (\Exception $e) {
            // A failing crawl must not leave the crawler state stuck on "started",
            // so execution continues below - but the failure is logged, not hidden.
            \Pimcore\Logger::error($e);
        }

        self::setCrawlerState('frontend', 'finished', FALSE);

        // only remove index, if tmp exists!
        if (is_dir($tmpIndexPath)) {
            exec('rm -Rf ' . escapeshellarg($indexDir));
            \Pimcore\Logger::debug('LuceneSearch: rm -Rf ' . $indexDir);

            exec('cp -R ' . escapeshellarg($tmpIndexPath) . ' ' . escapeshellarg($indexPath));
            \Pimcore\Logger::debug('LuceneSearch: cp -R ' . $tmpIndexPath . ' ' . $indexPath);

            \Pimcore\Logger::debug('LuceneSearch: replaced old index');
            \Pimcore\Logger::info('LuceneSearch: Finished crawl');
        } else {
            \Pimcore\Logger::error('LuceneSearch: skipped index replacing. no tmp index found.');
        }
    } catch (\Exception $e) {
        \Pimcore\Logger::error($e);
        throw $e;
    }
}
/**
 * Opens the Lucene index located at the path reported by
 * Plugin::getFrontendSearchIndex() and stores the handle in $this->index.
 */
public function __construct()
{
    $this->index = \Zend_Search_Lucene::open(Plugin::getFrontendSearchIndex());
}
/**
 * Prepares the controller for a frontend search request.
 *
 * Opens the frontend Lucene index and copies search settings from the
 * configuration and from request parameters (q, language, category, country,
 * fuzzy, perPage, page) onto the instance.
 *
 * @return bool|null FALSE when 'frontend.enabled' is not set; otherwise no value is returned.
 *
 * @throws \Exception with message 'could not open index' when anything inside the
 *                    setup fails; the original exception is attached as "previous".
 */
public function init()
{
    parent::init();

    if (!Configuration::get('frontend.enabled')) {
        return FALSE;
    }

    try {
        \Zend_Search_Lucene_Analysis_Analyzer::setDefault(
            new \Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive()
        );

        $this->frontendIndex = \Zend_Search_Lucene::open(Plugin::getFrontendSearchIndex());
        $this->categories = Configuration::get('frontend.categories');

        // Set search term query
        $searchQuery = $this->cleanRequestString($this->getParam('q'));
        if (!empty($searchQuery)) {
            $this->query = Plugin::cleanTerm($searchQuery);
            $this->untouchedQuery = $this->query;
        }

        // Set language: request parameter first, then the registered locale, then 'en'
        if (Configuration::get('frontend.ignoreLanguage') !== TRUE) {
            $this->searchLanguage = $this->getParam('language');

            if (empty($this->searchLanguage)) {
                try {
                    $this->searchLanguage = \Zend_Registry::get('Zend_Locale');
                } catch (\Exception $e) {
                    // Fully qualified on purpose: an unqualified "Exception" resolves
                    // relative to the current namespace and would never match, which
                    // would make this fallback unreachable.
                    $this->searchLanguage = 'en';
                }
            }
        } else {
            $this->searchLanguage = NULL;
        }

        // Set category
        $queryCategory = $this->cleanRequestString($this->getParam('category'));
        if (!empty($queryCategory)) {
            $this->category = $queryCategory;
        }

        // Set country: 'global' and a missing value both map to 'international'
        if (Configuration::get('frontend.ignoreCountry') !== TRUE) {
            $this->searchCountry = $this->getParam('country');

            if ($this->searchCountry == 'global' || empty($this->searchCountry)) {
                $this->searchCountry = 'international';
            }
        } else {
            $this->searchCountry = NULL;
        }

        // Set restrictions (auth)
        if (Configuration::get('frontend.ignoreRestriction') === FALSE) {
            $this->searchRestriction = TRUE;
        }

        // Set fuzzy search: enabled by configuration, or by any non-empty 'fuzzy'
        // request parameter other than the string 'false'
        $fuzzySearchRequest = $this->getParam('fuzzy');
        if (
            Configuration::get('frontend.fuzzySearch') == TRUE
            || (!empty($fuzzySearchRequest) && $fuzzySearchRequest !== 'false')
        ) {
            $this->fuzzySearch = TRUE;
        }

        // Set own host only
        if (Configuration::get('frontend.ownHostOnly') == TRUE) {
            $this->ownHostOnly = TRUE;
        }

        // Set entries per page: configured value, overridable by the request
        $this->perPage = Configuration::get('frontend.view.maxPerPage');
        $perPage = $this->getParam('perPage');
        if (!empty($perPage)) {
            // NOTE(review): a non-numeric value casts to 0 here - confirm consumers cope with that.
            $this->perPage = (int) $perPage;
        }

        // Set max suggestions
        $this->maxSuggestions = Configuration::get('frontend.view.maxSuggestions');

        // Set current page
        $currentPage = $this->getParam('page');
        if (!empty($currentPage)) {
            $this->currentPage = (int) $currentPage;
        }
    } catch (\Exception $e) {
        // Keep the message callers may rely on, but chain the real cause for debugging.
        throw new \Exception('could not open index', 0, $e);
    }
}
/**
 * Makes sure $this->index holds an open Lucene index.
 *
 * Does nothing when an index is already set. Otherwise the frontend index
 * path is rewritten from '/index' to '/tmpindex' and that location is opened;
 * if opening throws, a new index is created there and then opened.
 */
protected function checkAndPrepareIndex()
{
    // already prepared - nothing to do
    if ($this->index) {
        return;
    }

    // switch to tmpIndex
    $tmpIndexDir = str_replace('/index', '/tmpindex', Plugin::getFrontendSearchIndex());

    try {
        $analyzer = new \Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive();
        \Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);

        $this->index = \Zend_Search_Lucene::open($tmpIndexDir);
    } catch (\Exception $openFailure) {
        \Pimcore\Logger::debug('LuceneSearch: could not open frontend index, creating new one.');

        \Zend_Search_Lucene::create($tmpIndexDir);
        $this->index = \Zend_Search_Lucene::open($tmpIndexDir);
    }
}