/**
 * Crawls every configured frontend seed URL and, if a temporary index
 * directory exists afterwards, replaces the live index directory with it.
 *
 * Steps, in order:
 *  1. Bail out when the 'running' core setting is already TRUE.
 *  2. Remove a left-over temporary index directory.
 *  3. Run one Parser per seed URL (configured from the 'frontend.*' settings).
 *  4. Swap the temporary index into place of the live index.
 *
 * Note: the path handling assumes the value returned by
 * Plugin::getFrontendSearchIndex() ends in '/index/' (TODO confirm against Plugin).
 *
 * @return bool|null FALSE when a crawl is already flagged as running, otherwise no value.
 *
 * @throws \Exception Logged and re-thrown when anything outside the per-seed crawl loop fails.
 */
public static function runCrawler()
{
    if (Configuration::getCoreSetting('running') === TRUE) {
        return FALSE;
    }

    $indexDir = \LuceneSearch\Plugin::getFrontendSearchIndex();
    if (!$indexDir) {
        return;
    }

    // Start from a clean slate: drop whatever a previous run left behind.
    // The path is escaped so whitespace or shell metacharacters cannot alter the command.
    $staleTmpIndex = str_replace('/index/', '/tmpindex', $indexDir);
    exec('rm -Rf ' . escapeshellarg($staleTmpIndex));
    \Pimcore\Logger::debug('LuceneSearch: rm -Rf ' . $staleTmpIndex);
    \Pimcore\Logger::debug('LuceneSearch: Starting crawl');

    try {
        $urls = Configuration::get('frontend.urls');

        // Editable regexes are used as a list; the system value is appended as a single entry.
        $invalidLinkRegexesSystem = Configuration::get('frontend.invalidLinkRegexes');
        $invalidLinkRegexesEditable = Configuration::get('frontend.invalidLinkRegexesEditable');

        $invalidLinkRegexes = array();
        if (!empty($invalidLinkRegexesEditable)) {
            $invalidLinkRegexes = $invalidLinkRegexesEditable;
        }
        if (!empty($invalidLinkRegexesSystem)) {
            $invalidLinkRegexes = array_merge($invalidLinkRegexes, array($invalidLinkRegexesSystem));
        }

        self::setCrawlerState('frontend', 'started', TRUE);

        try {
            foreach ($urls as $seed) {
                $parser = new Parser();
                $parser
                    ->setDepth(Configuration::get('frontend.crawler.maxLinkDepth'))
                    ->setValidLinkRegexes(Configuration::get('frontend.validLinkRegexes'))
                    ->setInvalidLinkRegexes($invalidLinkRegexes)
                    ->setSearchStartIndicator(Configuration::get('frontend.crawler.contentStartIndicator'))
                    ->setSearchEndIndicator(Configuration::get('frontend.crawler.contentEndIndicator'))
                    ->setSearchExcludeStartIndicator(Configuration::get('frontend.crawler.contentExcludeStartIndicator'))
                    ->setSearchExcludeEndIndicator(Configuration::get('frontend.crawler.contentExcludeEndIndicator'))
                    ->setAllowSubdomain(FALSE)
                    ->setAllowedSchemes(Configuration::get('frontend.allowedSchemes'))
                    ->setDownloadLimit(Configuration::get('frontend.crawler.maxDownloadLimit'))
                    ->setSeed($seed);

                if (Configuration::get('frontend.auth.useAuth') === TRUE) {
                    $parser->setAuth(
                        Configuration::get('frontend.auth.username'),
                        Configuration::get('frontend.auth.password')
                    );
                }

                $parser->startParser();
                $parser->optimizeIndex();
            }
        } catch (\Exception $e) {
            // A failing crawl must not prevent the state reset and index swap below,
            // but it should leave a trace instead of vanishing silently.
            \Pimcore\Logger::error($e);
        }

        self::setCrawlerState('frontend', 'finished', FALSE);

        // Only remove the live index if a temporary index exists to replace it.
        $tmpIndex = str_replace('/index', '/tmpindex', $indexDir);

        if (is_dir($tmpIndex)) {
            exec('rm -Rf ' . escapeshellarg($indexDir));
            \Pimcore\Logger::debug('LuceneSearch: rm -Rf ' . $indexDir);

            // Strip the trailing slash so cp creates the target directory itself
            // instead of copying into a (now missing) directory.
            $copySource = rtrim($tmpIndex, '/');
            $copyTarget = rtrim($indexDir, '/');

            exec('cp -R ' . escapeshellarg($copySource) . ' ' . escapeshellarg($copyTarget));
            \Pimcore\Logger::debug('LuceneSearch: cp -R ' . $copySource . ' ' . $copyTarget);
            \Pimcore\Logger::debug('LuceneSearch: replaced old index');
            \Pimcore\Logger::info('LuceneSearch: Finished crawl');
        } else {
            \Pimcore\Logger::error('LuceneSearch: skipped index replacing. no tmp index found.');
        }
    } catch (\Exception $e) {
        \Pimcore\Logger::error($e);
        throw $e;
    }
}
/**
 * Controller action: asks the plugin to force a 'frontend' crawler start on
 * the next maintenance run and answers with a JSON success payload.
 */
public function startFrontendCrawlerAction()
{
    $crawlerType = 'frontend';
    Plugin::forceCrawlerStartOnNextMaintenance($crawlerType);

    $payload = array('success' => TRUE);
    $this->_helper->json($payload);
}
/**
 * Opens the Lucene index located at the plugin's frontend search index
 * directory and keeps the handle in $this->index.
 */
public function __construct()
{
    $this->index = \Zend_Search_Lucene::open(Plugin::getFrontendSearchIndex());
}
/**
 * Collects alternative search terms for the current query when the search
 * itself produced no results.
 *
 * Candidate terms come from Plugin::fuzzyFindTerms(); a candidate is kept only
 * if searching for it (with the language, category, country and restriction
 * sub-queries applied) yields at least one valid hit.
 *
 * @param array $searchResults Results of the original search; suggestions are
 *                             only generated when this is empty.
 *
 * @return array List of unique suggested term strings, limited by $this->maxSuggestions.
 */
private function getFuzzySuggestions($searchResults = array())
{
    $suggestions = array();

    // Suggestions are only useful when there is a query that found nothing.
    if (empty($this->query) || !(empty($searchResults) || count($searchResults) < 1)) {
        return $suggestions;
    }

    // First attempt with 3 as the third argument, then retry with 0 if nothing
    // was found (presumably a required prefix length — TODO confirm in Plugin::fuzzyFindTerms).
    $terms = Plugin::fuzzyFindTerms($this->query, $this->frontendIndex, 3);
    if (empty($terms) || count($terms) < 1) {
        $terms = Plugin::fuzzyFindTerms($this->query, $this->frontendIndex, 0);
    }

    if (!is_array($terms)) {
        return $suggestions;
    }

    foreach ($terms as $term) {
        $t = $term->text;

        // Skip terms that are already suggested before spending a search on them.
        // Strict comparison keeps numeric-looking terms such as '1e3' and '1000' apart.
        if (in_array($t, $suggestions, TRUE)) {
            continue;
        }

        $query = new \Zend_Search_Lucene_Search_Query_Boolean();
        $userQuery = \Zend_Search_Lucene_Search_QueryParser::parse($t, 'utf-8');
        $query->addSubquery($userQuery, TRUE);

        $this->addLanguageQuery($query);
        $this->addCategoryQuery($query);
        $this->addCountryQuery($query);
        $this->addRestrictionQuery($query);

        $validHits = $this->getValidHits($this->frontendIndex->find($query));

        if (count($validHits) > 0) {
            $suggestions[] = $t;

            // Stop once the limit is reached. Counting the collected items avoids the
            // former off-by-one that returned maxSuggestions + 1 entries.
            if (count($suggestions) >= $this->maxSuggestions) {
                break;
            }
        }
    }

    return $suggestions;
}
/**
 * Makes sure $this->index holds an open Lucene index.
 *
 * Does nothing when an index is already set. Otherwise it derives the
 * temporary index directory from the frontend index path ('/index' replaced
 * by '/tmpindex'), installs the UTF-8 case-insensitive analyzer as default and
 * opens that directory. If that fails with an exception, the index is created
 * first and then opened.
 */
protected function checkAndPrepareIndex()
{
    if ($this->index) {
        return;
    }

    // Work on the temporary index rather than the live one.
    $tmpIndexDir = str_replace('/index', '/tmpindex', Plugin::getFrontendSearchIndex());

    try {
        $analyzer = new \Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive();
        \Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);

        $this->index = \Zend_Search_Lucene::open($tmpIndexDir);
    } catch (\Exception $e) {
        \Pimcore\Logger::debug('LuceneSearch: could not open frontend index, creating new one.');

        \Zend_Search_Lucene::create($tmpIndexDir);
        $this->index = \Zend_Search_Lucene::open($tmpIndexDir);
    }
}