/**
 * Crawls every configured frontend seed URL and afterwards replaces the live
 * frontend index directory with the temporary index directory, if the latter
 * exists.
 *
 * Nothing is done when the core setting "running" is already TRUE or when
 * no frontend index directory is returned by the plugin.
 *
 * Failures inside the per-seed crawl loop are logged and do not abort the
 * method: the crawler state is still set to finished and the index swap is
 * still attempted.
 *
 * @return bool|null FALSE when a crawl is already flagged as running,
 *                   otherwise no explicit value (NULL).
 *
 * @throws \Exception re-thrown after logging when anything outside the
 *                    per-seed crawl loop fails
 */
public static function runCrawler()
{
    if (Configuration::getCoreSetting('running') === TRUE) {
        return FALSE;
    }

    $indexDir = \LuceneSearch\Plugin::getFrontendSearchIndex();

    if (!$indexDir) {
        return;
    }

    // Remove leftovers of a previous run. Paths are shell-escaped so that a
    // directory containing whitespace or shell metacharacters can neither
    // break nor alter the command.
    $removeStaleTmpCmd = 'rm -Rf ' . escapeshellarg(str_replace('/index/', '/tmpindex', $indexDir));
    exec($removeStaleTmpCmd);
    \Pimcore\Logger::debug('LuceneSearch: ' . $removeStaleTmpCmd);

    \Pimcore\Logger::debug('LuceneSearch: Starting crawl');

    try {
        $urls = Configuration::get('frontend.urls');

        $invalidLinkRegexesSystem = Configuration::get('frontend.invalidLinkRegexes');
        $invalidLinkRegexesEditable = Configuration::get('frontend.invalidLinkRegexesEditable');

        // The editable setting is used as a list, the system setting is
        // wrapped as a single list entry; empty settings are skipped.
        if (!empty($invalidLinkRegexesEditable) && !empty($invalidLinkRegexesSystem)) {
            $invalidLinkRegexes = array_merge($invalidLinkRegexesEditable, array($invalidLinkRegexesSystem));
        } elseif (!empty($invalidLinkRegexesEditable)) {
            $invalidLinkRegexes = $invalidLinkRegexesEditable;
        } elseif (!empty($invalidLinkRegexesSystem)) {
            $invalidLinkRegexes = array($invalidLinkRegexesSystem);
        } else {
            $invalidLinkRegexes = array();
        }

        self::setCrawlerState('frontend', 'started', TRUE);

        try {
            foreach ($urls as $seed) {
                $parser = new Parser();

                $parser
                    ->setDepth(Configuration::get('frontend.crawler.maxLinkDepth'))
                    ->setValidLinkRegexes(Configuration::get('frontend.validLinkRegexes'))
                    ->setInvalidLinkRegexes($invalidLinkRegexes)
                    ->setSearchStartIndicator(Configuration::get('frontend.crawler.contentStartIndicator'))
                    ->setSearchEndIndicator(Configuration::get('frontend.crawler.contentEndIndicator'))
                    ->setSearchExcludeStartIndicator(Configuration::get('frontend.crawler.contentExcludeStartIndicator'))
                    ->setSearchExcludeEndIndicator(Configuration::get('frontend.crawler.contentExcludeEndIndicator'))
                    ->setAllowSubdomain(FALSE)
                    ->setAllowedSchemes(Configuration::get('frontend.allowedSchemes'))
                    ->setDownloadLimit(Configuration::get('frontend.crawler.maxDownloadLimit'))
                    ->setSeed($seed);

                if (Configuration::get('frontend.auth.useAuth') === TRUE) {
                    $parser->setAuth(
                        Configuration::get('frontend.auth.username'),
                        Configuration::get('frontend.auth.password')
                    );
                }

                $parser->startParser();
                $parser->optimizeIndex();
            }
        } catch (\Exception $e) {
            // Best effort: a failing crawl must not prevent the state reset
            // below, but it must not vanish without a trace either.
            \Pimcore\Logger::error($e);
        }

        self::setCrawlerState('frontend', 'finished', FALSE);

        // Only remove the live index if a temporary index exists.
        $tmpIndex = str_replace('/index', '/tmpindex', $indexDir);

        if (is_dir($tmpIndex)) {
            $removeLiveCmd = 'rm -Rf ' . escapeshellarg($indexDir);
            exec($removeLiveCmd);
            \Pimcore\Logger::debug('LuceneSearch: ' . $removeLiveCmd);

            // substr(..., 0, -1) drops the last character of each path -
            // presumably the trailing slash; TODO confirm against
            // getFrontendSearchIndex().
            $copyCmd = 'cp -R ' . escapeshellarg(substr($tmpIndex, 0, -1))
                . ' ' . escapeshellarg(substr($indexDir, 0, -1));
            exec($copyCmd);
            \Pimcore\Logger::debug('LuceneSearch: ' . $copyCmd);

            \Pimcore\Logger::debug('LuceneSearch: replaced old index');
            \Pimcore\Logger::info('LuceneSearch: Finished crawl');
        } else {
            \Pimcore\Logger::error('LuceneSearch: skipped index replacing. no tmp index found.');
        }
    } catch (\Exception $e) {
        \Pimcore\Logger::error($e);
        throw $e;
    }
}
/**
 * Maintenance hook: starts the frontend crawl when it is due, or stops a
 * crawl that looks stuck.
 *
 * Does nothing except write a debug log entry when the plugin is not
 * installed.
 */
public function maintenanceJob()
{
    if (!self::isInstalled()) {
        \Pimcore\Logger::debug('LuceneSearch: Plugin is not installed - no maintenance to do for this plugin.');

        return;
    }

    $hourOfDay = date('H', time());

    // Crawler state and settings, read once up front.
    $crawlerRunning = self::frontendCrawlerRunning();
    $crawlerEnabled = Configuration::get('frontend.enabled');
    $startedAt = Configuration::getCoreSetting('started');
    $finishedAt = Configuration::getCoreSetting('finished');
    $forced = Configuration::getCoreSetting('forceStart');
    $oneDayAgo = time() - 24 * 60 * 60;

    // Strictly between hour 1 and hour 3, i.e. only hour 02 matches.
    $inNightWindow = $hourOfDay > 1 && $hourOfDay < 3;

    // Start when the crawler is enabled and idle, and either a start is
    // forced or the last start is a boolean / at least a day old while the
    // current hour lies inside the night window.
    $shouldStart = $crawlerEnabled
        && !$crawlerRunning
        && ($forced || ((is_bool($startedAt) || $startedAt <= $oneDayAgo) && $inNightWindow));

    if ($shouldStart) {
        \Pimcore\Logger::debug('starting frontend recrawl...');
        $this->frontendCrawl();

        return;
    }

    // A running crawl whose last finish precedes its last start, and whose
    // start is at least a day old, is treated as broken and gets stopped.
    $looksStuck = $crawlerRunning
        && $finishedAt < $startedAt
        && $startedAt <= $oneDayAgo;

    if ($looksStuck) {
        \Pimcore\Logger::error('LuceneSearch: There seems to be a problem with the search crawler! Trying to stop it.');
        $this->stopFrontendCrawler();
    }
}