/**
 * Makes sure $this->index holds an opened Lucene index.
 *
 * Does nothing when $this->index is already set. Otherwise the path returned by
 * SearchPhp_Plugin::getFrontendSearchIndex() is rewritten from "/index" to "/tmpindex"
 * and the index at that location is opened. If anything in that step throws, a warning
 * is logged, a new index is created at the same path and then opened.
 *
 * @return void
 */
protected function checkAndPrepareIndex()
{
    // Already prepared - nothing to do.
    if ($this->index) {
        return;
    }

    //switch to tmpIndex
    $tmpIndexPath = str_replace("/index", "/tmpindex", SearchPhp_Plugin::getFrontendSearchIndex());

    try {
        $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive();
        Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);
        $this->index = Zend_Search_Lucene::open($tmpIndexPath);
    } catch (Exception $openFailure) {
        $warning = get_class($this) . ": could not open frontend index, creating new one.";
        logger::log($warning, Zend_Log::WARN);

        Zend_Search_Lucene::create($tmpIndexPath);
        $this->index = Zend_Search_Lucene::open($tmpIndexPath);
    }
}
// Crawl run: clear the index directories, mark the crawler as running in the plugin
// config, crawl the configured URLs, mark the crawler as finished and finally move the
// temporary index over the live index.
ini_set("max_execution_time", "-1");
logger::log("SearchPhp_Plugin: Starting crawl", Zend_Log::DEBUG);

// NOTE(review): $indexDir is read here before any assignment visible in this section;
// confirm it is initialised earlier, otherwise this command receives two empty paths.
//TODO nix specific
// Paths are shell-escaped so directories containing spaces or shell metacharacters
// cannot break or alter the command.
exec("rm -Rf " . escapeshellarg(str_replace("/index", "/tmpindex", $indexDir)) . " " . escapeshellarg($indexDir));

$confArray = SearchPhp_Plugin::getSearchConfigArray();
$urls = explode(",", $confArray['search']['frontend']['urls']);
$validLinkRegexes = explode(",", $confArray['search']['frontend']['validLinkRegexes']);
$invalidLinkRegexes = explode(",", $confArray['search']['frontend']['invalidLinkRegexes']);

$configFile = PIMCORE_PLUGINS_PATH . SearchPhp_Plugin::$configFile;

// Persist the "running" flag and the start timestamp before crawling.
$rawConfig = new Zend_Config_Xml($configFile);
$rawConfigArray = $rawConfig->toArray();
$rawConfigArray['search']['frontend']['crawler']['running'] = 1;
$rawConfigArray['search']['frontend']['crawler']['started'] = time();
$config = new Zend_Config($rawConfigArray, true);
$writer = new Zend_Config_Writer_Xml(array("config" => $config, "filename" => $configFile));
$writer->write();

$crawler = new SearchPhp_Frontend_Crawler(
    $validLinkRegexes,
    $invalidLinkRegexes,
    10,
    30,
    $confArray['search']['frontend']['crawler']['contentStartIndicator'],
    $confArray['search']['frontend']['crawler']['contentEndIndicator']
);
$crawler->findLinks($urls);

// Re-read the config file from disk before clearing the "running" flag, as the original does.
$rawConfig = new Zend_Config_Xml($configFile);
$rawConfigArray = $rawConfig->toArray();
$rawConfigArray['search']['frontend']['crawler']['running'] = 0;
$rawConfigArray['search']['frontend']['crawler']['finished'] = time();
$config = new Zend_Config($rawConfigArray, true);
$writer = new Zend_Config_Writer_Xml(array("config" => $config, "filename" => $configFile));
$writer->write();

logger::log("SearchPhp_Plugin: replacing old index ...", Zend_Log::DEBUG);
$indexDir = SearchPhp_Plugin::getFrontendSearchIndex();
$tmpIndexDir = str_replace("/index", "/tmpindex", $indexDir);

//TODO nix specific
exec("rm -Rf " . escapeshellarg($indexDir));
exec("mv " . escapeshellarg($tmpIndexDir) . " " . escapeshellarg($indexDir));

logger::log("Search_PluginPhp: replaced old index", Zend_Log::DEBUG);
logger::log("Search_PluginPhp: Finished crawl", Zend_Log::DEBUG);
/**
 * Searches the frontend index for the requested query and hands the result to the view.
 *
 * Input: "query" and "cat" are read from $_REQUEST (passed through cleanRequestString());
 * groupByCategory, omitSearchForm, categoryOrder, omitJsIncludes, perPage (default 10),
 * page (default 1), fuzzy, field and viewscript are read via _getParam().
 *
 * Output (view variables): groupByCategory, omitSearchForm, categoryOrder, omitJsIncludes,
 * category, availableCategories (only when configured), pages, perPage, page, total, query,
 * searchResults and - when $this->fuzzySearch is set and the query produced no results -
 * suggestions. perPage and page are assigned as integers >= 1.
 *
 * Any Exception thrown while searching is logged and results in an empty searchResults list.
 * If a "viewscript" parameter is given, that script is rendered.
 *
 * @return void
 */
public function findAction()
{
    // Guard the superglobal reads so a request without these keys raises no notice.
    $queryFromRequest = $this->cleanRequestString(isset($_REQUEST["query"]) ? $_REQUEST["query"] : null);
    $categoryFromRequest = $this->cleanRequestString(isset($_REQUEST["cat"]) ? $_REQUEST["cat"] : null);

    $searcher = new SearchPhp_Frontend_Searcher();

    $this->view->groupByCategory = $this->_getParam("groupByCategory");
    $this->view->omitSearchForm = $this->_getParam("omitSearchForm");
    $this->view->categoryOrder = $this->_getParam("categoryOrder");
    $this->view->omitJsIncludes = $this->_getParam("omitJsIncludes");

    // Paging values come from the request: fall back to the defaults and clamp to >= 1 so
    // a zero/negative/non-numeric value cannot produce a negative offset or a division by zero.
    $perPage = $this->_getParam("perPage");
    if (empty($perPage)) {
        $perPage = 10;
    }
    $perPage = max(1, (int) $perPage);

    $page = $this->_getParam("page");
    if (empty($page)) {
        $page = 1;
    }
    $page = max(1, (int) $page);

    $queryStr = strtolower((string) $queryFromRequest);

    $this->view->category = $categoryFromRequest;
    $category = !empty($categoryFromRequest) ? $categoryFromRequest : null;

    $pluginConf = SearchPhp_Plugin::getSearchConfigArray();
    if (!empty($pluginConf["search"]["frontend"]["categories"])) {
        $this->view->availableCategories = explode(",", $pluginConf["search"]["frontend"]["categories"]);
    }

    $doFuzzy = $this->_getParam("fuzzy");

    try {
        $query = new Zend_Search_Lucene_Search_Query_Boolean();

        $field = $this->_getParam("field");
        if (!empty($field)) {
            Zend_Search_Lucene::setDefaultSearchField($field);
        }

        $searchResults = array();
        // Initialised up front so the paging/total code below is valid for an empty query.
        $validHits = array();

        if (!empty($queryStr)) {
            if ($doFuzzy) {
                // Append "~" to every word of the query.
                $queryStr = str_replace(" ", "~ ", $queryStr);
                $queryStr .= "~";
                Zend_Search_Lucene_Search_Query_Fuzzy::setDefaultPrefixLength(3);
            }

            $userQuery = Zend_Search_Lucene_Search_QueryParser::parse($queryStr, 'utf-8');
            $query->addSubquery($userQuery, true);

            $lang = $this->getNormalizedSearchLanguage();
            if ($lang !== null) {
                $languageTerm = new Zend_Search_Lucene_Index_Term($lang, 'lang');
                $languageQuery = new Zend_Search_Lucene_Search_Query_Term($languageTerm);
                $query->addSubquery($languageQuery, true);
            }

            if (!empty($category)) {
                $categoryTerm = new Zend_Search_Lucene_Index_Term($category, 'cat');
                $categoryQuery = new Zend_Search_Lucene_Search_Query_Term($categoryTerm);
                $query->addSubquery($categoryQuery, true);
            }

            $validHits = $this->filterHitsForCurrentHost($this->frontendIndex->find($query));

            // Inclusive index window of the requested page.
            $start = $perPage * ($page - 1);
            $end = min($start + ($perPage - 1), count($validHits) - 1);

            for ($i = $start; $i <= $end; $i++) {
                $hit = $validHits[$i];
                $document = $hit->getDocument();
                $url = $document->getField("url");
                $title = $document->getField("title");

                $searchResult = array();
                $searchResult['boost'] = $document->boost;
                $searchResult['title'] = $title->value;
                $searchResult['url'] = $url->value;
                $searchResult['sumary'] = $searcher->getSumaryForUrl($url->value, $queryStr);

                try {
                    if ($document->getField("h1")) {
                        $searchResult['h1'] = $document->getField("h1")->value;
                    }
                } catch (Zend_Search_Lucene_Exception $e) {
                    // getField() threw for "h1": leave the key unset.
                }

                // NOTE(review): this appends the document's "cat" value once per entry of
                // $this->categories (the loop value itself is unused) - kept as in the
                // original; confirm whether one entry per document was intended.
                // The loop variable is deliberately not named $category so the request
                // category used for the queries is not overwritten.
                foreach ($this->categories as $configuredCategory) {
                    try {
                        $searchResult['categories'][] = $document->getField("cat")->value;
                    } catch (Zend_Search_Lucene_Exception $e) {
                        // getField() threw for "cat": nothing to add.
                    }
                }

                $searchResults[] = $searchResult;
            }
        }

        $total = count($validHits);
        $this->view->pages = ($total < 1) ? 0 : ceil($total / $perPage);
        $this->view->perPage = $perPage;
        $this->view->page = $page;
        $this->view->total = $total;
        $this->view->query = $queryStr;
        $this->view->searchResults = $searchResults;

        //look for similar search terms
        if ($this->fuzzySearch and !empty($queryStr) and empty($searchResults)) {
            $terms = SearchPhp_Plugin::fuzzyFindTerms($queryStr, $this->frontendIndex, 3);
            if (empty($terms)) {
                $terms = SearchPhp_Plugin::fuzzyFindTerms($queryStr, $this->frontendIndex, 0);
            }

            $suggestions = array();
            if (is_array($terms)) {
                // The language restriction is the same for every term, so resolve it once.
                $language = $this->getNormalizedSearchLanguage();
                $counter = 0;

                foreach ($terms as $term) {
                    $t = $term->text;

                    //check if term can be found for current language
                    $termQuery = new Zend_Search_Lucene_Search_Query_Boolean();
                    if ($language !== null and $language !== "") {
                        $languageTerm = new Zend_Search_Lucene_Index_Term($language, 'lang');
                        $languageQuery = new Zend_Search_Lucene_Search_Query_Term($languageTerm);
                        $termQuery->addSubquery($languageQuery, true);
                    }
                    if (!empty($category)) {
                        $categoryTerm = new Zend_Search_Lucene_Index_Term($category, 'cat');
                        $categoryQuery = new Zend_Search_Lucene_Search_Query_Term($categoryTerm);
                        $termQuery->addSubquery($categoryQuery, true);
                    }
                    $userQuery = Zend_Search_Lucene_Search_QueryParser::parse($t, 'utf-8');
                    $termQuery->addSubquery($userQuery, true);

                    // Same host filter as the main search (http and https), applied once per hit.
                    $termHits = $this->filterHitsForCurrentHost($this->frontendIndex->find($termQuery));

                    if (count($termHits) > 0 and !in_array($t, $suggestions, true)) {
                        $suggestions[] = $t;
                        if ($counter >= 20) {
                            break;
                        }
                        $counter++;
                    }
                }
            }
            $this->view->suggestions = $suggestions;
        }
    } catch (Exception $e) {
        Logger::log("An Exception occured during search:", Zend_Log::ERR);
        Logger::log($e, Zend_Log::ERR);
        $this->view->searchResults = array();
    }

    if ($this->_getParam("viewscript")) {
        $this->renderScript($this->_getParam("viewscript"));
    }
}

/**
 * Returns $this->searchLanguage as a string with "_" and "-" removed, or null when no
 * search language is set. Objects are converted through their toString() method.
 *
 * @return string|null
 */
private function getNormalizedSearchLanguage()
{
    if (empty($this->searchLanguage)) {
        return null;
    }

    $language = is_object($this->searchLanguage)
        ? $this->searchLanguage->toString()
        : $this->searchLanguage;

    return str_replace(array("_", "-"), "", $language);
}

/**
 * Returns the given hits, reduced to those whose "url" field contains
 * "http://<host>" or "https://<host>" for the current HTTP_HOST when
 * $this->ownHostOnly is set. Always returns an array (empty for no hits).
 *
 * @param array|null $hits hits as returned by the index's find()
 * @return array
 */
private function filterHitsForCurrentHost($hits)
{
    if (empty($hits)) {
        return array();
    }
    if (!$this->ownHostOnly) {
        return $hits;
    }

    //get rid of hits from other hosts
    $currentHost = $_SERVER['HTTP_HOST'];
    $ownHostHits = array();
    foreach ($hits as $hit) {
        $urlValue = $hit->getDocument()->getField("url")->value;
        if (strpos($urlValue, "http://" . $currentHost) !== false
            || strpos($urlValue, "https://" . $currentHost) !== false
        ) {
            $ownHostHits[] = $hit;
        }
    }

    return $ownHostHits;
}
/**
 * Calls SearchPhp_Plugin::forceCrawlerStartOnNextMaintenance() for the "frontend"
 * crawler and answers the request with the JSON payload {"success": true}.
 *
 * @return void
 */
public function startFrontendCrawlerAction()
{
    SearchPhp_Plugin::forceCrawlerStartOnNextMaintenance("frontend");

    $response = array("success" => true);
    $this->_helper->json($response);
}
}); } } },{ xtype:'button', style: 'margin: 0 0 0 5px', hideLabel: true, text: '<?php echo $this->translate->_("searchphp_stop_crawler"); ?> ', id: 'stopFrontendCrawler', disabled: <?php if (!SearchPhp_Plugin::frontendCrawlerRunning()) { echo 'true'; } else { echo 'false'; } ?> , listeners: { click: function(button, event) { searchPhpCrawlerLoadingMask = new Ext.LoadMask(Ext.get("f1Form"), { id:"crawler-stop-mask", msg:"<?php echo $this->translate->_("searchphp_please_wait"); ?> "