Beispiel #1
0
 /**
  * Makes sure $this->index holds an open Lucene index for the temporary
  * ("tmpindex") location derived from the frontend search index path.
  *
  * Does nothing when $this->index is already set. If opening the index
  * throws, a warning is logged, a new index is created at the same
  * location and then opened.
  *
  * @return void
  */
 protected function checkAndPrepareIndex()
 {
     // An index handle is already held - nothing to prepare.
     if ($this->index) {
         return;
     }

     // Work on the tmp index: swap "/index" for "/tmpindex" in the configured path.
     $tmpIndexPath = str_replace("/index", "/tmpindex", SearchPhp_Plugin::getFrontendSearchIndex());

     try {
         $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive();
         Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);
         $this->index = Zend_Search_Lucene::open($tmpIndexPath);
     } catch (Exception $openFailure) {
         // Opening failed: log it, create a fresh index and open that one instead.
         $warning = get_class($this) . ": could not open frontend index, creating new one.";
         logger::log($warning, Zend_Log::WARN);
         Zend_Search_Lucene::create($tmpIndexPath);
         $this->index = Zend_Search_Lucene::open($tmpIndexPath);
     }
 }
// Frontend crawl run:
//   1. remove leftover index directories,
//   2. mark the crawler as running in the plugin's XML config,
//   3. crawl the configured start URLs,
//   4. mark the crawler as finished,
//   5. replace the live index directory with the freshly built tmp index.
// Every path handed to the shell is wrapped in escapeshellarg() so that
// directories containing spaces or shell metacharacters are treated as a
// single literal argument.
ini_set("max_execution_time", "-1");
logger::log("SearchPhp_Plugin: Starting crawl", Zend_Log::DEBUG);
//TODO nix specific
// NOTE(review): $indexDir is not assigned anywhere above this line in the
// visible code (its only visible assignment is further down) - confirm it is
// set by the surrounding scope, otherwise this cleanup removes nothing.
exec("rm -Rf " . escapeshellarg(str_replace("/index", "/tmpindex", $indexDir)) . " " . escapeshellarg($indexDir));
$confArray = SearchPhp_Plugin::getSearchConfigArray();
// Start URLs and link filters are stored as comma separated lists.
$urls = explode(",", $confArray['search']['frontend']['urls']);
$validLinkRegexes = explode(",", $confArray['search']['frontend']['validLinkRegexes']);
$invalidLinkRegexes = explode(",", $confArray['search']['frontend']['invalidLinkRegexes']);
// Persist "crawler running" plus the start timestamp to the plugin config file.
$rawConfig = new Zend_Config_Xml(PIMCORE_PLUGINS_PATH . SearchPhp_Plugin::$configFile);
$rawConfigArray = $rawConfig->toArray();
$rawConfigArray['search']['frontend']['crawler']['running'] = 1;
$rawConfigArray['search']['frontend']['crawler']['started'] = time();
$config = new Zend_Config($rawConfigArray, true);
$writer = new Zend_Config_Writer_Xml(array("config" => $config, "filename" => PIMCORE_PLUGINS_PATH . SearchPhp_Plugin::$configFile));
$writer->write();
$crawler = new SearchPhp_Frontend_Crawler($validLinkRegexes, $invalidLinkRegexes, 10, 30, $confArray['search']['frontend']['crawler']['contentStartIndicator'], $confArray['search']['frontend']['crawler']['contentEndIndicator']);
$crawler->findLinks($urls);
// Re-read the config file before writing the "finished" state, so the
// update is applied to the file's current content.
$rawConfig = new Zend_Config_Xml(PIMCORE_PLUGINS_PATH . SearchPhp_Plugin::$configFile);
$rawConfigArray = $rawConfig->toArray();
$rawConfigArray['search']['frontend']['crawler']['running'] = 0;
$rawConfigArray['search']['frontend']['crawler']['finished'] = time();
$config = new Zend_Config($rawConfigArray, true);
$writer = new Zend_Config_Writer_Xml(array("config" => $config, "filename" => PIMCORE_PLUGINS_PATH . SearchPhp_Plugin::$configFile));
$writer->write();
logger::log("SearchPhp_Plugin: replacing old index ...", Zend_Log::DEBUG);
$indexDir = SearchPhp_Plugin::getFrontendSearchIndex();
//TODO nix specific
// Drop the live index directory, then move the tmp index into its place.
exec("rm -Rf " . escapeshellarg($indexDir));
exec("mv " . escapeshellarg(str_replace("/index", "/tmpindex", $indexDir)) . " " . escapeshellarg($indexDir));
logger::log("Search_PluginPhp: replaced old index", Zend_Log::DEBUG);
logger::log("Search_PluginPhp: Finished crawl", Zend_Log::DEBUG);
 /**
  * Runs a frontend search against $this->frontendIndex and fills the view.
  *
  * Request input: "query" and "cat" are read from $_REQUEST; "perPage",
  * "page", "fuzzy", "field", "viewscript" and the display flags are read
  * via _getParam(). "perPage" falls back to 10 and "page" to 1 when they
  * are missing or smaller than 1, so paging offsets can never be negative.
  *
  * View output: pages, perPage, page, total, query, searchResults and -
  * when $this->fuzzySearch is set and the search produced no results -
  * suggestions. Any Exception raised while searching is logged and leaves
  * searchResults as an empty array.
  *
  * @return void
  */
 public function findAction()
 {
     // A missing request key is passed on as null rather than raising an undefined-index notice.
     $queryFromRequest = $this->cleanRequestString(isset($_REQUEST["query"]) ? $_REQUEST["query"] : null);
     $categoryFromRequest = $this->cleanRequestString(isset($_REQUEST["cat"]) ? $_REQUEST["cat"] : null);
     $searcher = new SearchPhp_Frontend_Searcher();

     $this->view->groupByCategory = $this->_getParam("groupByCategory");
     $this->view->omitSearchForm = $this->_getParam("omitSearchForm");
     $this->view->categoryOrder = $this->_getParam("categoryOrder");
     $this->view->omitJsIncludes = $this->_getParam("omitJsIncludes");

     // Normalise paging input to positive integers.
     $perPage = (int) $this->_getParam("perPage");
     if ($perPage < 1) {
         $perPage = 10;
     }
     $page = (int) $this->_getParam("page");
     if ($page < 1) {
         $page = 1;
     }

     $queryStr = strtolower($queryFromRequest);
     $this->view->category = $categoryFromRequest;
     $category = !empty($categoryFromRequest) ? $categoryFromRequest : null;

     $pluginConf = SearchPhp_Plugin::getSearchConfigArray();
     if (!empty($pluginConf["search"]["frontend"]["categories"])) {
         $this->view->availableCategories = explode(",", $pluginConf["search"]["frontend"]["categories"]);
     }

     $doFuzzy = $this->_getParam("fuzzy");

     try {
         $query = new Zend_Search_Lucene_Search_Query_Boolean();

         $field = $this->_getParam("field");
         if (!empty($field)) {
             Zend_Search_Lucene::setDefaultSearchField($field);
         }

         // Value for the 'lang' term: the search language with "_" and "-" removed.
         // Computed once; stays null when no search language is configured.
         $language = null;
         if (!empty($this->searchLanguage)) {
             if (is_object($this->searchLanguage)) {
                 $language = $this->searchLanguage->toString();
             } else {
                 $language = $this->searchLanguage;
             }
             $language = str_replace(array("_", "-"), "", $language);
         }

         $searchResults = array();
         // Initialised up front so the paging figures below are well-defined for an empty query too.
         $validHits = array();

         if (!empty($queryStr)) {
             if ($doFuzzy) {
                 // Append "~" to every space separated word to request fuzzy matching.
                 $queryStr = str_replace(" ", "~ ", $queryStr);
                 $queryStr .= "~";
                 Zend_Search_Lucene_Search_Query_Fuzzy::setDefaultPrefixLength(3);
             }

             $userQuery = Zend_Search_Lucene_Search_QueryParser::parse($queryStr, 'utf-8');
             $query->addSubquery($userQuery, true);

             if ($language !== null) {
                 $languageTerm = new Zend_Search_Lucene_Index_Term($language, 'lang');
                 $languageQuery = new Zend_Search_Lucene_Search_Query_Term($languageTerm);
                 $query->addSubquery($languageQuery, true);
             }

             if (!empty($category)) {
                 $categoryTerm = new Zend_Search_Lucene_Index_Term($category, 'cat');
                 $categoryQuery = new Zend_Search_Lucene_Search_Query_Term($categoryTerm);
                 $query->addSubquery($categoryQuery, true);
             }

             $validHits = $this->restrictHitsToOwnHost($this->frontendIndex->find($query));

             // Inclusive index window of the requested page, clipped to the last hit.
             $start = $perPage * ($page - 1);
             $end = min($start + $perPage - 1, count($validHits) - 1);

             for ($i = $start; $i <= $end; $i++) {
                 $hit = $validHits[$i];
                 $url = $hit->getDocument()->getField("url");
                 $title = $hit->getDocument()->getField("title");

                 $searchResult = array();
                 $searchResult['boost'] = $hit->getDocument()->boost;
                 $searchResult['title'] = $title->value;
                 $searchResult['url'] = $url->value;
                 $searchResult['sumary'] = $searcher->getSumaryForUrl($url->value, $queryStr);

                 // "h1" is optional: a Lucene exception on lookup just leaves the key unset.
                 try {
                     if ($hit->getDocument()->getField("h1")) {
                         $searchResult['h1'] = $hit->getDocument()->getField("h1")->value;
                     }
                 } catch (Zend_Search_Lucene_Exception $e) {
                 }

                 // NOTE(review): this appends the hit's "cat" value once per entry of
                 // $this->categories - confirm that is intended. The loop variable is
                 // deliberately not named $category so the request category used for
                 // filtering is never overwritten.
                 foreach ($this->categories as $configuredCategory) {
                     try {
                         $searchResult['categories'][] = $hit->getDocument()->getField("cat")->value;
                     } catch (Zend_Search_Lucene_Exception $e) {
                     }
                 }

                 $searchResults[] = $searchResult;
             }
         }

         $total = count($validHits);
         $this->view->pages = ($total < 1) ? 0 : ceil($total / $perPage);
         $this->view->perPage = $perPage;
         $this->view->page = $page;
         $this->view->total = $total;
         $this->view->query = $queryStr;
         $this->view->searchResults = $searchResults;

         if ($this->fuzzySearch) {
             //look for similar search terms
             if (!empty($queryStr) && empty($searchResults)) {
                 $terms = SearchPhp_Plugin::fuzzyFindTerms($queryStr, $this->frontendIndex, 3);
                 if (empty($terms) || count($terms) < 1) {
                     $terms = SearchPhp_Plugin::fuzzyFindTerms($queryStr, $this->frontendIndex, 0);
                 }

                 $suggestions = array();
                 if (is_array($terms)) {
                     $counter = 0;
                     foreach ($terms as $term) {
                         $t = $term->text;

                         // Already suggested: no need to query the index again.
                         if (in_array($t, $suggestions, true)) {
                             continue;
                         }

                         // Only suggest terms that yield hits under the same
                         // language / category / host restrictions as the main search.
                         $suggestionQuery = new Zend_Search_Lucene_Search_Query_Boolean();
                         if ($language != null) {
                             $languageTerm = new Zend_Search_Lucene_Index_Term($language, 'lang');
                             $languageQuery = new Zend_Search_Lucene_Search_Query_Term($languageTerm);
                             $suggestionQuery->addSubquery($languageQuery, true);
                         }
                         if (!empty($category)) {
                             $categoryTerm = new Zend_Search_Lucene_Index_Term($category, 'cat');
                             $categoryQuery = new Zend_Search_Lucene_Search_Query_Term($categoryTerm);
                             $suggestionQuery->addSubquery($categoryQuery, true);
                         }
                         $userQuery = Zend_Search_Lucene_Search_QueryParser::parse($t, 'utf-8');
                         $suggestionQuery->addSubquery($userQuery, true);

                         $suggestionHits = $this->restrictHitsToOwnHost($this->frontendIndex->find($suggestionQuery));

                         if (count($suggestionHits) > 0) {
                             $suggestions[] = $t;
                             // The counter is checked before it is incremented, so the
                             // loop stops after the 21st suggestion has been collected.
                             if ($counter >= 20) {
                                 break;
                             }
                             $counter++;
                         }
                     }
                 }
                 $this->view->suggestions = $suggestions;
             }
         }
     } catch (Exception $e) {
         Logger::log("An Exception occured during search:", Zend_Log::ERR);
         Logger::log($e, Zend_Log::ERR);
         $this->view->searchResults = array();
     }

     if ($this->_getParam("viewscript")) {
         $this->renderScript($this->_getParam("viewscript"));
     }
 }

 /**
  * Applies the "own host only" restriction to a list of query hits.
  *
  * When $this->ownHostOnly is not set the hits are returned unchanged.
  * Otherwise only hits whose "url" field contains "http://<host>" or
  * "https://<host>" are kept, where <host> is $_SERVER['HTTP_HOST'].
  *
  * @param array|null $hits hits as returned by the index's find()
  * @return array the remaining hits, re-indexed from 0 when filtered
  */
 protected function restrictHitsToOwnHost($hits)
 {
     if (!is_array($hits)) {
         return array();
     }
     if (!$this->ownHostOnly) {
         return $hits;
     }

     //get rid of hits from other hosts
     $currentHost = $_SERVER['HTTP_HOST'];
     $ownHits = array();
     foreach ($hits as $hit) {
         $urlValue = $hit->getDocument()->getField("url")->value;
         if (strpos($urlValue, "http://" . $currentHost) !== false || strpos($urlValue, "https://" . $currentHost) !== false) {
             $ownHits[] = $hit;
         }
     }
     return $ownHits;
 }
 /**
  * Calls SearchPhp_Plugin::forceCrawlerStartOnNextMaintenance() for the
  * "frontend" crawler and answers with a JSON success payload.
  *
  * @return void
  */
 public function startFrontendCrawlerAction()
 {
     $crawlerType = "frontend";
     SearchPhp_Plugin::forceCrawlerStartOnNextMaintenance($crawlerType);

     $response = array("success" => true);
     $this->_helper->json($response);
 }
                            });

                        }
                    }

                },{
                    xtype:'button',
                    style: 'margin: 0 0 0 5px',  
                    hideLabel: true,
                    text: '<?php 
echo $this->translate->_("searchphp_stop_crawler");
?>
',
                    id: 'stopFrontendCrawler',
                    disabled: <?php 
if (!SearchPhp_Plugin::frontendCrawlerRunning()) {
    echo 'true';
} else {
    echo 'false';
}
?>
,
                    listeners: {
                        click: function(button, event) {

                            searchPhpCrawlerLoadingMask = new Ext.LoadMask(Ext.get("f1Form"), {
                                id:"crawler-stop-mask",
                                msg:"<?php 
echo $this->translate->_("searchphp_please_wait");
?>
"