/**
 * Optimizes the Lucene index of one table, or of every configured table.
 *
 * If $options['table'] is set, only that table is processed; otherwise the
 * table list is read from the app_aToolkit_indexes setting (default: none).
 *
 * @param mixed $arguments Task arguments (not read here)
 * @param mixed $options   Task options; 'connection', 'table' and 'verbose' are read
 */
protected function execute($arguments = array(), $options = array())
{
  // Bring up the database layer for the requested (or default) connection
  $manager = new sfDatabaseManager($this->configuration);
  $connectionName = $options['connection'] ? $options['connection'] : null;
  $manager->getDatabase($connectionName)->getConnection();

  // The PDO connection is not so useful here, so the Doctrine one is requested too
  Doctrine_Manager::connection();
  sfContext::createInstance($this->configuration);

  $indexes = isset($options['table'])
    ? array($options['table'])
    : sfConfig::get('app_aToolkit_indexes', array());
  $verbose = $options['verbose'];

  foreach ($indexes as $index)
  {
    $table = Doctrine::getTable($index);
    if ($verbose)
    {
      echo "Optimizing {$index}\n";
    }
    aZendSearch::optimizeLuceneIndex($table);
  }

  if ($verbose)
  {
    echo "Success!\n";
  }
}
/**
 * Rebuilds the Lucene search index of one table, or of every configured table.
 *
 * For aPage the index is purged, one a_lucene_update row is queued per
 * page/culture combination, and $this->update() is called repeatedly until
 * that queue is empty. Every other table is rebuilt in a single call to
 * rebuildLuceneIndex().
 *
 * @param mixed $arguments Task arguments (not read here)
 * @param mixed $options   Task options; 'connection' and 'table' are read
 */
protected function execute($arguments = array(), $options = array())
{
  // Memory usage is a bit high because every page is examined, and some hosts
  // (the Rackspace Cloud "cron jobs") ship with a very low default limit.
  // TODO: prioritize a low-memory solution, which large sites need anyway
  ini_set('memory_limit', '256M');

  // Bring up the database layer for the requested (or default) connection
  $manager = new sfDatabaseManager($this->configuration);
  $connectionName = $options['connection'] ? $options['connection'] : null;
  $manager->getDatabase($connectionName)->getConnection();

  // The context allows the use of helpers, notably url_for. Configure things
  // first so that reasonable siteless-but-rooted URLs can be generated.
  // TODO: think about ways to make this work for people who like frontend_dev.php etc.
  sfConfig::set('sf_no_script_name', true);
  $_SERVER['PHP_SELF'] = '';
  $_SERVER['SCRIPT_NAME'] = '';
  sfContext::createInstance($this->configuration);

  $indexes = isset($options['table'])
    ? array($options['table'])
    : sfConfig::get('app_aToolkit_indexes', array());
  $count = 0;

  foreach ($indexes as $index)
  {
    $table = Doctrine::getTable($index);

    if ($index !== 'aPage')
    {
      // There is no deferred update feature for other tables, so they
      // have to be rebuilt within the memory available
      $table->rebuildLuceneIndex();
    }
    else
    {
      aZendSearch::purgeLuceneIndex($table);

      // Updates for all page/culture combinations are about to be queued. Start
      // from an empty queue so interrupted and retried runs do not pile up
      // repeat requests.
      $this->query('DELETE FROM a_lucene_update');

      $pageRows = Doctrine::getTable('aPage')
        ->createQuery('p')
        ->innerJoin('p.Areas a')
        ->execute(array(), Doctrine::HYDRATE_ARRAY);

      foreach ($pageRows as $pageRow)
      {
        // Collect each culture once, keyed by culture so duplicates collapse
        $seenCultures = array();
        foreach ($pageRow['Areas'] as $areaRow)
        {
          $seenCultures[$areaRow['culture']] = true;
        }
        foreach (array_keys($seenCultures) as $culture)
        {
          $this->query(
            'INSERT INTO a_lucene_update (page_id, culture) VALUES (:page_id, :culture)',
            array('page_id' => $pageRow['id'], 'culture' => $culture)
          );
        }
      }

      // Keep running update passes until the queue has drained
      do
      {
        $rows = $this->query('SELECT COUNT(id) AS total FROM a_lucene_update');
        $count = $rows[0]['total'];
        $pending = ($count != 0);
        if ($pending)
        {
          $this->logSection('toolkit', "{$count} pages remain to be indexed, starting another update pass...");
          $this->update();
        }
      }
      while ($pending);
    }

    $this->logSection('toolkit', sprintf('Index for "%s" rebuilt', $index));
  }
}
/**
 * Executes feedback action.
 *
 * Builds an aFeedbackForm for the requested section. On POST the form is bound
 * and validated; a valid submission is mailed through Zend_Mail to the address
 * configured as app_aFeedback_email_auto (with the optional screenshot
 * attached), a 'reportSubmittedBy' flash is set and the user is redirected to
 * the submitted section.
 *
 * @param sfRequest $request
 * @return mixed 'Success' when sending the mail threw (with $this->failed set
 *               to true); otherwise nothing is returned explicitly
 */
public function executeFeedback(sfRequest $request)
{
  $section = $request->getParameter('section', false);
  $this->form = new aFeedbackForm($section);
  $this->feedbackSubmittedBy = false;
  $this->failed = false;
  sfContext::getInstance()->getConfiguration()->loadHelpers(array('Tag', 'Url'));

  if ($request->isMethod('post'))
  {
    $this->form->bind($request->getParameter('feedback'), $request->getFiles('feedback'));
    if ($this->form->isValid())
    {
      $feedback = $this->form->getValues();
      // Not every client sends a User-Agent header, so do not assume the key exists
      $feedback['browser'] = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';

      try
      {
        aZendSearch::registerZend();

        // NOTE(review): stock symfony 1.x sfForm::getValue() takes only the field
        // name, so a default passed as second argument is ignored. Apply the
        // fallback subject explicitly; this also works if aFeedbackForm overrides
        // getValue() to honor defaults.
        $subject = $this->form->getValue('subject');
        if ($subject === null || $subject === '')
        {
          $subject = 'New aBugReport submission';
        }

        $mail = new Zend_Mail();
        $mail->setBodyText($this->getPartial('feedbackEmailText', array('feedback' => $feedback)))
          ->setFrom($feedback['email'], $feedback['name'])
          ->addTo(sfConfig::get('app_aFeedback_email_auto'))
          ->setSubject($subject);

        if ($screenshot = $this->form->getValue('screenshot'))
        {
          $mail->createAttachment(file_get_contents($screenshot->getTempName()), $screenshot->getType());
        }
        $mail->send();

        // A new form for a new submission
        $this->form = new aFeedbackForm();
      }
      catch (Exception $e)
      {
        $this->logMessage('Request email failed: ' . $e->getMessage(), 'err');
        $this->failed = true;
        // Render the regular view; $this->failed tells it that sending failed
        return 'Success';
      }

      $this->getUser()->setFlash('reportSubmittedBy', $feedback['name']);
      // NOTE(review): the redirect target comes from the submitted form values;
      // confirm the form restricts 'section' to local URLs
      $this->redirect($feedback['section']);
    }
  }
}
/**
 * Runs a Lucene search over aPage and prepares a paginated result list.
 *
 * Sets $this->pager, $this->pagerUrl and $this->results, and sets the
 * response title. Hits the current user may not view, and virtual pages
 * whose slug is neither a route nor a path, are dropped.
 *
 * @param sfWebRequest $request Reads the 'q', 'x' and 'page' parameters
 * @return mixed The redirect result when an 'x' parameter is present, otherwise nothing
 */
public function executeSearch(sfWebRequest $request)
{
  $q = $request->getParameter('q');

  if ($request->hasParameter('x'))
  {
    // input type="image" is nice for presentation but adds ugly x and y
    // click-coordinate parameters. Strip them and come back.
    $cleanUrl = sfContext::getInstance()->getController()->genUrl('a/search', true);
    return $this->redirect($cleanUrl . '?' . http_build_query(array("q" => $q)));
  }

  // Normalize the query (lowercase, trimmed, single spaces) to look up a configured refinement
  $key = preg_replace('/\\s+/', ' ', strtolower(trim($q)));
  $refinements = sfConfig::get('app_a_search_refinements', array());
  if (isset($refinements[$key]))
  {
    $q = $refinements[$key];
  }

  $hits = aZendSearch::searchLuceneWithValues(Doctrine::getTable('aPage'), $q, aTools::getUserCulture());

  $values = array();
  foreach ($hits as $hit)
  {
    // The hit object doesn't implement isset, so test the length instead
    if (strlen($hit->info))
    {
      $info = unserialize($hit->info);
      if (!aPageTable::checkPrivilege('view', $info))
      {
        continue;
      }
    }

    $slug = $hit->slug;
    $isNamedRoute = (substr($slug, 0, 1) === '@');
    $slashAt = strpos($slug, '/');

    if (!$isNamedRoute && $slashAt === false)
    {
      // A virtual page (such as global) that isn't the least bit interested in
      // being part of search results
      continue;
    }

    if ($isNamedRoute || $slashAt > 0)
    {
      // Either a named Symfony route (@name) or a virtual page slug which is a
      // valid Symfony route, such as foo/bar?id=55
      $hit->url = $this->getController()->genUrl($slug, true);
    }
    else
    {
      // Leading slash: a normal CMS page
      $hit->url = aTools::urlForPage($slug);
    }

    $hit->class = 'aPage';
    $values[] = $hit;
  }

  // Subclasses may append their own results; re-sort by score when they do
  if ($this->searchAddResults($values, $q))
  {
    usort($values, "aActions::compareScores");
  }

  $perPage = sfConfig::get('app_a_search_results_per_page', 10);
  $this->pager = new aArrayPager(null, $perPage);
  $this->pager->setResultArray($values);
  $this->pager->setPage($request->getParameter('page', 1));
  $this->pager->init();
  $this->pagerUrl = "a/search?" . http_build_query(array("q" => $q));

  // setTitle takes care of escaping things
  $title = aTools::getOptionI18n('title_prefix') . 'Search for ' . $q . aTools::getOptionI18n('title_suffix');
  $this->getResponse()->setTitle($title);

  $this->results = $this->pager->getResults();
}
/**
 * Pushes this object's searchable content to the Lucene index.
 *
 * The search text is passed as the indexed field set; title, summary, slug
 * and the view_is_secure flag are passed as the accompanying stored values
 * for the object's culture.
 */
public function updateLuceneIndex()
{
  $stored = array(
    'title' => $this->getTitle(),
    'summary' => $this->getSearchSummary(),
  );
  $indexed = array('text' => $this->getSearchText());
  $stored['slug'] = $this->getSlug();
  $culture = $this->getCulture();
  $stored['view_is_secure'] = $this->getViewIsSecure();

  aZendSearch::updateLuceneIndex($this, $indexed, $culture, $stored);
}
/**
 * Loads the Zend autoloader once per request.
 *
 * Later calls are no-ops thanks to the self::$zendLoaded flag.
 */
public static function registerZend()
{
  if (!self::$zendLoaded)
  {
    // Zend 1.8.0 and thereafter. (Before 1.8.0 this was Zend/Loader.php plus
    // Zend_Loader::registerAutoload().)
    include_once 'Zend/Loader/Autoloader.php';

    // NOT made the fallback autoloader: Symfony's is the default.
    // Thanks to Guglielmo Celata
    Zend_Loader_Autoloader::getInstance()->suppressNotFoundWarnings(false);

    self::$zendLoaded = true;
  }
}
/**
 * Pushes this object's searchable fields to the Lucene index.
 *
 * Indexed fields: type, title, description, credit, plus the category names
 * and tags, each joined into one comma-separated string.
 */
public function updateLuceneIndex()
{
  $indexed = array();
  $indexed['type'] = $this->getType();
  $indexed['title'] = $this->getTitle();
  $indexed['description'] = $this->getDescription();
  $indexed['credit'] = $this->getCredit();
  $indexed['categories'] = implode(", ", $this->getCategoryNames());
  $indexed['tags'] = implode(", ", $this->getTags());

  aZendSearch::updateLuceneIndex(array('object' => $this, 'indexed' => $indexed));
}
/**
 * Filters the virtual pages, tags and categories associated with a particular
 * engine based on criteria such as tag, category, publication date, etc.
 *
 * Strategy: do a Lucene query (when 'q' is given) and direct SQL queries that
 * yield the relevant categories, tags and virtual pages. The result is enough
 * to populate a filters sidebar with options that are still relevant given the
 * other criteria in effect, and to fetch the result pages (typically with a
 * LIMIT and an IN query on the first n IDs returned here).
 *
 * All options are optional; an unspecified or empty option places no
 * restriction on that basis. 'categoryIds' limits to the categories associated
 * with the engine page, while 'categorySlug' limits to a category chosen by
 * the user as a filter:
 *
 *   array(
 *     'q' => 'gromit',                        # Lucene search
 *     'categoryIds' => array(1, 3, 5),
 *     'categorySlug' => 'cheese',
 *     'tag' => 'wensleydale',
 *     'slugStem' => '@a_event_search_redirect',
 *     'year' => 2010,                         # if present only 2010 is shown
 *     'month' => 12,                          # if present only Dec. 2010 is shown
 *     'day' => 15,                            # if present only Dec. 15th 2010 is shown
 *     'byEventDateRange' => true,             # events only, joins a_blog_item for the range
 *     'byPublishedAt' => true,                # blog posts or pages
 *     'alphaSort' => true,                    # order pages by their title slot value
 *     'culture' => 'en'                       # used with alphaSort; defaults to the user culture
 *   )
 *
 * @param array $options See above
 * @return array With keys 'categoriesInfo' (rows of slug and name),
 *               'tagsByName' and 'tagsByPopularity' (rows of name and t_count,
 *               the latter limited to 10) and 'pageIds'
 */
public static function filterForEngine($options)
{
  $alphaSort = isset($options['alphaSort']) && $options['alphaSort'];
  $hasSlugStem = isset($options['slugStem']);

  // Bound query parameters. Always start from an array so the queries below
  // never receive an undefined variable when no option contributes a parameter.
  $params = array();

  if (isset($options['q']) && strlen($options['q']))
  {
    $q = $options['q'];
    // Normalize the query to look up a configured refinement
    $key = strtolower(trim($q));
    $key = preg_replace('/\\s+/', ' ', $key);
    $replacements = sfConfig::get('app_a_search_refinements', array());
    if (isset($replacements[$key]))
    {
      $q = $replacements[$key];
    }
    if ($hasSlugStem)
    {
      $q = "({$q}) AND slug:" . $options['slugStem'];
    }
    try
    {
      $values = aZendSearch::searchLuceneWithValues(Doctrine::getTable('aPage'), $q, aTools::getUserCulture());
    }
    catch (Exception $e)
    {
      // Lucene search error. TODO: display it nicely if they are always safe
      // things to display. For now: just don't crash
      $values = array();
    }

    $now = date('YmdHis');
    $pageIds = array();
    foreach ($values as $value)
    {
      // If it ain't published yet we can't see it. The SQL below filters on
      // that too, but this preempts a little work
      if ($value->published_at > $now)
      {
        continue;
      }
      // 1.5: the names under which columns are stored in Zend Lucene changed
      // to avoid conflict with also indexing them
      $info = unserialize($value->info_stored);
      if (!aPageTable::checkPrivilege('view', $info))
      {
        continue;
      }
      $pageIds[] = $info['id'];
    }
  }

  $mysql = new aMysql();

  if ($hasSlugStem)
  {
    $params['slug_pattern'] = $options['slugStem'] . '%';
  }

  // Select the relevant virtual pages for this engine
  $q = 'from a_page p ';

  // If alpha sort is present we need title slots
  if ($alphaSort)
  {
    if (!isset($options['culture']))
    {
      $options['culture'] = aTools::getUserCulture();
    }
    $culture = $options['culture'];
    $q .= "\n LEFT JOIN a_area a ON a.page_id = p.id AND a.name = 'title' AND a.culture = :culture\n LEFT JOIN a_area_version v ON v.area_id = a.id AND a.latest_version = v.version \n LEFT JOIN a_area_version_slot avs ON avs.area_version_id = v.id\n LEFT JOIN a_slot s ON s.id = avs.slot_id ";
    $params['culture'] = $culture;
  }

  // Merge in categories. A left join unless we are restricted to certain categories
  $hasCategoryIds = isset($options['categoryIds']) && count($options['categoryIds']);
  $hasCategorySlug = isset($options['categorySlug']) && strlen($options['categorySlug']);
  $restrictedByCategory = $hasCategoryIds || $hasCategorySlug;
  if ($restrictedByCategory)
  {
    $cjoin = 'inner join';
  }
  else
  {
    $cjoin = 'left join';
  }
  $q .= $cjoin . ' a_page_to_category ptc on ptc.page_id = p.id ' . $cjoin . ' a_category c on ptc.category_id = c.id ';

  // The engine page is locked down to these categories. If none are specified
  // it is not locked down by category
  if ($hasCategoryIds)
  {
    $q .= "and c.id in :category_ids ";
    $params['category_ids'] = $options['categoryIds'];
  }

  // Bring in tags...
  $hasTag = isset($options['tag']) && strlen($options['tag']);
  if ($hasTag)
  {
    $q .= 'inner join ';
  }
  else
  {
    $q .= 'left join ';
  }
  $q .= 'tagging ti on ti.taggable_id = p.id and ti.taggable_model = "aPage" left join tag t on ti.tag_id = t.id ';

  // Get ready to filter posts or events chronologically
  $year = sprintf("%04d", isset($options['year']) ? $options['year'] : 0);
  $month = sprintf("%02d", isset($options['month']) ? $options['month'] : 0);
  $day = sprintf("%02d", isset($options['day']) ? $options['day'] : 0);
  $startYear = $year;
  $endYear = $year;
  if ($year > 0)
  {
    if ($month == 0)
    {
      // Do not mess up the two digit strings please
      $startMonth = '01';
      $startDay = '01';
      $endMonth = '12';
      $endDay = '31';
    }
    else
    {
      $startMonth = $month;
      $endMonth = $month;
      if ($day == 0)
      {
        // Do not mess up the two digit strings please.
        // NOTE(review): the end day is '31' for every month, so ranges such as
        // 2010-02-31 are produced; confirm the database compares these as intended
        $startDay = '01';
        $endDay = '31';
      }
      else
      {
        $startDay = $day;
        $endDay = $day;
      }
    }
  }
  else
  {
    // For posts "today and forward" is not a relevant concept (and a separate
    // clause already makes sure we don't see unpublished stuff). For events
    // the start date is overridden below
    $startYear = '0000';
    $startMonth = '01';
    $startDay = '01';
    $endYear = '9999';
    $endMonth = '12';
    $endDay = '31';
  }

  $events = isset($options['byEventDateRange']) && $options['byEventDateRange'];
  if ($events && $startYear === '0000')
  {
    // No explicit date filter on events: show today and forward
    list($startYear, $startMonth, $startDay) = explode('-', date('Y-m-d'));
  }
  if ($events)
  {
    // The event's start and end dates are part of the blog item table
    $q .= ' inner join a_blog_item bi on bi.page_id = p.id ';
    $q .= "and bi.start_date <= :end_date ";
    $params['end_date'] = "{$endYear}-{$endMonth}-{$endDay}";
    $q .= "and bi.end_date >= :start_date ";
    $params['start_date'] = "{$startYear}-{$startMonth}-{$startDay}";
  }

  // Criteria for the pages themselves. The slug restriction only applies when
  // a slugStem was given; otherwise :slug_pattern would be left unbound, so a
  // neutral condition opens the where clause for the "and ..." terms below
  if ($hasSlugStem)
  {
    $q .= 'where p.slug like :slug_pattern ';
  }
  else
  {
    $q .= 'where 0 = 0 ';
  }

  // We often filter posts (not events) by a range of publication dates
  if (isset($options['byPublishedAt']) && $options['byPublishedAt'])
  {
    // NOTE(review): the end bound carries no time component; confirm that items
    // published during the final day are matched as intended
    $q .= "and p.published_at <= :p_end_date ";
    $params['p_end_date'] = "{$endYear}-{$endMonth}-{$endDay}";
    $q .= "and p.published_at >= :p_start_date ";
    $params['p_start_date'] = "{$startYear}-{$startMonth}-{$startDay}";
  }

  // In no case do we show unpublished material
  $q .= 'and p.published_at <= NOW() and (p.archived IS NULL or p.archived IS FALSE) ';

  // ... But only those matching the Lucene search that already gave us specific IDs.
  // NOTE: if pageIds is set and is empty, NOTHING should be returned
  // (someone searched for something that doesn't appear in the system)
  if (isset($pageIds))
  {
    if (count($pageIds))
    {
      $q .= 'and p.id in :pageIds ';
      $params['pageIds'] = $pageIds;
    }
    else
    {
      $q .= 'and 0 <> 0 ';
    }
  }

  if ($alphaSort)
  {
    $pagesOrderBy = 's.value asc';
  }
  elseif ($events)
  {
    $pagesOrderBy = 'bi.start_date asc, bi.start_time asc';
  }
  else
  {
    // Blog presentation is typically descending, not ascending
    $pagesOrderBy = 'p.published_at desc';
  }

  // Separate queries (categories, tags, pages) sharing the same base
  $c_q = $q;
  $t_q = $q;
  $p_q = $q;

  // We are filtering by this specific category
  if ($hasCategorySlug)
  {
    // Limit tags and pages by this specific category, but don't limit
    // categories by it, otherwise we can't present a choice of categories
    // meeting the other criteria
    $t_q .= "and c.slug = :category_slug ";
    $p_q .= "and c.slug = :category_slug ";
    $params['category_slug'] = $options['categorySlug'];
  }
  if ($hasTag)
  {
    // Limit pages and categories by this specific tag, but don't limit
    // tags by it, otherwise we can't present a choice of tags
    // meeting the other criteria
    $p_q .= 'and t.name = :tag_name ';
    $c_q .= 'and t.name = :tag_name ';
    $params['tag_name'] = $options['tag'];
  }

  // In the cases where we are looking for categories or tags, be sure to
  // discard the null rows from the LEFT JOINs. This is simpler than
  // determining when to switch them to INNER JOINs
  $result = array(
    'categoriesInfo' => $mysql->query('select distinct c.slug, c.name ' . $c_q . 'and c.slug is not null order by c.name', $params),
    'tagsByName' => $mysql->query('select t.name, count(distinct p.id) as t_count ' . $t_q . 'and t.name is not null group by t.name order by t.name', $params),
    'tagsByPopularity' => $mysql->query('select t.name, count(distinct p.id) as t_count ' . $t_q . 'and t.name is not null group by t.name order by t_count desc limit 10', $params),
    'pageIds' => $mysql->queryScalar('select distinct p.id ' . $p_q . ' order by ' . $pagesOrderBy, $params)
  );

  return $result;
}
/**
 * Pushes this object's searchable fields to the Lucene index.
 *
 * Fields sent: type, title, description, credit, and the tags joined into
 * one comma-separated string.
 */
public function updateLuceneIndex()
{
  $fields = array();
  $fields['type'] = $this->getType();
  $fields['title'] = $this->getTitle();
  $fields['description'] = $this->getDescription();
  $fields['credit'] = $this->getCredit();
  $fields['tags'] = implode(", ", $this->getTags());

  aZendSearch::updateLuceneIndex($this, $fields);
}
/**
 * Delegates to aZendSearch::addSearchQuery() on behalf of this table.
 *
 * @param Doctrine_Query $q           Query handed to aZendSearch, or null
 * @param mixed          $luceneQuery Lucene query handed through unchanged
 * @return mixed Whatever aZendSearch::addSearchQuery() returns
 */
public function addSearchQuery(Doctrine_Query $q = null, $luceneQuery)
{
  $searchQuery = aZendSearch::addSearchQuery($this, $q, $luceneQuery);

  return $searchQuery;
}
/**
 * Runs a Lucene search over aPage and prepares a paginated result list.
 *
 * Sets $this->pager, $this->pagerUrl and $this->results, and sets the
 * response title. Hits flagged view_is_secure are hidden from users who are
 * not authenticated.
 *
 * @param sfWebRequest $request Reads the 'q', 'x' and 'page' parameters
 * @return mixed The redirect result when an 'x' parameter is present, otherwise nothing
 */
public function executeSearch(sfWebRequest $request)
{
  $q = $request->getParameter('q');

  if ($request->hasParameter('x'))
  {
    // input type="image" is nice for presentation but adds ugly x and y
    // click-coordinate parameters. Strip them and come back.
    $cleanUrl = sfContext::getInstance()->getController()->genUrl('a/search', true);
    return $this->redirect($cleanUrl . '?' . http_build_query(array("q" => $q)));
  }

  // Normalize the query (lowercase, trimmed, single spaces) to look up a configured refinement
  $key = preg_replace('/\\s+/', ' ', strtolower(trim($q)));
  $refinements = sfConfig::get('app_a_search_refinements', array());
  if (isset($refinements[$key]))
  {
    $q = $refinements[$key];
  }

  $hits = aZendSearch::searchLuceneWithValues(Doctrine::getTable('aPage'), $q, aTools::getUserCulture());

  $values = array();
  foreach ($hits as $hit)
  {
    // Secure pages are only listed for authenticated users
    if (!sfContext::getInstance()->getUser()->isAuthenticated()
      && isset($hit->view_is_secure)
      && $hit->view_is_secure)
    {
      continue;
    }

    $hit->url = aTools::urlForPage($hit->slug, true);
    $hit->class = 'aPage';
    $values[] = $hit;
  }

  // Subclasses may append their own results; re-sort by score when they do
  if ($this->searchAddResults($values, $q))
  {
    usort($values, "aActions::compareScores");
  }

  $perPage = sfConfig::get('app_a_search_results_per_page', 10);
  $this->pager = new aArrayPager(null, $perPage);
  $this->pager->setResultArray($values);
  $this->pager->setPage($request->getParameter('page', 1));
  $this->pager->init();
  $this->pagerUrl = "a/search?" . http_build_query(array("q" => $q));

  // setTitle takes care of escaping things
  $title = aTools::getOptionI18n('title_prefix') . 'Search for ' . $q;
  $this->getResponse()->setTitle($title);

  $this->results = $this->pager->getResults();
}
/**
 * Lists contacts matching a Lucene query and renders the 1_contactList template.
 *
 * The query is the request's 'query' parameter when it is non-empty;
 * otherwise the registration's full name with Lucene special characters
 * removed is used.
 *
 * @param sfWebRequest $request
 */
public function executeContactList(sfWebRequest $request)
{
  $this->checkProcess($request);

  // Fallback query: the registrant's full name, cleaned up for Lucene
  $fullName = $this->registration->getFullName();
  $fallbackQuery = aZendSearch::luceneRemoveSpecialchars($fullName);

  $requestedQuery = $request->getParameter('query');
  if ($requestedQuery)
  {
    $this->luceneQuery = $requestedQuery;
  }
  else
  {
    $this->luceneQuery = $fallbackQuery;
  }
  $this->query = $this->luceneQuery;

  $this->pager = $this->contact_getPager();
  $this->contacts = $this->contact_buildQuery()->execute();

  $this->setTemplate('1_contactList');
}
/**
 * Builds a Doctrine query restricted to the rows matched by a Lucene search.
 *
 * The query string is first passed through aZendSearch::luceneCombineAnd().
 * When Lucene finds nothing, the id list is array(null) rather than an empty
 * array.
 *
 * @param string $query Raw search string
 * @return mixed The result of createQuery('a')->andWhereIn('a.id', ...)
 */
public function createLuceneQuery($query = '')
{
  $combined = aZendSearch::luceneCombineAnd($query);

  $ids = array();
  foreach (self::getLuceneIndex()->find($combined) as $hit)
  {
    $ids[] = $hit->pk;
  }

  // No hits: hand andWhereIn a single null instead of an empty list
  if (!$ids)
  {
    $ids = array(null);
  }

  return $this->createQuery('a')->andWhereIn('a.id', $ids);
}
/**
 * Pushes this page's searchable content to the Lucene index.
 *
 * Admin pages are skipped entirely. The search text is the indexed field;
 * title, summary, slug and the serialized info structure (minus its title)
 * are passed as the accompanying values for the page's culture.
 */
public function updateLuceneIndex()
{
  // Never index admin pages: that goes against the spirit of keeping them
  // completely out of navigation. They are engines for administrative
  // purposes, not a place for content in the normal sense.
  if ($this->getAdmin())
  {
    return;
  }

  $stored = array(
    'title' => $this->getTitle(),
    'summary' => $this->getSearchSummary(),
  );
  $indexed = array('text' => $this->getSearchText());
  $stored['slug'] = $this->getSlug();

  // The info structure is well worth storing because it allows checking
  // explicit privileges. The title is already a separate field, so it is
  // not stored twice.
  $pageInfo = $this->getInfo();
  unset($pageInfo['title']);
  $culture = $this->getCulture();
  $stored['info'] = serialize($pageInfo);

  aZendSearch::updateLuceneIndex($this, $indexed, $culture, $stored);
}
/**
 * Hook that forwards to aZendSearch::searchLuceneWithScores() for this table.
 *
 * @param mixed $query   Lucene query, passed through unchanged
 * @param mixed $culture Culture, passed through unchanged
 * @return mixed Whatever aZendSearch::searchLuceneWithScores() returns
 */
public function searchLuceneWithScores($query, $culture)
{
  $scored = aZendSearch::searchLuceneWithScores($this, $query, $culture);

  return $scored;
}
/**
 * Rebuilds the Lucene search index of one table, or of every configured table.
 *
 * Three strategies are used per table:
 *  - aPage: purge the index, queue one a_lucene_update row per page/culture
 *    combination and call $this->update() until the queue is empty;
 *  - tables with a lucene_dirty field: purge the index, flag every row dirty
 *    and call $this->update() until no dirty rows remain;
 *  - anything else: a single rebuildLuceneIndex() call.
 *
 * @param mixed $arguments Task arguments (not read here)
 * @param mixed $options   Task options; 'connection', 'table' and 'verbose' are read
 */
protected function execute($arguments = array(), $options = array())
{
  // Memory usage has been reduced a lot here, but it's still an expensive job
  ini_set('memory_limit', '256M');

  // Bring up the database layer for the requested (or default) connection
  $manager = new sfDatabaseManager($this->configuration);
  $connectionName = $options['connection'] ? $options['connection'] : null;
  $manager->getDatabase($connectionName)->getConnection();

  // The context allows the use of helpers, notably url_for. Configure things
  // first so that reasonable siteless-but-rooted URLs can be generated.
  // TODO: think about ways to make this work for people who like frontend_dev.php etc.
  sfConfig::set('sf_no_script_name', true);
  $_SERVER['PHP_SELF'] = '';
  $_SERVER['SCRIPT_NAME'] = '';
  sfContext::createInstance($this->configuration);

  $indexes = isset($options['table'])
    ? array($options['table'])
    : sfConfig::get('app_aToolkit_indexes', array());
  $count = 0;

  foreach ($indexes as $index)
  {
    $table = Doctrine::getTable($index);

    if ($index === 'aPage')
    {
      aZendSearch::purgeLuceneIndex($table);

      // Updates for all page/culture combinations are about to be queued. Start
      // from an empty queue so interrupted and retried runs do not pile up
      // repeat requests.
      $this->query('DELETE FROM a_lucene_update');

      $pageRows = Doctrine::getTable('aPage')
        ->createQuery('p')
        ->innerJoin('p.Areas a')
        ->execute(array(), Doctrine::HYDRATE_ARRAY);

      foreach ($pageRows as $pageRow)
      {
        // Collect each culture once, keyed by culture so duplicates collapse
        $seenCultures = array();
        foreach ($pageRow['Areas'] as $areaRow)
        {
          $seenCultures[$areaRow['culture']] = true;
        }
        foreach (array_keys($seenCultures) as $culture)
        {
          $this->query(
            'INSERT INTO a_lucene_update (page_id, culture) VALUES (:page_id, :culture)',
            array('page_id' => $pageRow['id'], 'culture' => $culture)
          );
        }
      }

      // Keep running update passes until the queue has drained
      do
      {
        $rows = $this->query('SELECT COUNT(id) AS total FROM a_lucene_update');
        $count = $rows[0]['total'];
        $pending = ($count != 0);
        if ($pending)
        {
          if ($options['verbose'])
          {
            $this->logSection('toolkit', "{$count} pages remain to be indexed, starting another update pass...");
          }
          $this->update('aPage', $options);
        }
      }
      while ($pending);
    }
    elseif ($table->hasField('lucene_dirty'))
    {
      aZendSearch::purgeLuceneIndex($table);
      // NOTE(review): this value is not used anywhere below
      $sqlTableName = $table->getTableName();

      // Doctrine update and count queries give the performance while staying
      // compatible with aggregate inheritance "tables" like dukeTubesArticle
      // and dukeTubesEvent. Raw SQL would mark everything in the table dirty,
      // including rows Doctrine does not recognize as being of this type.
      Doctrine_Query::create()->update($index)->set('lucene_dirty', true)->execute();

      // Keep running update passes until no dirty rows remain
      do
      {
        $count = $table->createQuery('q')->where('q.lucene_dirty IS TRUE')->count();
        $pending = ($count != 0);
        if ($pending)
        {
          if ($options['verbose'])
          {
            $this->logSection('toolkit', "{$count} {$index} objects remain to be indexed, starting another update pass...");
          }
          $this->update($index, $options);
        }
      }
      while ($pending);
    }
    else
    {
      // There is no deferred update feature for other tables, so they
      // have to be rebuilt within the memory available
      $table->rebuildLuceneIndex();
    }

    if ($options['verbose'])
    {
      $this->logSection('toolkit', sprintf('Index for "%s" rebuilt', $index));
    }
  }
}
/**
 * Loads the Zend autoloader once per request and picks the Lucene analyzer.
 *
 * Later calls are no-ops thanks to the self::$zendLoaded flag. On the first
 * call the default Lucene analyzer is switched to the UTF-8 one when the
 * environment supports it.
 *
 * @return mixed Nothing is returned explicitly
 */
public static function registerZend()
{
  if (self::$zendLoaded)
  {
    return;
  }

  // Zend 1.8.0 and thereafter. (Before 1.8.0 this was Zend/Loader.php plus
  // Zend_Loader::registerAutoload().)
  include_once 'Zend/Loader/Autoloader.php';

  // NOT made the fallback autoloader: Symfony's is the default.
  // Thanks to Guglielmo Celata
  Zend_Loader_Autoloader::getInstance()->suppressNotFoundWarnings(false);
  self::$zendLoaded = true;

  // Use the UTF-8 analyzer only when iconv is available AND PCRE was compiled
  // with support for Unicode character classes (probed with \pL), which the
  // Lucene cross-language tokenizer requires. Having iconv alone does not
  // guarantee the latter. Thanks to Fotis, and to the Zend Lucene source
  // for the second check.
  $hasIconv = function_exists('iconv');
  if ($hasIconv && @preg_match('/\\pL/u', 'a') == 1)
  {
    $utf8Analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($utf8Analyzer);
  }
}
/**
 * Runs a Lucene search over aPage and prepares one page of results.
 *
 * Works in three stages: (1) filter all hits by publication date, view
 * privilege and slug shape, reading field values from the index document
 * rather than from the hit; (2) let searchAddResults() append results and
 * re-sort by score; (3) paginate, then fill in slug, title, summary, url and
 * an optional engine partial for the current page of results only.
 *
 * Sets $this->pager, $this->pagerUrl and $this->results, and sets the
 * response title.
 *
 * @param sfWebRequest $request Reads the 'q', 'x' and 'page' parameters
 * @return mixed The redirect result when an 'x' parameter is present, otherwise nothing
 */
public function executeSearch(sfWebRequest $request)
{
  // Timestamp in the same digits-only format compared against published_at below
  $now = date('YmdHis');
  // create the array of pages matching the query
  $q = $request->getParameter('q');
  if ($request->hasParameter('x'))
  {
    // We sometimes like to use input type="image" for presentation reasons, but it generates
    // ugly x and y parameters with click coordinates. Get rid of those and come back.
    return $this->redirect(sfContext::getInstance()->getController()->genUrl('a/search', true) . '?' . http_build_query(array("q" => $q)));
  }
  // Normalize the query (lowercase, trimmed, single spaces) to look up a configured refinement
  $key = strtolower(trim($q));
  $key = preg_replace('/\\s+/', ' ', $key);
  $replacements = sfConfig::get('app_a_search_refinements', array());
  if (isset($replacements[$key]))
  {
    $q = $replacements[$key];
  }
  try
  {
    $values = aZendSearch::searchLuceneWithValues(Doctrine::getTable('aPage'), $q, aTools::getUserCulture());
  }
  catch (Exception $e)
  {
    // Lucene search error. TODO: display it nicely if they are always safe things to display. For now: just don't crash
    $values = array();
  }
  // The truth is that Zend cannot do all of our filtering for us, especially
  // permissions-based. So we can do some other filtering as well, although it
  // would be bad not to have Zend take care of the really big cuts (if 99% are
  // not being prefiltered by Zend, and we have a Zend max results of 1000, then
  // we are reduced to working with a maximum of 10 real results).
  $nvalues = array();
  $index = Doctrine::getTable('aPage')->getLuceneIndex();
  foreach ($values as $value)
  {
    $document = $index->getDocument($value->id);
    // Old way: $published_at = $value->published_at;
    // New way: don't touch anything but $hit->id directly and you won't force a persistent
    // use of memory for the lazy loaded columns http://zendframework.com/issues/browse/ZF-8267
    $published_at = $document->getFieldValue('published_at');
    if ($published_at > $now)
    {
      continue;
    }
    // 1.5: the names under which we store columns in Zend Lucene have changed to
    // avoid conflict with also indexing them
    $info = unserialize($document->getFieldValue('info_stored'));
    if (!aPageTable::checkPrivilege('view', $info))
    {
      continue;
    }
    $slug = $document->getFieldValue('slug_stored');
    if (substr($slug, 0, 1) !== '@' && strpos($slug, '/') === false)
    {
      // A virtual page slug which is not a route is not interested in being part of search results
      continue;
    }
    $nvalues[] = $value;
  }
  $values = $nvalues;
  if ($this->searchAddResults($values, $q))
  {
    foreach ($values as $value)
    {
      if (get_class($value) === 'stdClass')
      {
        // bc with existing implementations of searchAddResults: they supply the
        // unsuffixed names, so copy those to the *_stored names read further down
        if (!isset($value->slug_stored))
        {
          if (isset($value->slug))
          {
            $value->slug_stored = $value->slug;
          }
          else
          {
            $value->slug_stored = null;
          }
        }
        if (!isset($value->title_stored))
        {
          // NOTE(review): unlike slug and engine, title and summary are read
          // without an isset() guard
          $value->title_stored = $value->title;
        }
        if (!isset($value->summary_stored))
        {
          $value->summary_stored = $value->summary;
        }
        if (!isset($value->engine_stored))
        {
          if (isset($value->engine))
          {
            $value->engine_stored = $value->engine;
          }
          else
          {
            $value->engine_stored = null;
          }
        }
      }
    }
    // Shape of a result as added by a searchAddResults implementation:
    // $value = new stdClass();
    // $value->url = $url;
    // $value->title = $title;
    // $value->score = $scores[$id];
    // $value->summary = $summary;
    // $value->class = 'Article';
    // $values[] = $value;
    usort($values, "aActions::compareScores");
  }
  $this->pager = new aArrayPager(null, sfConfig::get('app_a_search_results_per_page', 10));
  $this->pager->setResultArray($values);
  $this->pager->setPage($request->getParameter('page', 1));
  $this->pager->init();
  $this->pagerUrl = "a/search?" . http_build_query(array("q" => $q));
  // setTitle takes care of escaping things
  $this->getResponse()->setTitle(aTools::getOptionI18n('title_prefix') . 'Search for ' . $q . aTools::getOptionI18n('title_suffix'));
  $results = $this->pager->getResults();
  // Now that we have paginated and obtained the short list of results we really
  // care about it's OK to use the lazy load features of Lucene for the last mile
  $nresults = array();
  foreach ($results as $value)
  {
    $nvalue = $value;
    $nvalue->slug = $nvalue->slug_stored;
    $nvalue->title = $nvalue->title_stored;
    $nvalue->summary = $nvalue->summary_stored;
    if (strlen($nvalue->engine_stored))
    {
      // An engine may supply its own partial through a class named <engine>SearchHelper
      $helperClass = $nvalue->engine_stored . 'SearchHelper';
      if (class_exists($helperClass))
      {
        $searchHelper = new $helperClass();
        $nvalue->partial = $searchHelper->getPartial();
      }
    }
    // Results that already carry a url keep it
    if (!isset($nvalue->url))
    {
      if (substr($nvalue->slug, 0, 1) === '@')
      {
        // Virtual page slug is a named Symfony route, it wants search results to go there
        $nvalue->url = $this->getController()->genUrl($nvalue->slug, true);
      }
      else
      {
        $slash = strpos($nvalue->slug, '/');
        if ($slash === false)
        {
          // A virtual page (such as global) that isn't the least bit interested in
          // being part of search results
          continue;
        }
        if ($slash > 0)
        {
          // A virtual page slug which is a valid Symfony route, such as foo/bar?id=55
          $nvalue->url = $this->getController()->genUrl($nvalue->slug, true);
        }
        else
        {
          // A normal CMS page
          $nvalue->url = aTools::urlForPage($nvalue->slug);
        }
      }
    }
    // NOTE(review): this overwrites any class set by searchAddResults (the sample
    // above uses 'Article'); confirm that is intended
    $nvalue->class = 'aPage';
    $nresults[] = $nvalue;
  }
  $this->results = $nresults;
}
/**
 * Pushes this page's searchable content to the Lucene index.
 *
 * Admin pages are never indexed. For other pages an argument array is built
 * with indexed fields (text, slug, title, tags, categories, metadescription,
 * engine), stored fields (the *_stored values plus the serialized info
 * structure), field boosts and a digits-only published_at keyword. If the
 * page has an engine and a class named <engine>SearchHelper exists, that
 * helper may rewrite the arguments; the index is only updated when the
 * arguments are still an array afterwards.
 *
 * @return mixed Nothing is returned explicitly
 */
public function updateLuceneIndex()
{
  if ($this->getAdmin())
  {
    // Never index admin pages, that goes against the spirit of
    // keeping them completely out of navigation, they are not
    // a place for content in the normal sense, they are engines
    // for administrative purposes
    return;
  }

  $title = $this->getTitle();
  $engine = $this->getEngine();
  $summary = $this->getSearchSummary();
  $text = $this->getSearchText();

  // Category names joined into one comma-separated string
  $categories = array();
  $categoryObjects = $this->getCategories();
  foreach ($categoryObjects as $category)
  {
    $categories[] = $category->getName();
  }
  $categories = implode(',', $categories);

  $metaDescription = $this->getMetaDescription();
  $slug = $this->getSlug();
  $info = $this->getInfo();
  // Already separate fields, so don't store them twice.
  unset($info['title'], $info['engine']);

  // Fetched once here; an earlier, immediately overwritten fetch was removed
  $tags = $this->getTags();

  $args = array(
    'object' => $this,
    'indexed' => array(
      'text' => $text,
      'slug' => $slug,
      'title' => $title,
      'tags' => implode(', ', $tags),
      'categories' => $categories,
      'metadescription' => $metaDescription,
      'engine' => $engine
    ),
    'culture' => $this->getCulture(),
    'stored' => array(
      'title_stored' => $title,
      'summary_stored' => $summary,
      'slug_stored' => $slug,
      'engine_stored' => strlen($engine) ? $engine : '',
      'info_stored' => serialize($info)
    ),
    'boosts' => array('tags' => 2.0, 'metadescription' => 1.2, 'title' => 3.0),
    // Strip everything but digits so the value compares as a plain number string
    'keywords' => array('published_at' => preg_replace('/[^\\d]/', '', $this->published_at))
  );

  if (strlen($engine))
  {
    $helperClass = $engine . 'SearchHelper';
    if (class_exists($helperClass))
    {
      $searchHelper = new $helperClass();
      $args = $searchHelper->filterUpdateLuceneIndex($args);
    }
  }

  // If a filter returns false, it might be associated with a missing event.
  // Don't index in that case
  if (is_array($args))
  {
    aZendSearch::updateLuceneIndex($args);
  }
}