/**
 * Rebuilds the Lucene search index for each configured table.
 *
 * The list of tables is the "table" option when it is set, otherwise the
 * app_aToolkit_indexes setting. For aPage the index is purged, one
 * a_lucene_update row is queued per page/culture pair, and update() is
 * called until that queue is empty. Any other table is rebuilt with a single
 * call to its rebuildLuceneIndex() method.
 *
 * @param array $arguments Task arguments (not read by this method)
 * @param array $options   Task options; "connection" and "table" are read
 */
protected function execute($arguments = array(), $options = array())
{
  // Every page gets looked at, so memory usage is a bit high, and the
  // Rackspace Cloud environment has a very low default memory limit for
  // their ersatz "cron jobs."
  // TODO: prioritize a low-memory solution for rebuild-search-index, which
  // will be necessary for large sites anyway
  ini_set('memory_limit', '256M');

  // Bring up the database connection
  $databaseManager = new sfDatabaseManager($this->configuration);
  $connectionName = $options['connection'] ? $options['connection'] : null;
  $connection = $databaseManager->getDatabase($connectionName)->getConnection();

  // Create the context, which allows use of helpers, notably url_for.
  // The config vars are set first so that reasonable siteless-but-rooted
  // URLs can be generated.
  // TODO: think about ways to make this work for people who like
  // frontend_dev.php etc., although we're doing rather well with an
  // index.php that suits each environment
  sfConfig::set('sf_no_script_name', true);
  $_SERVER['PHP_SELF'] = '';
  $_SERVER['SCRIPT_NAME'] = '';
  $context = sfContext::createInstance($this->configuration);

  $indexes = isset($options['table'])
    ? array($options['table'])
    : sfConfig::get('app_aToolkit_indexes', array());

  $countSql = 'SELECT COUNT(id) AS total FROM a_lucene_update';

  foreach ($indexes as $index)
  {
    $table = Doctrine::getTable($index);

    if ($index !== 'aPage')
    {
      // There is no deferred update feature for other tables, so they have
      // to be completed within the memory available
      $table->rebuildLuceneIndex();
      $this->logSection('toolkit', sprintf('Index for "%s" rebuilt', $index));
      continue;
    }

    aZendSearch::purgeLuceneIndex($table);

    // Updates for all page/culture combinations are about to be requested.
    // Clear the existing workload first, otherwise repeat requests could
    // pile up if someone keeps interrupting this task and trying again.
    $this->query('DELETE FROM a_lucene_update');

    $pageRows = Doctrine::getTable('aPage')
      ->createQuery('p')
      ->innerJoin('p.Areas a')
      ->execute(array(), Doctrine::HYDRATE_ARRAY);

    foreach ($pageRows as $pageRow)
    {
      // Collect the distinct cultures that appear among this page's areas
      $seen = array();
      foreach ($pageRow['Areas'] as $areaRow)
      {
        $seen[$areaRow['culture']] = true;
      }

      foreach (array_keys($seen) as $culture)
      {
        $this->query(
          'INSERT INTO a_lucene_update (page_id, culture) VALUES (:page_id, :culture)',
          array('page_id' => $pageRow['id'], 'culture' => $culture)
        );
      }
    }

    // Keep running update passes until the queue is empty
    $rows = $this->query($countSql);
    $count = $rows[0]['total'];
    while ($count != 0)
    {
      $this->logSection('toolkit', "{$count} pages remain to be indexed, starting another update pass...");
      $this->update();
      $rows = $this->query($countSql);
      $count = $rows[0]['total'];
    }

    $this->logSection('toolkit', sprintf('Index for "%s" rebuilt', $index));
  }
}
/**
 * Rebuilds the Lucene search index for every configured table.
 *
 * The list of tables is the "table" option when it is set, otherwise the
 * app_aToolkit_indexes setting. One of three strategies is used per table:
 *  - aPage: purge the index, queue one a_lucene_update row per page/culture
 *    pair, then call update() until the queue is empty.
 *  - Tables with a lucene_dirty field: purge the index, flag every row as
 *    dirty, then call update() until no dirty rows remain.
 *  - Anything else: a single call to the table's rebuildLuceneIndex().
 *
 * Progress messages are logged only when the "verbose" option is truthy.
 *
 * @param mixed $arguments Task arguments (not read by this method)
 * @param mixed $options   Task options; "connection", "table" and "verbose" are read
 */
protected function execute($arguments = array(), $options = array())
{
  // We've come a long way in reducing memory usage here, but it's still an
  // expensive job
  ini_set('memory_limit', '256M');

  // Option flags are read with empty() so an absent key does not raise a
  // notice; a key that is present behaves exactly as a direct read would.
  $verbose = !empty($options['verbose']);
  $connectionName = empty($options['connection']) ? null : $options['connection'];

  // Initialize the database connection. The return value is not needed here.
  $databaseManager = new sfDatabaseManager($this->configuration);
  $databaseManager->getDatabase($connectionName)->getConnection();

  // Initialize the context, which allows use of helpers, notably url_for.
  // The config vars are set first so that reasonable siteless-but-rooted
  // URLs can be generated.
  // TODO: think about ways to make this work for people who like
  // frontend_dev.php etc., although we're doing rather well with an
  // index.php that suits each environment
  sfConfig::set('sf_no_script_name', true);
  $_SERVER['PHP_SELF'] = '';
  $_SERVER['SCRIPT_NAME'] = '';
  sfContext::createInstance($this->configuration);

  if (isset($options['table']))
  {
    $indexes = array($options['table']);
  }
  else
  {
    $indexes = sfConfig::get('app_aToolkit_indexes', array());
  }

  foreach ($indexes as $index)
  {
    $table = Doctrine::getTable($index);

    if ($index === 'aPage')
    {
      aZendSearch::purgeLuceneIndex($table);

      // We're about to request updates of all page/culture combinations.
      // Don't add that to an existing workload, which could result in a huge
      // pileup of repeat requests if someone starts interrupting this task
      // and trying again, etc.
      $this->query('DELETE FROM a_lucene_update');

      $pages = Doctrine::getTable('aPage')
        ->createQuery('p')
        ->innerJoin('p.Areas a')
        ->execute(array(), Doctrine::HYDRATE_ARRAY);

      foreach ($pages as $page)
      {
        // Distinct cultures that appear among this page's areas
        $cultures = array();
        foreach ($page['Areas'] as $area)
        {
          $cultures[$area['culture']] = true;
        }

        foreach (array_keys($cultures) as $culture)
        {
          $this->query(
            'INSERT INTO a_lucene_update (page_id, culture) VALUES (:page_id, :culture)',
            array('page_id' => $page['id'], 'culture' => $culture)
          );
        }
      }

      while (true)
      {
        $result = $this->query('SELECT COUNT(id) AS total FROM a_lucene_update');
        // Cast so the comparison does not depend on how the driver
        // represents the count
        $count = (int) $result[0]['total'];
        if ($count === 0)
        {
          break;
        }
        if ($verbose)
        {
          $this->logSection('toolkit', "{$count} pages remain to be indexed, starting another update pass...");
        }
        $this->update('aPage', $options);
      }
    }
    elseif ($table->hasField('lucene_dirty'))
    {
      aZendSearch::purgeLuceneIndex($table);

      // Use Doctrine update and count queries to get the performance while
      // retaining compatibility with aggregate inheritance "tables" like
      // dukeTubesArticle and dukeTubesEvent. With raw SQL we get confused
      // because we run out of objects that Doctrine recognizes as being of
      // the relevant type but we marked everything in the table as "dirty"
      Doctrine_Query::create()->update($index)->set('lucene_dirty', true)->execute();

      while (true)
      {
        $count = (int) $table->createQuery('q')->where('q.lucene_dirty IS TRUE')->count();
        if ($count === 0)
        {
          break;
        }
        if ($verbose)
        {
          $this->logSection('toolkit', "{$count} {$index} objects remain to be indexed, starting another update pass...");
        }
        $this->update($index, $options);
      }
    }
    else
    {
      // We don't have a deferred update feature for other tables,
      // so we'll have to get them done in the memory available
      $table->rebuildLuceneIndex();
    }

    if ($verbose)
    {
      $this->logSection('toolkit', sprintf('Index for "%s" rebuilt', $index));
    }
  }
}
/**
 * Purges the Lucene index for this table and re-indexes every page/culture
 * pair in a single run.
 *
 * This does the entire thing at one go, which may be too memory intensive.
 * The apostrophe:rebuild-search-index task instead invokes
 * apostrophe:update-search-index for batches of 100 pages.
 *
 * A page/culture pair whose page cannot be retrieved is skipped.
 */
public function rebuildLuceneIndex()
{
  aZendSearch::purgeLuceneIndex($this);

  $pages = $this->createQuery('p')
    ->innerJoin('p.Areas a')
    ->execute(array(), Doctrine::HYDRATE_ARRAY);

  foreach ($pages as $page)
  {
    // Distinct cultures that appear among this page's areas
    $cultures = array();
    foreach ($page['Areas'] as $area)
    {
      $cultures[$area['culture']] = true;
    }

    foreach (array_keys($cultures) as $culture)
    {
      $cpage = aPageTable::retrieveBySlugWithSlots($page['slug'], $culture);
      // NOTE(review): retrieveBySlugWithSlots() is defined elsewhere; it
      // presumably returns a falsy value when no page matches (e.g. the page
      // was removed after the listing query ran) - confirm. Skipping avoids a
      // fatal error that would leave the freshly purged index half built.
      if (!$cpage)
      {
        continue;
      }
      $cpage->updateLuceneIndex();
    }
  }
}
/**
 * Purges the Lucene index for this table and re-indexes every page/culture
 * pair in a single run.
 *
 * Pages and their areas are read with one joined, array-hydrated query, so
 * Areas are not lazy-loaded page by page. Pages without any area are not
 * returned by the join; they have no cultures to index in any case.
 *
 * A page/culture pair whose page cannot be retrieved is skipped.
 */
public function rebuildLuceneIndex()
{
  aZendSearch::purgeLuceneIndex($this);

  $pages = $this->createQuery('p')
    ->innerJoin('p.Areas a')
    ->execute(array(), Doctrine::HYDRATE_ARRAY);

  foreach ($pages as $page)
  {
    // Distinct cultures that appear among this page's areas
    $cultures = array();
    foreach ($page['Areas'] as $area)
    {
      $cultures[$area['culture']] = true;
    }

    foreach (array_keys($cultures) as $culture)
    {
      $cpage = self::retrieveByIdWithSlots($page['id'], $culture);
      // NOTE(review): retrieveByIdWithSlots() is defined elsewhere; it
      // presumably returns a falsy value when no page matches (e.g. the page
      // was removed after the listing query ran) - confirm. Skipping avoids a
      // fatal error that would leave the freshly purged index half built.
      if (!$cpage)
      {
        continue;
      }
      $cpage->updateLuceneIndex();
    }
  }
}