/**
 * Debug-only action that rebuilds the whole index inline and prints progress
 * to the browser. Reachable via the URL /Lucene/reindex.
 *
 * Do NOT use this for production reindexing: it makes no provision for
 * running out of memory or exceeding the script execution time. Use the
 * Queued Jobs reindex process instead.
 *
 * @access public
 * @return void
 */
public function reindex()
{
    set_time_limit(600);
    $startedAt = microtime(true);
    $lineBreak = "<br />\n";

    echo '<h1>Reindexing</h1>' . "\n";
    flush();

    echo 'Note that this process may die due to time limit or memory '
        . 'exhaustion, and is purely for debugging purposes. Use the '
        . 'Queued Jobs reindex process for production indexing.'
        . $lineBreak . $lineBreak;
    flush();

    // Passing true asks the wrapper for a fresh index before refilling it.
    ZendSearchLuceneWrapper::getIndex(true);

    foreach (ZendSearchLuceneWrapper::getAllIndexableObjects() as $entry) {
        // Each entry is a pair: [0] is the class name, [1] the record ID.
        $className = $entry[0];
        $recordId = $entry[1];
        $record = DataObject::get_by_id($className, $recordId);

        if (!$record) {
            echo 'Object ' . $className . ' ' . $recordId . ' was not found.' . $lineBreak;
            flush();
            continue;
        }

        $recordStartedAt = microtime(true);
        echo $className . ' ' . $recordId . ' (' . $record->class . ')';
        flush();

        ZendSearchLuceneWrapper::index($record);

        $elapsed = round(microtime(true) - $recordStartedAt, 3);
        echo ' - ' . $elapsed . ' seconds' . $lineBreak;
        flush();
    }

    $totalElapsed = round(microtime(true) - $startedAt, 3);
    echo $lineBreak . 'Finished (' . $totalElapsed . ' seconds)' . $lineBreak;
    flush();
}
/**
 * Checks that rebuilding the index through the CMS decorator indexes one
 * document for every SiteTree and File record.
 */
function testRebuildZendSearchLuceneIndex()
{
    // Setup: detach any previously registered Lucene extensions so that
    // enable() below starts from a known state.
    $registeredExtensions = array(
        'ContentController' => 'ZendSearchLuceneContentController',
        'SiteConfig'        => 'ZendSearchLuceneSiteConfig',
        'LeftAndMain'       => 'ZendSearchLuceneCMSDecorator',
        'SiteTree'          => 'ZendSearchLuceneSearchable',
        'File'              => 'ZendSearchLuceneSearchable',
    );
    foreach ($registeredExtensions as $extendedClass => $extensionName) {
        Object::remove_extension($extendedClass, $extensionName);
    }

    ZendSearchLuceneSearchable::$pageLength = 10;
    ZendSearchLuceneSearchable::$alwaysShowPages = 3;
    ZendSearchLuceneSearchable::$maxShowPages = 8;
    ZendSearchLuceneSearchable::$encoding = 'utf-8';
    ZendSearchLuceneSearchable::$cacheDirectory = TEMP_FOLDER;
    ZendSearchLuceneWrapper::$indexName = 'Test';
    ZendSearchLuceneSearchable::enable();

    // A freshly created index must start out empty.
    $freshIndex = ZendSearchLuceneWrapper::getIndex(true);
    $this->assertEquals(0, $freshIndex->count());

    // Every SiteTree and File record is expected to end up in the index.
    $pageTotal = DataObject::get('SiteTree')->count();
    $fileTotal = DataObject::get('File')->count();
    $expectedDocuments = $pageTotal + $fileTotal;

    // Re-index the database through the decorator.
    $decorator = new ZendSearchLuceneCMSDecorator();
    $decorator->rebuildZendSearchLuceneIndex();

    // Has correct number of items?
    $this->assertEquals(
        $expectedDocuments,
        ZendSearchLuceneWrapper::getIndex()->count()
    );
}
/**
 * Runs the submitted search and renders the results page.
 *
 * The value of the form's "Search" field is parsed into a Lucene query, the
 * index is searched, and the hits are rendered on the owner using the
 * Lucene_results template (falling back to Page).
 *
 * @access public
 * @param   array           $data       The raw request data submitted by the
 *                                      user. Not read by this method.
 * @param   Form            $form       The form instance that was submitted
 * @param   SS_HTTPRequest  $request    Request generated for this action
 * @return  String          The rendered results, for inclusion into the page
 *                          template.
 */
public function ZendSearchLuceneResults($data, $form, $request)
{
    $searchField = $form->Fields()->dataFieldByName('Search');
    $parsedQuery = Zend_Search_Lucene_Search_QueryParser::parse($searchField->dataValue());

    $templateData = $this->getDataArrayFromHits(
        ZendSearchLuceneWrapper::find($parsedQuery),
        $request
    );

    return $this->owner
        ->customise($templateData)
        ->renderWith(array('Lucene_results', 'Page'));
}
/**
 * Builds a key of the form "search_<md5>" from a query argument list.
 *
 * When the first element is not a string it is first rewritten against the
 * current index; if that rewrite throws, the serialized argument list is
 * stored in its place instead. The key is the MD5 of the serialized result.
 *
 * @access private
 * @static
 * @param   array   $query  Argument list whose first element is either a
 *                          query string or an object offering rewrite().
 * @return  String  The generated key.
 */
private static function hash($query)
{
    $subject = $query[0];

    // Plain query strings can be hashed as they are.
    if (is_string($subject)) {
        return 'search_' . md5(serialize($query));
    }

    try {
        $query[0] = $subject->rewrite(ZendSearchLuceneWrapper::getIndex());
    } catch (Exception $e) {
        // Rewrite failed: fall back to the serialized form of the arguments.
        $query[0] = serialize($query);
    }

    return 'search_' . md5(serialize($query));
}
/**
 * Checks that enabling the module adds a "rebuildZendSearchLuceneIndex"
 * action to the SiteConfig CMS actions.
 */
function testUpdateCMSActions()
{
    // Setup: detach any previously registered Lucene extensions so that
    // enable() below starts from a known state.
    $registeredExtensions = array(
        'ContentController' => 'ZendSearchLuceneContentController',
        'SiteConfig'        => 'ZendSearchLuceneSiteConfig',
        'LeftAndMain'       => 'ZendSearchLuceneCMSDecorator',
        'SiteTree'          => 'ZendSearchLuceneSearchable',
        'File'              => 'ZendSearchLuceneSearchable',
    );
    foreach ($registeredExtensions as $extendedClass => $extensionName) {
        Object::remove_extension($extendedClass, $extensionName);
    }

    ZendSearchLuceneSearchable::$pageLength = 10;
    ZendSearchLuceneSearchable::$alwaysShowPages = 3;
    ZendSearchLuceneSearchable::$maxShowPages = 8;
    ZendSearchLuceneSearchable::$encoding = 'utf-8';
    ZendSearchLuceneSearchable::$cacheDirectory = TEMP_FOLDER;
    ZendSearchLuceneWrapper::$indexName = 'Test';
    ZendSearchLuceneSearchable::enable(array());

    $siteConfig = SiteConfig::current_site_config();
    $rebuildAction = $siteConfig->getCMSActions()->fieldByName('rebuildZendSearchLuceneIndex');

    $this->assertTrue(is_object($rebuildAction));
}
/**
 * Indexes a single queued document per call.
 *
 * Takes the next [class name, ID] pair off the list of remaining documents,
 * indexes the matching record, and stores the shortened list back on the
 * job. The job is flagged complete once no documents remain.
 *
 * @access public
 * @return void
 */
public function process()
{
    $remainingDocuments = $this->remainingDocuments;

    // if there's no more, we're done!
    if (!count($remainingDocuments)) {
        $this->isComplete = true;
        return;
    }

    $this->currentStep++;

    $item = array_shift($remainingDocuments);
    $obj = DataObject::get_by_id($item[0], $item[1]);

    // The record may have been deleted after the job was queued; skip it
    // instead of handing a non-object to the indexer.
    if ($obj) {
        ZendSearchLuceneWrapper::index($obj);
    }

    // and now we store the new list of remaining children
    $this->remainingDocuments = $remainingDocuments;

    if (!count($remainingDocuments)) {
        $this->isComplete = true;
    }
}
/**
 * Indexes a single queued document per call and optimizes the index once
 * the queue has been drained.
 *
 * Takes the next [class name, ID] pair off $this->jobData and indexes the
 * matching record. When nothing is left (either on entry or after this
 * step) the job is flagged complete and the index is optimized.
 *
 * @access public
 * @return void
 */
public function process()
{
    if (count($this->jobData)) {
        $this->currentStep++;

        $item = array_shift($this->jobData);
        $obj = DataObject::get_by_id($item[0], $item[1]);

        // The record may have been deleted after the job was queued; skip it
        // instead of handing a non-object to the indexer.
        if ($obj) {
            ZendSearchLuceneWrapper::index($obj);
        }

        // More documents are waiting: leave the job open for the next call.
        if (count($this->jobData)) {
            return;
        }
    }

    // if there's no more, we're done!
    $this->isComplete = true;
    $idx = ZendSearchLuceneWrapper::getIndex();
    $idx->optimize();
}
/**
 * Triggers a search index rebuild whenever a dev/build is run, and prints a
 * confirmation list item.
 *
 * The rebuild happens at most once per request: the flag is cleared after
 * the first call. To switch it off entirely, add the following to your
 * _config.php:
 *
 * <code>
 * ZendSearchLuceneSearchable::$reindexOnDevBuild = false;
 * </code>
 */
public function requireDefaultRecords()
{
    if (self::$reindexOnDevBuild) {
        ZendSearchLuceneWrapper::rebuildIndex();

        $notice = _t(
            'ZendSearchLucene.RebuildSuccessMessage',
            'A Lucene search index rebuild job has been added to the Jobs queue.'
        );
        echo '<li><em>' . $notice . '</em></li>';

        // Only run once
        self::$reindexOnDevBuild = false;
    }
}
/**
 * Checks that a page becomes searchable after it is written and stops being
 * searchable after it is deleted.
 */
public function testOnAfterDelete()
{
    // Setup: detach the searchable extension so that enable() below starts
    // from a known state.
    foreach (array('SiteTree', 'File') as $extendedClass) {
        Object::remove_extension($extendedClass, 'ZendSearchLuceneSearchable');
    }

    ZendSearchLuceneSearchable::$pageLength = 10;
    ZendSearchLuceneSearchable::$alwaysShowPages = 3;
    ZendSearchLuceneSearchable::$maxShowPages = 8;
    ZendSearchLuceneSearchable::$encoding = 'utf-8';
    ZendSearchLuceneSearchable::$cacheDirectory = TEMP_FOLDER;
    ZendSearchLuceneWrapper::$indexName = 'Test';
    ZendSearchLuceneSearchable::enable();

    // Blank the index
    ZendSearchLuceneWrapper::getIndex(true);

    $needle = 'asdf';

    // There shouldn't be anything with asdf in there
    $hitsBefore = ZendSearchLuceneWrapper::find($needle);
    $this->assertEquals(0, count($hitsBefore));

    $page = DataObject::get_one('Page');
    $page->Content = $needle;
    $page->write();

    // There should now be a result
    $hitsAfterWrite = ZendSearchLuceneWrapper::find($needle);
    $this->assertGreaterThan(0, count($hitsAfterWrite));

    $page->delete();

    // There should now be no result again
    $hitsAfterDelete = ZendSearchLuceneWrapper::find($needle);
    $this->assertEquals(0, count($hitsAfterDelete));
}
/**
 * Returns the list of available subclasses of ZendSearchLuceneTextExtractor
 * in the order in which they should be processed. Order is determined by
 * the $priority static on each class. Default is 100 for all inbuilt
 * classes, lower numbers get run first.
 *
 * The sorted list is cached in self::$extractorClasses after the first call.
 *
 * @access private
 * @static
 * @return  Array   An array of strings containing classnames.
 */
private static function getTextExtractorClasses()
{
    if (!self::$extractorClasses) {
        $all_classes = ClassInfo::subclassesFor('ZendSearchLuceneTextExtractor');

        // An anonymous function replaces create_function(), which is
        // deprecated as of PHP 7.2 and removed in PHP 8.0.
        usort($all_classes, function ($a, $b) {
            // Read the static $priority of each class via reflection, since
            // only the class names are available here.
            $reflectionA = new ReflectionClass($a);
            $priorityA = $reflectionA->getStaticPropertyValue('priority');

            $reflectionB = new ReflectionClass($b);
            $priorityB = $reflectionB->getStaticPropertyValue('priority');

            if ($priorityA == $priorityB) {
                return 0;
            }
            return ($priorityA < $priorityB) ? -1 : 1;
        });

        self::$extractorClasses = $all_classes;
    }

    return self::$extractorClasses;
}
/**
 * Method for testing config.
 *
 * Prints an HTML report covering: whether the Queued Jobs module folder is
 * present, which document-scanning utilities and PHP extensions are
 * available, the number of records in the index, two MySQL server settings
 * (max_allowed_packet and log_bin), and the Lucene class and field
 * configuration of every DataObject subclass that has the
 * ZendSearchLuceneSearchable extension.
 *
 * @access public
 * @return void
 */
public function diagnose()
{
    echo '<h1>Lucene Diagnosis</h1>';

    echo '<hr /><h2>Dependencies</h2>';
    if (!file_exists(Director::baseFolder() . '/queuedjobs')) {
        echo '<p>The <strong>Queued Jobs</strong> module is not installed. Reindexing will not work.</p>';
        echo '<p>Please install this module to enable Lucene.</p>';
        echo '<p><a href="http://www.silverstripe.org/queued-jobs-module/">Queued Jobs</a></p>';
    } else {
        echo '<p>The <strong>Queued Jobs</strong> module is installed.</p>';
    }

    echo '<hr /><h2>Installed programs/extensions</h2>';

    // catdoc - scan older MS documents
    $catdoc = false;
    if (defined('CATDOC_BINARY_LOCATION') && file_exists(CATDOC_BINARY_LOCATION)) {
        $catdoc = CATDOC_BINARY_LOCATION;
    } elseif (file_exists('/usr/bin/catdoc')) {
        $catdoc = '/usr/bin/catdoc';
    } elseif (file_exists('/usr/local/bin/catdoc')) {
        $catdoc = '/usr/local/bin/catdoc';
    }
    if ($catdoc) {
        echo '<p>Utility <strong>catdoc</strong> is installed at ' . $catdoc
            . ' - older MS Office documents (.doc, .xls, .ppt) will be scanned.</p>';
    } else {
        echo '<p>Utility <strong>catdoc</strong> is not installed. Older MS Office documents (.doc, .xls, .ppt) will not be scanned.</p>';
    }

    // zip - scan newer MS documents
    if (extension_loaded('zip')) {
        echo '<p>PHP extension <strong>zip</strong> is installed - newer MS Office documents (.docx, .xlsx, .pptx) will be scanned.</p>';
    } else {
        echo '<p>PHP extension <strong>zip</strong> is not installed - newer MS Office documents (.docx, .xlsx, .pptx) will not be scanned.</p>';
    }

    // pdftotext - scan PDF documents
    // NOTE(review): unlike catdoc above, a configured PDFTOTEXT_BINARY_LOCATION
    // is not checked with file_exists() - confirm this matches how the PDF
    // extractor itself resolves the binary before tightening it.
    $pdftotext = false;
    if (defined('PDFTOTEXT_BINARY_LOCATION')) {
        $pdftotext = PDFTOTEXT_BINARY_LOCATION;
    } elseif (file_exists('/usr/bin/pdftotext')) {
        $pdftotext = '/usr/bin/pdftotext';
    } elseif (file_exists('/usr/local/bin/pdftotext')) {
        $pdftotext = '/usr/local/bin/pdftotext';
    }
    if ($pdftotext) {
        echo '<p>Utility <strong>pdftotext</strong> is installed at ' . $pdftotext . '. ' . "\n"
            . 'PDF documents will be scanned.</p>';
    } elseif (extension_loaded('zlib')) {
        echo '<p>Utility <strong>pdftotext</strong> is not installed, but the PDF2Text class will be used to scan PDF documents.</p>';
    } else {
        echo '<p>Utility <strong>pdftotext</strong> is not installed, and PHP extension <strong>zlib</strong> is not loaded. '
            . 'PDF documents using gzip compression will not be scanned. Other PDF documents will be scanned using the PDF2Text class.</p>';
    }

    echo '<hr /><h2>Index</h2>';
    $idx = ZendSearchLuceneWrapper::getIndex();
    echo '<p>Number of records in the index: ' . $idx->count() . '</p>';
    echo '<p>Number of records in the index (excluding deleted records): ' . $idx->numDocs() . '</p>';

    echo '<hr /><h2>Database setup</h2>';

    // Server variables are read through the framework's DB layer rather than
    // the mysql_* functions, which were removed in PHP 7.0 and do not share
    // the framework's connection.
    $maxPacket = DB::query('SELECT @@max_allowed_packet AS size')->value();
    echo '<p>Your MySQL max_allowed_packet value is ' . $maxPacket . '.<br/>';
    if ($maxPacket >= 128 * 1024 * 1024) {
        echo 'This should be high enough to cope with large datasets.';
    } else {
        echo 'This may cause issues with large datasets.</p>';
        echo '<p>To rectify this, you can add the following lines to functions that may create large datasets, eg. search actions:</p>';
        echo '<pre>'
            . 'mysql_query(\'SET GLOBAL net_buffer_length=1000000\');' . "\n"
            . 'mysql_query(\'SET GLOBAL max_allowed_packet=1000000000\');</pre>';
        echo '<p>Alternatively, you can set these config values in your MySQL server config file.';
    }
    // Closes whichever paragraph the branch above left open.
    echo '</p>';

    $logBin = DB::query('SELECT @@log_bin AS log_bin')->value();
    if ($logBin == 0) {
        echo '<p>Your MySQL server is set to not use the binary log.<br/>'
            . 'This is the correct setting.</p>';
    } else {
        echo '<p>Your MySQL server is set to use the binary log.<br/>'
            . 'This will result in a large amount of disk space being used for '
            . 'logging Lucene operations, which can use many GB of space with '
            . 'large datasets.</p>';
        echo '<p>To rectify this, you can add the following lines to your _config.php:</p>';
        echo '<pre>'
            . 'mysql_query(\'SET GLOBAL log_bin=0\');' . "\n"
            . '</pre>';
        // The closing </p> was previously missing, leaving the paragraph open.
        echo '<p>Alternatively, you can set this config value in your MySQL server config file.</p>';
    }

    $classes = ClassInfo::subclassesFor('DataObject');
    foreach ($classes as $class) {
        if (!Object::has_extension($class, 'ZendSearchLuceneSearchable')) {
            continue;
        }

        $class_config = singleton($class)->getLuceneClassConfig();
        echo '<hr/><h2>' . $class . '</h2>';
        echo '<h3>Class config</h3>';
        Debug::dump($class_config);

        echo '<h3>Field config</h3>';
        foreach (singleton($class)->getSearchedVars() as $fieldname) {
            echo '<h4>' . $fieldname . '</h4>';
            if ($fieldname == 'Link') {
                echo '<p>No output means that Link is not indexed for this class.</p>';
            }
            // Errors are deliberately suppressed here, as in the original.
            @Debug::dump(singleton($class)->getLuceneFieldConfig($fieldname));
        }
    }
}