/**
  * Debug action that rebuilds the whole index in one request and prints
  * per-object progress as HTML, reachable via the URL /Lucene/reindex
  *
  * A fresh index is requested from ZendSearchLuceneWrapper, then every entry
  * returned by getAllIndexableObjects() is loaded and indexed in turn.  Each
  * entry is used as a (class name, ID) pair for DataObject::get_by_id().
  * Output is flushed after every message so progress is visible while the
  * request is still running.
  *
  * Note that this should NOT be used as a reindexing process in production:
  * apart from raising the time limit to 600 seconds, it does nothing to cope
  * with memory exhaustion or script execution time problems.
  *
  * @access public
  */
 public function reindex()
 {
     set_time_limit(600);
     $startedAt = microtime(true);

     echo '<h1>Reindexing</h1>' . "\n";
     flush();
     echo 'Note that this process may die due to time limit or memory '
         . 'exhaustion, and is purely for debugging purposes.  Use the '
         . 'Queued Jobs reindex process for production indexing.'
         . "<br />\n<br />\n";
     flush();

     // Passing true asks the wrapper for a fresh index before refilling it.
     ZendSearchLuceneWrapper::getIndex(true);

     foreach (ZendSearchLuceneWrapper::getAllIndexableObjects() as $entry) {
         $className = $entry[0];
         $recordId = $entry[1];
         $record = DataObject::get_by_id($className, $recordId);

         // Report missing records and move on to the next entry.
         if (!$record) {
             echo 'Object ' . $className . ' ' . $recordId . ' was not found.' . "<br />\n";
             flush();
             continue;
         }

         $recordStartedAt = microtime(true);
         echo $className . ' ' . $recordId . ' (' . $record->class . ')';
         flush();

         ZendSearchLuceneWrapper::index($record);

         $recordSeconds = round(microtime(true) - $recordStartedAt, 3);
         echo ' - ' . $recordSeconds . ' seconds' . "<br />\n";
         flush();
     }

     $totalSeconds = round(microtime(true) - $startedAt, 3);
     echo "<br />\n" . 'Finished (' . $totalSeconds . ' seconds)' . "<br />\n";
     flush();
 }
 /**
  * Asserts that rebuildZendSearchLuceneIndex() on the CMS decorator leaves
  * the index holding one document per SiteTree and File record.
  *
  * The extensions are removed and re-enabled with known settings first, and
  * a fresh 'Test' index is created and asserted to be empty.
  */
 public function testRebuildZendSearchLuceneIndex()
 {
     // Setup
     Object::remove_extension('ContentController', 'ZendSearchLuceneContentController');
     Object::remove_extension('SiteConfig', 'ZendSearchLuceneSiteConfig');
     Object::remove_extension('LeftAndMain', 'ZendSearchLuceneCMSDecorator');
     Object::remove_extension('SiteTree', 'ZendSearchLuceneSearchable');
     Object::remove_extension('File', 'ZendSearchLuceneSearchable');
     ZendSearchLuceneSearchable::$pageLength = 10;
     ZendSearchLuceneSearchable::$alwaysShowPages = 3;
     ZendSearchLuceneSearchable::$maxShowPages = 8;
     ZendSearchLuceneSearchable::$encoding = 'utf-8';
     ZendSearchLuceneSearchable::$cacheDirectory = TEMP_FOLDER;
     ZendSearchLuceneWrapper::$indexName = 'Test';
     ZendSearchLuceneSearchable::enable();
     $index = ZendSearchLuceneWrapper::getIndex(true);
     // Freshly created index must be empty
     $this->assertEquals(0, $index->count());
     // Count number of SiteTree and File objects.  DataObject::get() can
     // return null instead of an empty set when nothing matches, so guard
     // before calling count() to avoid a fatal error on an empty table.
     $siteTrees = DataObject::get('SiteTree');
     $files = DataObject::get('File');
     $siteTreeCount = $siteTrees ? $siteTrees->count() : 0;
     $fileCount = $files ? $files->count() : 0;
     $indexableCount = $siteTreeCount + $fileCount;
     // Re-index database
     $obj = new ZendSearchLuceneCMSDecorator();
     $obj->rebuildZendSearchLuceneIndex();
     // Has correct number of items?
     $this->assertEquals($indexableCount, ZendSearchLuceneWrapper::getIndex()->count());
 }
 /**
  * Form action that runs the submitted search and renders the results.
  *
  * The value of the form's 'Search' field is parsed into a Lucene query,
  * the query is run through ZendSearchLuceneWrapper::find(), and the hits
  * are converted to template data which is rendered with the
  * Lucene_results template (then Page).
  *
  * @access public
  * @param   array           $data       The raw request data submitted by user (not read here)
  * @param   Form            $form       The form instance that was submitted
  * @param   SS_HTTPRequest  $request    Request generated for this action
  * @return  String                      The rendered output, for inclusion into the page template.
  */
 public function ZendSearchLuceneResults($data, $form, $request)
 {
     $searchField = $form->Fields()->dataFieldByName('Search');
     $parsedQuery = Zend_Search_Lucene_Search_QueryParser::parse($searchField->dataValue());
     $templateData = $this->getDataArrayFromHits(
         ZendSearchLuceneWrapper::find($parsedQuery),
         $request
     );
     $templates = array('Lucene_results', 'Page');
     return $this->owner->customise($templateData)->renderWith($templates);
 }
 /**
  * Builds a key string for a query array: 'search_' followed by the MD5 of
  * the serialized array.
  *
  * When the first element is not a string, it is first replaced by the
  * result of calling rewrite() on it with the current index.  If that call
  * throws an Exception, the element is replaced by the serialized form of
  * the whole array instead.
  *
  * @access private
  * @static
  * @param   array   $query  Query array; element 0 is a string or an object with rewrite()
  * @return  string          The generated key
  */
 private static function hash($query)
 {
     $first = $query[0];
     if (!is_string($first)) {
         try {
             $query[0] = $first->rewrite(ZendSearchLuceneWrapper::getIndex());
         } catch (Exception $e) {
             // Fall back to the serialized form of the unmodified array.
             $query[0] = serialize($query);
         }
     }
     return 'search_' . md5(serialize($query));
 }
 /**
  * Asserts that, once the module is enabled, the current SiteConfig's CMS
  * actions contain a field named 'rebuildZendSearchLuceneIndex'.
  */
 function testUpdateCMSActions()
 {
     // Setup: strip every module extension so enable() starts from scratch
     $extensionsByClass = array(
         'ContentController' => 'ZendSearchLuceneContentController',
         'SiteConfig' => 'ZendSearchLuceneSiteConfig',
         'LeftAndMain' => 'ZendSearchLuceneCMSDecorator',
         'SiteTree' => 'ZendSearchLuceneSearchable',
         'File' => 'ZendSearchLuceneSearchable',
     );
     foreach ($extensionsByClass as $className => $extension) {
         Object::remove_extension($className, $extension);
     }

     // Known configuration for the test run
     ZendSearchLuceneSearchable::$pageLength = 10;
     ZendSearchLuceneSearchable::$alwaysShowPages = 3;
     ZendSearchLuceneSearchable::$maxShowPages = 8;
     ZendSearchLuceneSearchable::$encoding = 'utf-8';
     ZendSearchLuceneSearchable::$cacheDirectory = TEMP_FOLDER;
     ZendSearchLuceneWrapper::$indexName = 'Test';
     ZendSearchLuceneSearchable::enable(array());

     $siteConfig = SiteConfig::current_site_config();
     $rebuildAction = $siteConfig->getCMSActions()->fieldByName('rebuildZendSearchLuceneIndex');
     $this->assertTrue(is_object($rebuildAction));
 }
 /**
  * Runs one step of the job: indexes the next document in the queue.
  *
  * Each entry of $this->remainingDocuments is used as the two arguments to
  * DataObject::get_by_id().  One entry is consumed per call, and the job is
  * flagged complete as soon as the list is empty.
  */
 public function process()
 {
     $remainingDocuments = $this->remainingDocuments;
     // if there's no more, we're done!  empty() also covers a list that
     // was never set, which count() would reject on newer PHP versions.
     if (empty($remainingDocuments)) {
         $this->isComplete = true;
         return;
     }
     $this->currentStep++;
     $item = array_shift($remainingDocuments);
     $obj = DataObject::get_by_id($item[0], $item[1]);
     // get_by_id() may find no record; skip the entry instead of handing a
     // non-object to the indexer, so one missing record cannot kill the job.
     if ($obj) {
         ZendSearchLuceneWrapper::index($obj);
     }
     // and now we store the new list of remaining children
     $this->remainingDocuments = $remainingDocuments;
     if (empty($remainingDocuments)) {
         $this->isComplete = true;
     }
 }
 /**
  * Runs one step of the job: indexes the next entry of $this->jobData.
  *
  * Each entry is used as the two arguments to DataObject::get_by_id().
  * Once the list is empty (either on entry or after this step) the job is
  * flagged complete and the index is optimised.
  */
 public function process()
 {
     if (count($this->jobData)) {
         $this->currentStep++;
         $item = array_shift($this->jobData);
         $obj = DataObject::get_by_id($item[0], $item[1]);
         // get_by_id() may find no record; skip the entry instead of handing
         // a non-object to the indexer, so one missing record cannot kill
         // the job.
         if ($obj) {
             ZendSearchLuceneWrapper::index($obj);
         }
     }
     // if there's no more, we're done!
     if (!count($this->jobData)) {
         $this->isComplete = true;
         $idx = ZendSearchLuceneWrapper::getIndex();
         $idx->optimize();
     }
 }
 /**
  * Triggers a search index rebuild whenever a dev/build is run, and prints
  * a confirmation list item.
  *
  * The static flag is cleared afterwards so the rebuild is only triggered
  * once per request.  To switch this behaviour off entirely, add the
  * following to your _config.php:
  *
  * <code>
  * ZendSearchLuceneSearchable::$reindexOnDevBuild = false;
  * </code>
  */
 public function requireDefaultRecords()
 {
     if (self::$reindexOnDevBuild) {
         ZendSearchLuceneWrapper::rebuildIndex();
         $message = _t('ZendSearchLucene.RebuildSuccessMessage', 'A Lucene search index rebuild job has been added to the Jobs queue.');
         echo '<li><em>' . $message . '</em></li>';
         // Only run once
         self::$reindexOnDevBuild = false;
     }
 }
 /**
  * Asserts that a search term has no hits in a blank index, gains at least
  * one hit after a page containing it is written, and has none again after
  * that page is deleted.
  */
 public function testOnAfterDelete()
 {
     // Setup: detach the searchable extension before re-enabling the module
     foreach (array('SiteTree', 'File') as $extendedClass) {
         Object::remove_extension($extendedClass, 'ZendSearchLuceneSearchable');
     }
     ZendSearchLuceneSearchable::$pageLength = 10;
     ZendSearchLuceneSearchable::$alwaysShowPages = 3;
     ZendSearchLuceneSearchable::$maxShowPages = 8;
     ZendSearchLuceneSearchable::$encoding = 'utf-8';
     ZendSearchLuceneSearchable::$cacheDirectory = TEMP_FOLDER;
     ZendSearchLuceneWrapper::$indexName = 'Test';
     ZendSearchLuceneSearchable::enable();

     // Blank the index
     ZendSearchLuceneWrapper::getIndex(true);

     $term = 'asdf';

     // Nothing should match the term yet
     $hitsBefore = ZendSearchLuceneWrapper::find($term);
     $this->assertEquals(0, count($hitsBefore));

     $page = DataObject::get_one('Page');
     $page->Content = $term;
     $page->write();

     // Writing the page should have produced at least one hit
     $hitsAfterWrite = ZendSearchLuceneWrapper::find($term);
     $this->assertGreaterThan(0, count($hitsAfterWrite));

     $page->delete();

     // Deleting the page should have removed the hit again
     $hitsAfterDelete = ZendSearchLuceneWrapper::find($term);
     $this->assertEquals(0, count($hitsAfterDelete));
 }
 /**
  * Returns the list of available subclasses of ZendSearchLuceneTextExtractor
  * in the order in which they should be processed.  Order is determined by
  * the $priority static on each class.  Default is 100 for all inbuilt 
  * classes, lower numbers get run first.
  *
  * The sorted list is cached in self::$extractorClasses after the first
  * call.
  *
  * @access private
  * @static
  * @return  Array   An array of strings containing classnames.
  */
 private static function getTextExtractorClasses()
 {
     if (!self::$extractorClasses) {
         $all_classes = ClassInfo::subclassesFor('ZendSearchLuceneTextExtractor');
         // create_function() no longer exists as of PHP 8.0, so the
         // comparator is a named static method passed as an array callable.
         usort($all_classes, array(__CLASS__, 'compareExtractorPriority'));
         self::$extractorClasses = $all_classes;
     }
     return self::$extractorClasses;
 }

 /**
  * usort() comparator ordering two class names by their static $priority
  * property, ascending.
  *
  * @access private
  * @static
  * @param   string  $a  First class name
  * @param   string  $b  Second class name
  * @return  int         -1, 0 or 1
  */
 private static function compareExtractorPriority($a, $b)
 {
     $classA = new ReflectionClass($a);
     $priorityA = $classA->getStaticPropertyValue('priority');
     $classB = new ReflectionClass($b);
     $priorityB = $classB->getStaticPropertyValue('priority');
     if ($priorityA == $priorityB) {
         return 0;
     }
     return ($priorityA < $priorityB) ? -1 : 1;
 }
 /**
  * Method for testing config.
  *
  * Echoes an HTML report with the following sections:
  *  - whether a 'queuedjobs' folder exists under the site's base folder
  *  - whether the catdoc and pdftotext utilities and the zip/zlib PHP
  *    extensions are available
  *  - document counts of the current index
  *  - the MySQL max_allowed_packet and log_bin settings
  *  - the Lucene class and field config of every DataObject subclass that
  *    has the ZendSearchLuceneSearchable extension
  *
  * The MySQL settings are read through the framework's DB::query(), since
  * the mysql_* functions no longer exist as of PHP 7.0.
  *
  * @access public
  */
 public function diagnose()
 {
     echo '<h1>Lucene Diagnosis</h1>';
     echo '<hr /><h2>Dependencies</h2>';
     if (!file_exists(Director::baseFolder() . '/queuedjobs')) {
         echo '<p>The <strong>Queued Jobs</strong> module is not installed.  Reindexing will not work.</p>';
         echo '<p>Please install this module to enable Lucene.</p>';
         echo '<p><a href="http://www.silverstripe.org/queued-jobs-module/">Queued Jobs</a></p>';
     } else {
         echo '<p>The <strong>Queued Jobs</strong> module is installed.</p>';
     }
     echo '<hr /><h2>Installed programs/extensions</h2>';
     // catdoc - scan older MS documents
     $catdoc = false;
     if (defined('CATDOC_BINARY_LOCATION') && file_exists(CATDOC_BINARY_LOCATION)) {
         $catdoc = CATDOC_BINARY_LOCATION;
     } elseif (file_exists('/usr/bin/catdoc')) {
         $catdoc = '/usr/bin/catdoc';
     } elseif (file_exists('/usr/local/bin/catdoc')) {
         $catdoc = '/usr/local/bin/catdoc';
     }
     if ($catdoc) {
         echo '<p>Utility <strong>catdoc</strong> is installed at ' . $catdoc . ' - older MS Office documents (.doc, .xls, .ppt) will be scanned.</p>';
     } else {
         echo '<p>Utility <strong>catdoc</strong> is not installed.  Older MS Office documents (.doc, .xls, .ppt) will not be scanned.</p>';
     }
     // zip - scan newer MS documents
     if (extension_loaded('zip')) {
         echo '<p>PHP extension <strong>zip</strong> is installed - newer MS Office documents (.docx, .xlsx, .pptx) will be scanned.</p>';
     } else {
         echo '<p>PHP extension <strong>zip</strong> is not installed - newer MS Office documents (.docx, .xlsx, .pptx) will not be scanned.</p>';
     }
     // pdftotext - scan PDF documents
     // NOTE(review): unlike the catdoc check above, a defined
     // PDFTOTEXT_BINARY_LOCATION is trusted without a file_exists() test -
     // confirm against the PDF extractor before aligning the two.
     $pdftotext = false;
     if (defined('PDFTOTEXT_BINARY_LOCATION')) {
         $pdftotext = PDFTOTEXT_BINARY_LOCATION;
     } elseif (file_exists('/usr/bin/pdftotext')) {
         $pdftotext = '/usr/bin/pdftotext';
     } elseif (file_exists('/usr/local/bin/pdftotext')) {
         $pdftotext = '/usr/local/bin/pdftotext';
     }
     if ($pdftotext) {
         echo '<p>Utility <strong>pdftotext</strong> is installed at ' . $pdftotext . '.  PDF documents will be scanned.</p>';
     } elseif (extension_loaded('zlib')) {
         echo '<p>Utility <strong>pdftotext</strong> is not installed, but the PDF2Text class will be used to scan PDF documents.</p>';
     } else {
         echo '<p>Utility <strong>pdftotext</strong> is not installed, and PHP extension <strong>zlib</strong> is not loaded.  ' . 'PDF documents using gzip compression will not be scanned.  Other PDF documents will be scanned using the PDF2Text class.</p>';
     }
     echo '<hr /><h2>Index</h2>';
     $idx = ZendSearchLuceneWrapper::getIndex();
     echo '<p>Number of records in the index: ' . $idx->count() . '</p>';
     echo '<p>Number of records in the index (excluding deleted records): ' . $idx->numDocs() . '</p>';
     echo '<hr /><h2>Database setup</h2>';
     // Read server variables through the framework's own connection.
     $max_packet_size = DB::query('SELECT @@max_allowed_packet AS size')->value();
     echo '<p>Your MySQL max_allowed_packet value is ' . $max_packet_size . '.<br/>';
     if ($max_packet_size >= 128 * 1024 * 1024) {
         echo 'This should be high enough to cope with large datasets.';
     } else {
         echo 'This may cause issues with large datasets.</p>';
         echo '<p>To rectify this, you can add the following lines to functions that may create large datasets, eg. search actions:</p>';
         echo '<pre>' . 'mysql_query(\'SET GLOBAL net_buffer_length=1000000\');' . "\n" . 'mysql_query(\'SET GLOBAL max_allowed_packet=1000000000\');</pre>';
         echo '<p>Alternatively, you can set these config values in your MySQL server config file.';
     }
     echo '</p>';
     $log_bin = DB::query('SELECT @@log_bin AS log_bin')->value();
     if ($log_bin == 0) {
         echo '<p>Your MySQL server is set to not use the binary log.<br/>' . 'This is the correct setting.</p>';
     } else {
         echo '<p>Your MySQL server is set to use the binary log.<br/>' . 'This will result in a large amount of disk space being used for ' . 'logging Lucene operations, which can use many GB of space with ' . 'large datasets.</p>';
         echo '<p>To rectify this, you can add the following lines to your _config.php:</p>';
         echo '<pre>' . 'mysql_query(\'SET GLOBAL log_bin=0\');' . "\n" . '</pre>';
         // Close the paragraph, which was previously left open.
         echo '<p>Alternatively, you can set this config value in your MySQL server config file.</p>';
     }
     $classes = ClassInfo::subclassesFor('DataObject');
     foreach ($classes as $class) {
         // Only report on classes that carry the searchable extension.
         if (!Object::has_extension($class, 'ZendSearchLuceneSearchable')) {
             continue;
         }
         $class_config = singleton($class)->getLuceneClassConfig();
         echo '<hr/><h2>' . $class . '</h2>';
         echo '<h3>Class config</h3>';
         Debug::dump($class_config);
         echo '<h3>Field config</h3>';
         foreach (singleton($class)->getSearchedVars() as $fieldname) {
             echo '<h4>' . $fieldname . '</h4>';
             if ($fieldname == 'Link') {
                 echo '<p>No output means that Link is not indexed for this class.</p>';
             }
             // Errors are suppressed here on purpose, as in the original.
             @Debug::dump(singleton($class)->getLuceneFieldConfig($fieldname));
         }
     }
 }