Beispiel #1
0
 /**
  * Constructs a filesystem storage directory on a three-level nested path,
  * checks the resulting object type and removes the nested folders afterwards.
  */
 public function testFilesystemSubfoldersAutoCreation()
 {
     $tempRoot = dirname(__FILE__) . '/_tempFiles/_files';

     $storage = new Zend_Search_Lucene_Storage_Directory_Filesystem($tempRoot . '/dir1/dir2/dir3');
     $this->assertTrue($storage instanceof Zend_Search_Lucene_Storage_Directory);
     $storage->close();

     // Clean up innermost-first: rmdir() only removes empty directories
     foreach (array('/dir1/dir2/dir3', '/dir1/dir2', '/dir1') as $relativeDir) {
         rmdir($tempRoot . $relativeDir);
     }
 }
 /**
  * Compares the dictionary index loaded from the '.tii' part of segment '_1'
  * with the serialized copy stored in the fixture file '_1.sti'.
  */
 public function testCreate()
 {
     $sourceDir = new Zend_Search_Lucene_Storage_Directory_Filesystem(dirname(__FILE__) . '/_files/_source');

     // Expected values: serialized dictionary index kept in the fixture file
     $expectedFile = $sourceDir->getFileObject('_1.sti');
     $expectedRaw  = $expectedFile->readBytes($sourceDir->fileLength('_1.sti'));
     list($expectedDictionary, $expectedInfos) = unserialize($expectedRaw);

     // Actual values: dictionary index parsed by the loader from the compound file
     $segment = new Zend_Search_Lucene_Index_SegmentInfo('_1', 2, $sourceDir);
     $tiiFile = $segment->openCompoundFile('.tii');
     $tiiRaw  = $tiiFile->readBytes($segment->compoundFileLength('.tii'));
     list($actualDictionary, $actualInfos) = Zend_Search_Lucene_Index_DictionaryLoader::load($tiiRaw);

     $this->assertTrue($expectedDictionary == $actualDictionary);
     $this->assertTrue($expectedInfos == $actualInfos);
 }
 /**
  * Returns the Zend_Search_Lucene index of the subroot that contains the given component.
  *
  * Walks up the parent chain until a component whose class carries the
  * 'subroot' flag is found and falls back to the root component when there is
  * none. Every subroot has its own index below 'cache/fulltext/<componentId>';
  * opened indexes are kept in a static cache inside this method.
  *
  * @param Kwf_Component_Data $subroot component the index is requested for
  * @return Zend_Search_Lucene_Interface
  */
 public static function getInstance(Kwf_Component_Data $subroot)
 {
     static $instances = array();

     // Climb to the nearest component (starting with the given one) flagged as subroot
     while ($subroot && !Kwc_Abstract::getFlag($subroot->componentClass, 'subroot')) {
         $subroot = $subroot->parent;
     }
     if (!$subroot) {
         $subroot = Kwf_Component_Data_Root::getInstance();
     }

     if (isset($instances[$subroot->componentId])) {
         return $instances[$subroot->componentId];
     }

     // Global Lucene defaults: UTF-8 aware analyzer with a ShortWords(2) filter.
     // No StopWords filter is registered.
     $utf8Analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive();
     $utf8Analyzer->addFilter(new Zend_Search_Lucene_Analysis_TokenFilter_ShortWords(2));
     Zend_Search_Lucene_Analysis_Analyzer::setDefault($utf8Analyzer);
     Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding('utf-8');
     Zend_Search_Lucene_Storage_Directory_Filesystem::setDefaultFilePermissions(0666);

     $indexPath = 'cache/fulltext' . '/' . $subroot->componentId;
     try {
         $instances[$subroot->componentId] = Zend_Search_Lucene::open($indexPath);
     } catch (Zend_Search_Lucene_Exception $openFailure) {
         // Opening failed, so a new index is created at the same path
         $instances[$subroot->componentId] = Zend_Search_Lucene::create($indexPath);
     }

     return $instances[$subroot->componentId];
 }
 /**
  * Writes the deletions of this segment to '<segment name>.del' if they changed.
  *
  * Returns immediately unless the deletions are flagged dirty. The file
  * receives the document count, the number of deleted entries and the bit
  * vector (lowest bit first within each byte); the dirty flag is cleared afterwards.
  */
 public function writeChanges()
 {
     if (!$this->_deletedDirty) {
         return;
     }

     if (extension_loaded('bitset')) {
         // With the bitset extension $this->_deleted is written as-is
         $packed       = $this->_deleted;
         $deletedCount = count(bitset_to_array($packed));
     } else {
         // Otherwise $this->_deleted is treated as an array keyed by bit position
         $totalBytes = floor($this->_docCount / 8) + 1;
         $packed     = '';
         for ($byteIndex = 0; $byteIndex < $totalBytes; $byteIndex++) {
             $value = 0;
             for ($shift = 0; $shift < 8; $shift++) {
                 if (isset($this->_deleted[$byteIndex * 8 + $shift])) {
                     $value |= 1 << $shift;
                 }
             }
             $packed .= chr($value);
         }
         $deletedCount = count($this->_deleted);
     }

     $out = $this->_directory->createFile($this->_name . '.del');
     $out->writeInt($this->_docCount);
     $out->writeInt($deletedCount);
     $out->writeBytes($packed);

     $this->_deletedDirty = false;
 }
 /**
  * Stores the index location, creates the UTF-8 analyzer and sets the default
  * file permissions of the filesystem storage to 0666.
  *
  * @param string $location The index location
  */
 public function __construct($location)
 {
     $utf8Analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive();

     $this->location = $location;
     $this->analyzer = $utf8Analyzer;

     // Load the storage class explicitly before touching its static default
     require_once 'Zend/Search/Lucene/Storage/Directory/Filesystem.php';
     Zend_Search_Lucene_Storage_Directory_Filesystem::setDefaultFilePermissions(0666);
 }
Beispiel #6
0
 /**
  * Opens the Lucene index at BUGDAR_ROOT . '/cache/lucene_index', or creates it
  * when that path does not exist. Index files default to mode 0777.
  */
 public function __construct()
 {
     $indexDir = BUGDAR_ROOT . '/cache/lucene_index';

     Zend_Search_Lucene_Storage_Directory_Filesystem::setDefaultFilePermissions(0777);

     $this->lucene = file_exists($indexDir)
         ? Zend_Search_Lucene::open($indexDir)
         : Zend_Search_Lucene::create($indexDir);
 }
Beispiel #7
0
 /**
  * Opens an index file stored within the segment's compound ('.cfs') file.
  *
  * The returned object is the compound file itself, positioned at the offset
  * recorded in $this->_segFiles for the requested sub-file.
  *
  * @param string $extension suffix appended to the segment name (e.g. '.tii')
  * @throws Zend_Search_Lucene_Exception when the compound file has no such entry
  * @return Zend_Search_Lucene_Storage_File
  */
 public function openCompoundFile($extension)
 {
     $subFile = $this->_name . $extension;

     if (isset($this->_segFiles[$subFile])) {
         $compound = $this->_directory->getFileObject($this->_name . '.cfs');
         $compound->seek($this->_segFiles[$subFile]);

         return $compound;
     }

     throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain ' . $subFile . ' file.');
 }
 /**
  * Creates a file through the parent implementation, tolerating chmod failures.
  *
  * A Zend_Search_Lucene_Exception whose message contains 'chmod' is ignored;
  * any other Zend_Search_Lucene_Exception is rethrown unchanged.
  *
  * @param string $filename
  * @throws Zend_Search_Lucene_Exception
  * @return mixed the entry stored in $this->_fileHandlers for $filename
  */
 public function createFile($filename)
 {
     try {
         parent::createFile($filename);
     } catch (Zend_Search_Lucene_Exception $failure) {
         $isChmodProblem = strpos($failure->getMessage(), 'chmod') !== false;
         if (!$isChmodProblem) {
             throw $failure;
         }
         // NOTE(review): relies on the parent having registered the handler
         // before the chmod step failed - confirm against the parent class
     }

     return $this->_fileHandlers[$filename];
 }
    /**
     * Merges the source segments '_0' to '_4' into 'mergedSegment' and compares the
     * resulting compound file byte-for-byte with the stored sample file.
     */
    public function testMerge()
    {
        $segmentsDirectory = new Zend_Search_Lucene_Storage_Directory_Filesystem(dirname(__FILE__) . '/_files/_source');
        $outputDirectory   = new Zend_Search_Lucene_Storage_Directory_Filesystem(dirname(__FILE__) . '/_files');
        $segmentsList = array('_0', '_1', '_2', '_3', '_4');

        $segmentMerger = new Zend_Search_Lucene_Index_SegmentMerger($outputDirectory, 'mergedSegment');

        foreach ($segmentsList as $segmentName) {
            $segmentMerger->addSource(new Zend_Search_Lucene_Index_SegmentInfo($segmentName, 2, $segmentsDirectory));
        }

        $mergedSegment = $segmentMerger->merge();
        $this->assertTrue($mergedSegment instanceof Zend_Search_Lucene_Index_SegmentInfo);
        unset($mergedSegment);

        $mergedFile = $outputDirectory->getFileObject('mergedSegment.cfs');
        $mergedFileData = $mergedFile->readBytes($outputDirectory->fileLength('mergedSegment.cfs'));

        $sampleFile = $outputDirectory->getFileObject('mergedSegment.cfs.sample');
        $sampleFileData = $sampleFile->readBytes($outputDirectory->fileLength('mergedSegment.cfs.sample'));

        // Remove the generated file before asserting, so that a failed
        // comparison does not leave the artifact behind for later runs
        $outputDirectory->deleteFile('mergedSegment.cfs');

        // Expected value (the stored sample) first, actual value second
        $this->assertEquals($sampleFileData, $mergedFileData);
    }
Beispiel #10
0
 /**
  * Configures the Lucene defaults and remembers the index directory.
  *
  * @param string $directory index directory; its modification time (0 when the
  *                          path does not exist) is stored in $this->lastModif
  * @param string $lang      language code; every value currently selects the
  *                          English standard analyzer with UTF-8 query encoding
  * @param bool   $highlight cast to boolean and stored in $this->highlight
  */
 public function __construct($directory, $lang = 'en', $highlight = true)
 {
     switch ($lang) {
         case 'en':
         default:
             Zend_Search_Lucene_Analysis_Analyzer::setDefault(new StandardAnalyzer_Analyzer_Standard_English());
             Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding('UTF-8');
             break;
     }

     Zend_Search_Lucene_Storage_Directory_Filesystem::setDefaultFilePermissions(0660);

     $this->directory = $directory;
     if (file_exists($directory)) {
         $this->lastModif = filemtime($directory);
     } else {
         $this->lastModif = 0;
     }
     $this->highlight = (bool) $highlight;
 }
  /**
   * Installs the default analyzer with its stop-word and short-word filters and
   * sets the default file permissions of the filesystem storage to 0777.
   *
   * Both filters are configured through sfConfig settings prefixed with
   * 'app_sf_propel_luceneable_behavior_'.
   */
  private static function prepareZendSearchLucene()
  {
    $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);

    // Stop words come from a comma separated setting; a missing setting means none
    $configuredStopWords = sfConfig::get('app_sf_propel_luceneable_behavior_stopWords', false);
    if (false === $configuredStopWords) {
      $stopWordList = array();
    } else {
      $stopWordList = explode(',', $configuredStopWords);
    }
    $analyzer->addFilter(new Zend_Search_Lucene_Analysis_TokenFilter_StopWords($stopWordList));

    // Short-words setting defaults to 3 and is passed to the filter unchanged
    $shortWordsSetting = sfConfig::get('app_sf_propel_luceneable_behavior_shortWords', 3);
    $analyzer->addFilter(new Zend_Search_Lucene_Analysis_TokenFilter_ShortWords($shortWordsSetting));

    Zend_Search_Lucene_Storage_Directory_Filesystem::setDefaultFilePermissions(0777);
  }
Beispiel #12
0
 /**
  * Shows the job search form and, on a valid POST, runs the query against the Lucene index.
  *
  * The form is always assigned to the view. On a valid submission the hits are
  * assigned to $this->view->jobs; on an invalid one the form is re-populated
  * with the posted data.
  */
 public function searchAction()
 {
     $form = new Application_Form_Search();
     $this->view->form = $form;
     $form->submit->setLabel('search jobs');

     if (!$this->getRequest()->isPost()) {
         return;
     }

     $formData = $this->getRequest()->getPost();

     // Validate the data taken from the request object instead of reading $_POST directly
     if (!$form->isValid($formData)) {
         $form->populate($formData);
         return;
     }

     // NOTE(review): the index location is hard-coded - consider moving it to configuration
     $index = Zend_Search_Lucene::open('C:\\indexed');
     $this->view->jobs = $index->find($form->getValue('query'));
 }
Beispiel #13
0
 /**
  * Write changes if it's necessary.
  *
  * Stores the deletions of this segment in a new
  * '<segment name>_<generation>.del' file, where the generation is the highest
  * generation found in the directory plus one, written in base 36. The
  * generation is determined while holding the index write lock.
  */
 public function writeChanges()
 {
     if (!$this->_deletedDirty) {
         return;
     }
     if (extension_loaded('bitset')) {
         // With the bitset extension $this->_deleted is written as-is
         $delBytes = $this->_deleted;
         $bitCount = count(bitset_to_array($delBytes));
     } else {
         // Pack the array form into a bit vector, lowest bit first within each byte
         $byteCount = floor($this->_docCount / 8) + 1;
         $delBytes = str_repeat(chr(0), $byteCount);
         for ($count = 0; $count < $byteCount; $count++) {
             $byte = 0;
             for ($bit = 0; $bit < 8; $bit++) {
                 if (isset($this->_deleted[$count * 8 + $bit])) {
                     $byte |= 1 << $bit;
                 }
             }
             $delBytes[$count] = chr($byte);
         }
         $bitCount = count($this->_deleted);
     }
     // Get new generation number
     $lock = Zend_Search_Lucene::obtainWriteLock($this->_directory);
     try {
         // Escape the segment name so it is matched literally inside the pattern
         $delFilePattern = '/^' . preg_quote($this->_name, '/') . '_([a-zA-Z0-9]+)\\.del$/i';
         $delFileList = array();
         foreach ($this->_directory->fileList() as $file) {
             if ($file == $this->_name . '.del') {
                 // Matches <segment_name>.del file name
                 $delFileList[] = 0;
             } elseif (preg_match($delFilePattern, $file, $matches)) {
                 // Matches <segment_name>_NNN.del file names. The suffix is written
                 // in base 36 below, so it has to be decoded from base 36 here;
                 // a plain (int) cast turns 'a' (generation 10) into 0.
                 $delFileList[] = (int) base_convert($matches[1], 36, 10);
             }
         }
         if (count($delFileList) == 0) {
             // There is no deletions file for current segment in the directory
             // Set deletions file generation number to 1
             $this->_delGen = 1;
         } else {
             // There are some deletions files for current segment in the directory
             // Set deletions file generation number to the highest + 1
             $this->_delGen = max($delFileList) + 1;
         }
         $delFile = $this->_directory->createFile($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');
     } catch (Exception $e) {
         // Do not keep the write lock when listing or creating the file fails
         Zend_Search_Lucene::releaseWriteLock($this->_directory, $lock);
         throw $e;
     }
     Zend_Search_Lucene::releaseWriteLock($this->_directory, $lock);
     $delFile->writeInt($this->_docCount);
     $delFile->writeInt($bitCount);
     $delFile->writeBytes($delBytes);
     $this->_deletedDirty = false;
 }
Beispiel #14
0
 /**
  * Indexer Constructor.
  *
  * Does nothing when the 'enable_indexing' configuration value is off.
  * Otherwise opens the Lucene index below $webDir (creating it when the path
  * does not exist), applies the format version and result set limit, and makes
  * sure the index directory contains an '.htaccess' and an 'index.php' file.
  *
  * @global mixed $webDir prefix of the index path; presumably the installation directory
  */
 public function __construct()
 {
     global $webDir;
     if (!get_config('enable_indexing')) {
         return;
     }
     $index_path = $webDir . self::$_index_dir;
     // Restrict index files to read/write for the owning user only (0600)
     Zend_Search_Lucene_Storage_Directory_Filesystem::setDefaultFilePermissions(0600);
     // Utilize UTF-8 compatible text analyzer
     Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive());
     try {
         if (file_exists($index_path)) {
             // Open index
             $this->__index = Zend_Search_Lucene::open($index_path);
         } else {
             // Create index
             $this->__index = Zend_Search_Lucene::create($index_path);
         }
     } catch (Zend_Search_Lucene_Exception $e) {
         // NOTE(review): presumably fatal_error.php ends the request; if it does
         // not, $this->__index is not set for the calls below - confirm
         require_once 'fatal_error.php';
     }
     // Set Index Format Version
     $this->__index->setFormatVersion(Zend_Search_Lucene::FORMAT_2_3);
     // Set Result Set Limit
     Zend_Search_Lucene::setResultSetLimit(self::$_resultSetLimit);
     // Write an .htaccess to prevent raw access to index files.
     // file_put_contents() reports a failure through its return value instead of
     // handing an invalid handle to fwrite()/fclose() when the file cannot be opened.
     $htaccess = $index_path . '/.htaccess';
     if (!file_exists($htaccess)) {
         file_put_contents($htaccess, "deny from all\n");
     }
     if (!file_exists($index_path . '/index.php')) {
         touch($index_path . '/index.php');
     }
 }
Beispiel #15
0
 /**
  * Writes the deletions of this segment to a new generation '.del' file if they changed.
  *
  * This method must be invoked only from the Writer _updateSegments() method,
  * so the index write lock has to be obtained already.
  *
  * @internal
  */
 public function writeChanges()
 {
     if (!$this->_deletedDirty) {
         return;
     }

     if (extension_loaded('bitset')) {
         // With the bitset extension $this->_deleted is written as-is
         $packed       = $this->_deleted;
         $deletedCount = count(bitset_to_array($packed));
     } else {
         // Otherwise $this->_deleted is treated as an array keyed by bit position
         $totalBytes = floor($this->_docCount / 8) + 1;
         $packed     = '';
         for ($byteIndex = 0; $byteIndex < $totalBytes; $byteIndex++) {
             $value = 0;
             for ($shift = 0; $shift < 8; $shift++) {
                 if (isset($this->_deleted[$byteIndex * 8 + $shift])) {
                     $value |= 1 << $shift;
                 }
             }
             $packed .= chr($value);
         }
         $deletedCount = count($this->_deleted);
     }

     // Refresh the generation number, then move on to the next one
     $this->_detectLatestDelGen();
     if ($this->_delGen != -1) {
         // A generation already exists: increase it by 1
         $this->_delGen++;
     } else {
         // No generation yet: start with 1
         $this->_delGen = 1;
     }

     $generationSuffix = base_convert($this->_delGen, 10, 36);
     $out = $this->_directory->createFile($this->_name . '_' . $generationSuffix . '.del');
     $out->writeInt($this->_docCount);
     $out->writeInt($deletedCount);
     $out->writeBytes($packed);

     $this->_deletedDirty = false;
 }
 /**
  * Runs the parent constructor for the given path and then applies mode 0777
  * to that path through sfLuceneStorageFilesystem::chmod().
  *
  * @param string $path presumably a directory path; passed unchanged to the
  *                     parent constructor and to chmod()
  */
 public function __construct($path)
 {
     parent::__construct($path);
     sfLuceneStorageFilesystem::chmod($path, 0777);
 }
Beispiel #17
0
 /**
  * Checks that a deletion is only visible to a new SegmentInfo instance after
  * writeChanges() has been called, and that removing the written '.del' file
  * leaves the segment without deletions again.
  */
 public function testDelete()
 {
     $segmentName = '_1';
     $docCount    = 2;
     $storage = new Zend_Search_Lucene_Storage_Directory_Filesystem(dirname(__FILE__) . '/_source/_files');

     // Delete a document but drop the instance without writing the change
     $unsaved = new Zend_Search_Lucene_Index_SegmentInfo($storage, $segmentName, $docCount, 0);
     $this->assertFalse($unsaved->hasDeletions());
     $unsaved->delete(0);
     $this->assertTrue($unsaved->hasDeletions());
     $generation = $unsaved->getDelGen();
     unset($unsaved);

     // The change was not written, so the segment still has no deletions
     $saved = new Zend_Search_Lucene_Index_SegmentInfo($storage, $segmentName, $docCount, $generation);
     $this->assertFalse($saved->hasDeletions());
     $saved->delete(0);
     $saved->writeChanges();
     $generation = $saved->getDelGen();
     unset($saved);

     // A new instance opened with the written generation sees the deletion
     $reloaded = new Zend_Search_Lucene_Index_SegmentInfo($storage, $segmentName, $docCount, $generation);
     $this->assertTrue($reloaded->hasDeletions());
     unset($reloaded);

     // Remove the generated deletions file and verify the segment is clean again
     $storage->deleteFile('_1_' . base_convert($generation, 10, 36) . '.del');
     $cleaned = new Zend_Search_Lucene_Index_SegmentInfo($storage, $segmentName, $docCount, -1);
     $this->assertFalse($cleaned->hasDeletions());
 }
Beispiel #18
0
 /**
  * Set default file permissions
  *
  * Stores $mode in the static $_defaultFilePermissions property, so the value
  * is shared by the whole class rather than by a single instance.
  *
  * @param integer $mode permission bits, typically given as an octal literal such as 0666
  */
 public static function setDefaultFilePermissions($mode)
 {
     self::$_defaultFilePermissions = $mode;
 }
 /**
  * Instantiate the Lucene index
  *
  * The index is opened on first use and created if it cannot be opened. The
  * instance is then configured, optionally optimized, committed and kept in
  * $this->_index for later calls.
  *
  * @return \Zend_Search_Lucene_Interface Lucene index instance
  * @throws Exception If the index cannot be created
  */
 protected function _index()
 {
     // One-time instantiation or creation of the lucene index
     if ($this->_index === null) {
         // Static defaults have to be in place BEFORE the index is opened or
         // created, otherwise files written by create() do not get the 0664 mode
         \Zend_Search_Lucene_Storage_Directory_Filesystem::setDefaultFilePermissions(0664);
         \Zend_Search_Lucene_Analysis_Analyzer::setDefault(new \Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive());
         \Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding('UTF-8');

         // Try to instantiate an existing lucene index
         try {
             $this->_index = \Zend_Search_Lucene::open($this->_indexDirectory);
         } catch (\Zend_Search_Lucene_Exception $openError) {
             // Opening failed: try to create a new lucene index instead
             try {
                 $this->_index = \Zend_Search_Lucene::create($this->_indexDirectory);
             } catch (\Zend_Search_Lucene_Exception $createError) {
                 // Creation failed as well: give up
                 throw new Exception(sprintf('Error creating lucene index in "%1$s", reason: "%2$s"', $this->_indexDirectory, $createError->getMessage()));
             }
         }

         // Minimize memory consumption
         $this->_index->setMaxBufferedDocs(1);
         // Set optimization frequency (the configured merge factor, but at least 1)
         $this->_index->setMergeFactor(max(1, intval($GLOBALS['TYPO3_CONF_VARS']['EXT']['extParams']['tw_lucenesearch']['mergeFactor'])));
         // If applicable: Optimize index
         if ($this->_indexOptimize) {
             $this->_index->optimize();
         }
         $this->_index->commit();
         // Frontend requests additionally get the configured terms-per-query limit
         if (TYPO3_MODE == 'FE') {
             \Zend_Search_Lucene::setTermsPerQueryLimit(\Tollwerk\TwLucenesearch\Utility\Indexer::indexConfig($GLOBALS['TSFE'], 'search.limits.query'));
         }
     }
     return $this->_index;
 }
Beispiel #20
0
    /**
     * Writes the deletions of this segment to a new generation '.del' file if necessary.
     *
     * Without local deletions the method only reloads the deletions when a newer
     * deletions file generation is detected. With local deletions these are
     * merged with a newer generation (if any) before the next generation is written.
     *
     * This method must be invoked only from the Writer _updateSegments() method,
     * so the index write lock has to be obtained already.
     *
     * @internal
     * @throws Zend_Search_Lucene_Exception
     */
    public function writeChanges()
    {
        // Get the latest generation number present for this segment
        $newestGen = $this->_detectLatestDelGen();

        if (!$this->_deletedDirty) {
            // There were no deletions by the current process

            if ($newestGen == $this->_delGen) {
                // Delete file hasn't been updated by any concurrent process
                return;
            }

            if ($newestGen > $this->_delGen) {
                // Delete file has been updated by some concurrent process: reload it
                $this->_delGen  = $newestGen;
                $this->_deleted = $this->_loadDelFile();

                return;
            }

            // The detected generation is older than the known one
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Delete file processing workflow is corrupted for the segment \'' . $this->_name . '\'.');
        }

        if ($newestGen > $this->_delGen) {
            // Merge current deletions with the latest deletions file
            $this->_delGen = $newestGen;
            $newestDeleted = $this->_loadDelFile();

            if (extension_loaded('bitset')) {
                $this->_deleted = bitset_union($this->_deleted, $newestDeleted);
            } else {
                $this->_deleted += $newestDeleted;
            }
        }

        if (extension_loaded('bitset')) {
            // With the bitset extension $this->_deleted is written as-is
            $packed       = $this->_deleted;
            $deletedCount = count(bitset_to_array($packed));
        } else {
            // Otherwise $this->_deleted is treated as an array keyed by bit position
            $totalBytes = floor($this->_docCount/8)+1;
            $packed     = '';
            for ($byteIndex = 0; $byteIndex < $totalBytes; $byteIndex++) {
                $value = 0;
                for ($shift = 0; $shift < 8; $shift++) {
                    if (isset($this->_deleted[$byteIndex*8 + $shift])) {
                        $value |= (1<<$shift);
                    }
                }
                $packed .= chr($value);
            }
            $deletedCount = count($this->_deleted);
        }

        if ($this->_delGen != -1) {
            // Increase delete file generation number by 1
            $this->_delGen++;
        } else {
            // Set delete file generation number to 1
            $this->_delGen = 1;
        }

        $generationSuffix = base_convert($this->_delGen, 10, 36);
        $out = $this->_directory->createFile($this->_name . '_' . $generationSuffix . '.del');
        $out->writeInt($this->_docCount);
        $out->writeInt($deletedCount);
        $out->writeBytes($packed);

        $this->_deletedDirty = false;
    }
Beispiel #21
0
 /**
  * Returns the Lucene index, rebuilding it first when indexNeedsRebuilding() says so.
  *
  * Sets the default file permissions (0660) and the English standard analyzer
  * before the index is rebuilt or opened from $this->file.
  *
  * @return mixed result of rebuildIndex() or of Zend_Search_Lucene::open()
  */
 private function getIndex()
 {
     // NOTE(review): $prefs is imported but not read in this method
     global $prefs;

     Zend_Search_Lucene_Storage_Directory_Filesystem::setDefaultFilePermissions(0660);
     Zend_Search_Lucene_Analysis_Analyzer::setDefault(new StandardAnalyzer_Analyzer_Standard_English());

     return $this->indexNeedsRebuilding()
         ? $this->rebuildIndex()
         : Zend_Search_Lucene::open($this->file);
 }