/**
  * Indexes a single document into this segment.
  *
  * Each field of the document is registered with the segment. For indexed
  * fields the terms are added to the in-memory term dictionary together
  * with their positions, and a norm byte is computed from the token count.
  * Afterwards one norm byte is appended for every indexed field known to the
  * segment, and the stored fields are passed to addStoredFields().
  *
  * @param Zend_Search_Lucene_Document $document document to index
  * @throws Zend_Search_Lucene_Exception if a field requests term vector storage
  */
 public function addDocument(Zend_Search_Lucene_Document $document)
 {
     $similarity    = Zend_Search_Lucene_Search_Similarity::getDefault();
     $normsForDoc   = array();
     $fieldsToStore = array();

     foreach ($document->getFieldNames() as $name) {
         $field = $document->getField($name);

         // Registration happens before validation, so the field stays
         // registered even when the exception below is thrown.
         $this->addField($field);

         if ($field->storeTermVector) {
             // @todo term vector storing support
             throw new Zend_Search_Lucene_Exception('Store term vector functionality is not supported yet.');
         }

         if ($field->isIndexed) {
             // An untokenized field is indexed as one token covering the whole value.
             $tokens = $field->isTokenized
                 ? Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($field->stringValue)
                 : array(new Zend_Search_Lucene_Analysis_Token($field->stringValue, 0, strlen($field->stringValue)));

             $lengthNorm = $similarity->lengthNorm($field->name, count($tokens));
             $normsForDoc[$field->name] = chr($similarity->encodeNorm($lengthNorm));

             $pos = 0;
             foreach ($tokens as $token) {
                 $term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $field->name);
                 $key  = $term->key();

                 if (!isset($this->_termDictionary[$key])) {
                     // Term not seen in this segment yet
                     $this->_termDictionary[$key] = $term;
                     $this->_termDocs[$key]       = array($this->_docCount => array());
                 } elseif (!isset($this->_termDocs[$key][$this->_docCount])) {
                     // Known term, first occurrence in the current document
                     $this->_termDocs[$key][$this->_docCount] = array();
                 }

                 $pos += $token->getPositionIncrement();
                 $this->_termDocs[$key][$this->_docCount][] = $pos;
             }
         }

         if ($field->isStored) {
             $fieldsToStore[] = $field;
         }
     }

     foreach ($this->_fields as $segmentFieldName => $segmentField) {
         if ($segmentField->isIndexed) {
             if (!isset($this->_norms[$segmentFieldName])) {
                 // Field is new to the segment: back-fill a zero-length norm
                 // for every document added so far.
                 $emptyNorm = chr($similarity->encodeNorm($similarity->lengthNorm($segmentFieldName, 0)));
                 $this->_norms[$segmentFieldName] = str_repeat($emptyNorm, $this->_docCount);
             }

             // Append this document's norm, or a zero-length norm when the
             // document did not index the field.
             $this->_norms[$segmentFieldName] .= isset($normsForDoc[$segmentFieldName])
                 ? $normsForDoc[$segmentFieldName]
                 : chr($similarity->encodeNorm($similarity->lengthNorm($segmentFieldName, 0)));
         }
     }

     $this->addStoredFields($fieldsToStore);
 }
Exemplo n.º 2
0
 /**
  * Returns the Similarity implementation shared by indexing and search code.
  *
  * The instance is created on first use when none has been set yet.
  *
  * @return Zend_Search_Lucene_Search_Similarity
  */
 public static function getDefault()
 {
     if (self::$_defaultImpl instanceof Zend_Search_Lucene_Search_Similarity) {
         return self::$_defaultImpl;
     }

     // Nothing usable is registered yet: fall back to the stock implementation.
     self::$_defaultImpl = new Zend_Search_Lucene_Search_Similarity_Default();

     return self::$_defaultImpl;
 }
Exemplo n.º 3
0
 /**
  * Returns the Similarity implementation shared by indexing and search code.
  *
  * When no valid implementation is registered, a
  * Zend_Search_Lucene_Search_Similarity_Default instance is created and kept.
  *
  * @return Zend_Search_Lucene_Search_Similarity
  */
 public static function getDefault()
 {
     $hasImplementation = self::$_defaultImpl instanceof Zend_Search_Lucene_Search_Similarity;

     if (!$hasImplementation) {
         // The explicit require_once of 'Zend/Search/Lucene/Search/Similarity/Default.php'
         // is disabled here, so the class has to be loadable by other means.
         self::$_defaultImpl = new Zend_Search_Lucene_Search_Similarity_Default();
     }

     return self::$_defaultImpl;
 }
Exemplo n.º 4
0
 /**
  * Returns the Similarity implementation shared by indexing and search code.
  *
  * On first use the default implementation file is loaded from the
  * configured library directory and an instance of it is created and kept.
  *
  * @return Zend_Search_Lucene_Search_Similarity
  */
 public static function getDefault()
 {
     if (self::$_defaultImpl instanceof Zend_Search_Lucene_Search_Similarity) {
         return self::$_defaultImpl;
     }

     // Load the default implementation only when it is actually needed.
     require_once sfConfig::get('sf_lib_dir') . '/modules/search/lib/Lucene/Search/Similarity/Default.php';
     self::$_defaultImpl = new Zend_Search_Lucene_Search_Similarity_Default();

     return self::$_defaultImpl;
 }
Exemplo n.º 5
0
    /**
     * Returns the norm vector of a field as a byte string.
     *
     * For a field that is unknown to the segment or not indexed, a string of
     * zero-length norms (one byte per document) is synthesized. Otherwise the
     * norms are loaded on first access and returned from the cache.
     *
     * @param string $fieldName
     * @return string
     */
    public function normVector($fieldName)
    {
        $fieldNum = $this->getFieldNum($fieldName);

        // getFieldNum() signals an unknown field with -1; the second operand is
        // only evaluated for known fields.
        $hasStoredNorms = $fieldNum != -1 && $this->_fields[$fieldNum]->isIndexed;

        if ($hasStoredNorms) {
            if (!isset($this->_norms[$fieldNum])) {
                $this->_loadNorm($fieldNum);
            }

            return $this->_norms[$fieldNum];
        }

        $similarity = Zend_Search_Lucene_Search_Similarity::getDefault();
        $emptyNorm  = chr($similarity->encodeNorm($similarity->lengthNorm($fieldName, 0)));

        return str_repeat($emptyNorm, $this->_docCount);
    }
Exemplo n.º 6
0
 /**
  * Returns the similarity used by this object, which is the global default.
  *
  * @return Zend_Search_Lucene_Search_Similarity
  */
 public function getSimilarity()
 {
     $similarity = Zend_Search_Lucene_Search_Similarity::getDefault();

     return $similarity;
 }
Exemplo n.º 7
0
 /**
  * Indexes a single document into this segment.
  *
  * Indexed fields contribute terms and positions to the in-memory term
  * dictionary and a norm byte that combines the length norm with the
  * document and field boosts. An indexed field that yields no tokens (or has
  * an empty value) is replaced by a clone flagged as non-indexed and
  * non-tokenized before it is stored and registered. Afterwards one norm
  * byte is appended for every indexed field known to the segment, and the
  * stored fields are passed to addStoredFields().
  *
  * @param Zend_Search_Lucene_Document $document document to index
  * @throws Zend_Search_Lucene_Exception if a field requests term vector storage
  */
 public function addDocument(Zend_Search_Lucene_Document $document)
 {
     // The require_once calls for the Similarity, Analyzer and Exception
     // classes are disabled in this method, so those classes have to be
     // loadable by other means.
     $similarity    = Zend_Search_Lucene_Search_Similarity::getDefault();
     $normsForDoc   = array();
     $fieldsToStore = array();

     foreach ($document->getFieldNames() as $name) {
         $field = $document->getField($name);

         if ($field->storeTermVector) {
             // @todo term vector storing support
             throw new Zend_Search_Lucene_Exception('Store term vector functionality is not supported yet.');
         }

         if ($field->isIndexed && $field->isTokenized) {
             $analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
             $analyzer->setInput($field->value, $field->encoding);

             $pos        = 0;
             $tokenCount = 0;
             for ($token = $analyzer->nextToken(); $token !== null; $token = $analyzer->nextToken()) {
                 ++$tokenCount;

                 $term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $field->name);
                 $key  = $term->key();

                 if (!isset($this->_termDictionary[$key])) {
                     // Term not seen in this segment yet
                     $this->_termDictionary[$key] = $term;
                     $this->_termDocs[$key]       = array($this->_docCount => array());
                 } elseif (!isset($this->_termDocs[$key][$this->_docCount])) {
                     // Known term, first occurrence in the current document
                     $this->_termDocs[$key][$this->_docCount] = array();
                 }

                 $pos += $token->getPositionIncrement();
                 $this->_termDocs[$key][$this->_docCount][] = $pos;
             }

             if ($tokenCount > 0) {
                 $normsForDoc[$field->name] = chr($similarity->encodeNorm(
                     $similarity->lengthNorm($field->name, $tokenCount) * $document->boost * $field->boost
                 ));
             } else {
                 // No tokens at all: work on a copy that is neither indexed
                 // nor tokenized, leaving the caller's field object untouched.
                 $field = clone $field;
                 $field->isTokenized = false;
                 $field->isIndexed   = false;
             }
         } elseif ($field->isIndexed) {
             $utf8Value = $field->getUtf8Value();

             if ($utf8Value == '') {
                 // Empty value: same treatment as a tokenized field without tokens.
                 $field = clone $field;
                 $field->isTokenized = false;
                 $field->isIndexed   = false;
             } else {
                 // The whole value is indexed as a single term.
                 $term = new Zend_Search_Lucene_Index_Term($utf8Value, $field->name);
                 $key  = $term->key();

                 if (!isset($this->_termDictionary[$key])) {
                     // Term not seen in this segment yet
                     $this->_termDictionary[$key] = $term;
                     $this->_termDocs[$key]       = array($this->_docCount => array());
                 } elseif (!isset($this->_termDocs[$key][$this->_docCount])) {
                     // Known term, first occurrence in the current document
                     $this->_termDocs[$key][$this->_docCount] = array();
                 }

                 // The single term sits at position 0.
                 $this->_termDocs[$key][$this->_docCount][] = 0;

                 $normsForDoc[$field->name] = chr($similarity->encodeNorm(
                     $similarity->lengthNorm($field->name, 1) * $document->boost * $field->boost
                 ));
             }
         }

         if ($field->isStored) {
             $fieldsToStore[] = $field;
         }

         // Registered last so that the possibly downgraded clone is what the
         // segment records.
         $this->addField($field);
     }

     foreach ($this->_fields as $segmentFieldName => $segmentField) {
         if ($segmentField->isIndexed) {
             if (!isset($this->_norms[$segmentFieldName])) {
                 // Field is new to the segment: back-fill a zero-length norm
                 // for every document added so far.
                 $emptyNorm = chr($similarity->encodeNorm($similarity->lengthNorm($segmentFieldName, 0)));
                 $this->_norms[$segmentFieldName] = str_repeat($emptyNorm, $this->_docCount);
             }

             // Append this document's norm, or a zero-length norm when the
             // document did not index the field.
             $this->_norms[$segmentFieldName] .= isset($normsForDoc[$segmentFieldName])
                 ? $normsForDoc[$segmentFieldName]
                 : chr($similarity->encodeNorm($similarity->lengthNorm($segmentFieldName, 0)));
         }
     }

     $this->addStoredFields($fieldsToStore);
 }
Exemplo n.º 8
0
 /**
  * Retrieves the similarity used by the index reader.
  *
  * This is always the global default similarity.
  *
  * @return Zend_Search_Lucene_Search_Similarity
  */
 public function getSimilarity()
 {
     // The explicit require_once of 'Zend/Search/Lucene/Search/Similarity.php'
     // is disabled here, so the class has to be loadable by other means.
     $similarity = Zend_Search_Lucene_Search_Similarity::getDefault();

     return $similarity;
 }
 /**
  * Records the call and returns a fixed result list.
  *
  * The arguments of the call and the default similarity at call time are
  * kept on the object (presumably so a test can inspect them afterwards).
  *
  * @return array the integers 1 through 100
  * @throws Exception when the object's "e" property is truthy
  */
 public function find()
 {
     if ($this->e) {
         throw new Exception('Because you said so');
     }

     // Remember how we were called and which scoring was active.
     $this->args    = func_get_args();
     $this->scoring = Zend_Search_Lucene_Search_Similarity::getDefault();

     $fixedResults = range(1, 100);

     return $fixedResults;
 }
 /**
  * Wrapper for Lucene's find().
  *
  * Accepts an sfLuceneCriteria, a query string, or anything else the
  * underlying index understands. A criteria object may carry sort settings
  * and a scoring algorithm; the scoring algorithm is installed as the global
  * default only for the duration of the search, and the previous default is
  * put back whether the search succeeds or throws.
  *
  * @param mixed $query The query
  * @return array The array of results
  * @throws Exception whatever the underlying find() throws
  */
 public function find($query)
 {
     $this->configure();
     $timer = sfTimerManager::getTimer('Zend Search Lucene Find');

     $sortArgs      = array();
     $customScoring = null;

     if ($query instanceof sfLuceneCriteria) {
         // Flatten every sort into the (field, type, order) triplets find() expects.
         foreach ($query->getSorts() as $sortSpec) {
             array_push($sortArgs, $sortSpec['field'], $sortSpec['type'], $sortSpec['order']);
         }

         $customScoring = $query->getScoringAlgorithm();
         $query         = $query->getQuery();
     } elseif (is_string($query)) {
         $query = sfLuceneCriteria::newInstance($this)->addString($query)->getQuery();
     }

     $previousScoring = Zend_Search_Lucene_Search_Similarity::getDefault();
     if ($customScoring) {
         Zend_Search_Lucene_Search_Similarity::setDefault($customScoring);
     }

     $failure = null;
     $results = null;

     try {
         // Sorting is rare, so the plain call skips call_user_func_array() overhead.
         if ($sortArgs) {
             $findArgs = $sortArgs;
             array_unshift($findArgs, $query);
             $results = call_user_func_array(array($this->getLucene(), 'find'), $findArgs);
         } else {
             $results = $this->getLucene()->find($query);
         }
     } catch (Exception $e) {
         // Held back until the global scoring and the timer are settled.
         $failure = $e;
     }

     Zend_Search_Lucene_Search_Similarity::setDefault($previousScoring);
     $timer->addTime();

     if ($failure !== null) {
         throw $failure;
     }

     return $results;
 }
Exemplo n.º 11
0
 /**
  * Returns the normalization factor of one document for the given field.
  *
  * The norm bytes of a field are read from the segment's ".f<fieldNum>"
  * file on first access (one byte per document) and cached afterwards.
  *
  * @param integer $id        document number within this segment
  * @param string  $fieldName
  * @return mixed|null the value produced by
  *         Zend_Search_Lucene_Search_Similarity::decodeNorm(), or null when
  *         the field is unknown to this segment or is not indexed
  */
 public function norm($id, $fieldName)
 {
     $fieldNum = $this->getFieldNum($fieldName);

     // An unknown field name yields a number with no entry in $_fields;
     // check for that first so we never read a property of a missing entry.
     if (!isset($this->_fields[$fieldNum]) || !$this->_fields[$fieldNum]->isIndexed) {
         return null;
     }

     if (!isset($this->_norms[$fieldNum])) {
         $fFile = $this->openCompoundFile('.f' . $fieldNum);
         $this->_norms[$fieldNum] = $fFile->readBytes($this->_docCount);
     }

     return Zend_Search_Lucene_Search_Similarity::decodeNorm(ord($this->_norms[$fieldNum][$id]));
 }
Exemplo n.º 12
0
 /**
  * Retrieves the similarity used by the index reader.
  *
  * The Similarity class file is loaded from the configured library
  * directory before the global default similarity is returned.
  *
  * @return Zend_Search_Lucene_Search_Similarity
  */
 public function getSimilarity()
 {
     // Make sure the Similarity class is available before using it.
     require_once sfConfig::get('sf_lib_dir') . '/modules/search/lib/Lucene/Search/Similarity.php';

     $similarity = Zend_Search_Lucene_Search_Similarity::getDefault();

     return $similarity;
 }
Exemplo n.º 13
0
 /**
  * Dumps the Field Info (.fnm) segment file.
  *
  * Writes the field count followed by each field's name and flag byte
  * (bit 0x1: indexed, bit 0x2: term vector stored). For every indexed field
  * a separate norm file "<segment>.f<fieldNum>" is written with one norm
  * byte per document. All created file names are appended to $_files.
  */
 private function _dumpFNM()
 {
     $fnmFileName = $this->_name . '.fnm';
     $fnmFile     = $this->_directory->createFile($fnmFileName);
     $fnmFile->writeVInt(count($this->_fields));

     foreach ($this->_fields as $field) {
         $fnmFile->writeString($field->name);

         $flags = 0x0;
         if ($field->isIndexed) {
             $flags |= 0x1;
         }
         if ($field->storeTermVector) {
             $flags |= 0x2;
         }
         $fnmFile->writeByte($flags);

         if (!$field->isIndexed) {
             // Only indexed fields get a norm file.
             continue;
         }

         $fieldName  = $field->name;
         $fieldNum   = $this->_fields[$fieldName]->number;
         $similarity = Zend_Search_Lucene_Search_Similarity::getDefault();

         // Build the norm string: one encoded byte per document, using a
         // token count of 0 for documents with no recorded field length.
         $normBytes = '';
         $docId     = 0;
         while ($docId < $this->_docCount) {
             if (isset($this->_fieldLengths[$fieldName][$docId])) {
                 $numTokens = $this->_fieldLengths[$fieldName][$docId];
             } else {
                 $numTokens = 0;
             }
             $normBytes .= chr($similarity->encodeNorm($similarity->lengthNorm($fieldName, $numTokens)));
             $docId++;
         }

         $normFileName = $this->_name . '.f' . $fieldNum;
         $normFile     = $this->_directory->createFile($normFileName);
         $normFile->writeBytes($normBytes);
         $this->_files[] = $normFileName;
     }

     $this->_files[] = $fnmFileName;
 }