/**
 * Indexes one document into this in-memory segment.
 *
 * For every field of the document the field is registered via addField(),
 * its terms and positions are recorded for the current document number
 * ($this->_docCount), and a one-byte norm is computed. Afterwards one norm
 * byte is appended for every indexed field known to the segment, and the
 * stored fields are handed to addStoredFields().
 *
 * @param Zend_Search_Lucene_Document $document
 * @throws Zend_Search_Lucene_Exception if a field requests term vector storage
 */
public function addDocument(Zend_Search_Lucene_Document $document)
{
    $similarity   = Zend_Search_Lucene_Search_Similarity::getDefault();
    $docNorms     = array();
    $storedFields = array();

    foreach ($document->getFieldNames() as $fieldName) {
        $field = $document->getField($fieldName);
        $this->addField($field);

        if ($field->storeTermVector) {
            /**
             * @todo term vector storing support
             */
            throw new Zend_Search_Lucene_Exception('Store term vector functionality is not supported yet.');
        }

        if ($field->isIndexed) {
            if ($field->isTokenized) {
                $tokenList = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($field->stringValue);
            } else {
                // An untokenized field contributes its whole value as a single token.
                $tokenList = array(
                    new Zend_Search_Lucene_Analysis_Token($field->stringValue, 0, strlen($field->stringValue))
                );
            }

            $lengthNorm = $similarity->lengthNorm($field->name, count($tokenList));
            $docNorms[$field->name] = chr($similarity->encodeNorm($lengthNorm));

            $position = 0;
            foreach ($tokenList as $token) {
                $term    = new Zend_Search_Lucene_Index_Term($token->getTermText(), $field->name);
                $termKey = $term->key();

                if (!isset($this->_termDictionary[$termKey])) {
                    // First occurrence of this term in the segment.
                    $this->_termDictionary[$termKey] = $term;
                    $this->_termDocs[$termKey]       = array();
                }
                if (!isset($this->_termDocs[$termKey][$this->_docCount])) {
                    // First occurrence of this term in the current document.
                    $this->_termDocs[$termKey][$this->_docCount] = array();
                }

                $position += $token->getPositionIncrement();
                $this->_termDocs[$termKey][$this->_docCount][] = $position;
            }
        }

        if ($field->isStored) {
            $storedFields[] = $field;
        }
    }

    // Every indexed field of the segment gets exactly one norm byte per document.
    foreach ($this->_fields as $fieldName => $field) {
        if (!$field->isIndexed) {
            continue;
        }

        if (!isset($this->_norms[$fieldName])) {
            // Field first seen now: back-fill earlier documents with the zero-length norm.
            $emptyNorm = chr($similarity->encodeNorm($similarity->lengthNorm($fieldName, 0)));
            $this->_norms[$fieldName] = str_repeat($emptyNorm, $this->_docCount);
        }

        $this->_norms[$fieldName] .= isset($docNorms[$fieldName])
            ? $docNorms[$fieldName]
            : chr($similarity->encodeNorm($similarity->lengthNorm($fieldName, 0)));
    }

    $this->addStoredFields($storedFields);
}
/**
 * Returns the shared default Similarity implementation used by indexing
 * and search code, creating it on first use.
 *
 * @return Zend_Search_Lucene_Search_Similarity
 */
public static function getDefault()
{
    if (self::$_defaultImpl instanceof Zend_Search_Lucene_Search_Similarity) {
        return self::$_defaultImpl;
    }

    // Nothing usable cached yet: fall back to the stock implementation.
    self::$_defaultImpl = new Zend_Search_Lucene_Search_Similarity_Default();

    return self::$_defaultImpl;
}
/**
 * Returns the shared default Similarity implementation used by indexing
 * and search code, instantiating the stock one when none is cached.
 *
 * @return Zend_Search_Lucene_Search_Similarity
 */
public static function getDefault()
{
    $current = self::$_defaultImpl;

    if (!($current instanceof Zend_Search_Lucene_Search_Similarity)) {
        // The explicit include is disabled here:
        // require_once 'Zend/Search/Lucene/Search/Similarity/Default.php';
        $current = new Zend_Search_Lucene_Search_Similarity_Default();
        self::$_defaultImpl = $current;
    }

    return $current;
}
/**
 * Returns the shared default Similarity implementation used by indexing
 * and search code.
 *
 * When no Similarity instance is cached yet, the file defining the default
 * implementation is loaded from the configured 'sf_lib_dir' and an instance
 * of it is cached.
 *
 * @return Zend_Search_Lucene_Search_Similarity
 */
public static function getDefault()
{
    $alreadySet = self::$_defaultImpl instanceof Zend_Search_Lucene_Search_Similarity;

    if (!$alreadySet) {
        require_once sfConfig::get('sf_lib_dir') . '/modules/search/lib/Lucene/Search/Similarity/Default.php';

        self::$_defaultImpl = new Zend_Search_Lucene_Search_Similarity_Default();
    }

    return self::$_defaultImpl;
}
/**
 * Returns the norm vector of a field as a byte string.
 *
 * For a known, indexed field the norms are loaded on demand via _loadNorm()
 * and returned from the cache. For an unknown field (field number -1) or a
 * non-indexed field, a string of $this->_docCount copies of the encoded
 * zero-length norm is returned instead.
 *
 * @param string $fieldName
 * @return string
 */
public function normVector($fieldName)
{
    $fieldNum = $this->getFieldNum($fieldName);

    $isIndexedField = $fieldNum != -1 && $this->_fields[$fieldNum]->isIndexed;

    if ($isIndexedField) {
        if (!isset($this->_norms[$fieldNum])) {
            $this->_loadNorm($fieldNum);
        }

        return $this->_norms[$fieldNum];
    }

    // No real norms available: synthesize the default byte for every document.
    $similarity  = Zend_Search_Lucene_Search_Similarity::getDefault();
    $defaultByte = chr($similarity->encodeNorm($similarity->lengthNorm($fieldName, 0)));

    return str_repeat($defaultByte, $this->_docCount);
}
/**
 * Returns the Similarity implementation to use, which is always the
 * current default one.
 *
 * @return Zend_Search_Lucene_Search_Similarity
 */
public function getSimilarity()
{
    $similarity = Zend_Search_Lucene_Search_Similarity::getDefault();

    return $similarity;
}
/**
 * Indexes one document into this in-memory segment.
 *
 * Tokenized fields are streamed through the default analyzer; untokenized
 * fields contribute their UTF-8 value as a single term at position 0.
 * A field that yields no terms is cloned and the clone is downgraded to
 * non-indexed / non-tokenized before it is registered with addField().
 * Norm bytes are scaled by the document boost and the field boost.
 *
 * @param Zend_Search_Lucene_Document $document
 * @throws Zend_Search_Lucene_Exception if a field requests term vector storage
 */
public function addDocument(Zend_Search_Lucene_Document $document)
{
    /** Zend_Search_Lucene_Search_Similarity */
    // require_once 'Zend/Search/Lucene/Search/Similarity.php';

    $similarity   = Zend_Search_Lucene_Search_Similarity::getDefault();
    $docNorms     = array();
    $storedFields = array();

    foreach ($document->getFieldNames() as $fieldName) {
        $field = $document->getField($fieldName);

        if ($field->storeTermVector) {
            /**
             * @todo term vector storing support
             */
            // require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Store term vector functionality is not supported yet.');
        }

        if ($field->isIndexed && $field->isTokenized) {
            /** Zend_Search_Lucene_Analysis_Analyzer */
            // require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
            $analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
            $analyzer->setInput($field->value, $field->encoding);

            $position   = 0;
            $tokenCount = 0;
            while (($token = $analyzer->nextToken()) !== null) {
                $tokenCount++;

                $term    = new Zend_Search_Lucene_Index_Term($token->getTermText(), $field->name);
                $termKey = $term->key();

                if (!isset($this->_termDictionary[$termKey])) {
                    // First occurrence of this term in the segment.
                    $this->_termDictionary[$termKey] = $term;
                    $this->_termDocs[$termKey]       = array();
                }
                if (!isset($this->_termDocs[$termKey][$this->_docCount])) {
                    // First occurrence of this term in the current document.
                    $this->_termDocs[$termKey][$this->_docCount] = array();
                }

                $position += $token->getPositionIncrement();
                $this->_termDocs[$termKey][$this->_docCount][] = $position;
            }

            if ($tokenCount == 0) {
                // Field contains empty value. Treat it as non-indexed and non-tokenized.
                // Work on a clone so the caller's field object is left untouched.
                $field = clone $field;
                $field->isTokenized = false;
                $field->isIndexed   = false;
            } else {
                $boostedNorm = $similarity->lengthNorm($field->name, $tokenCount)
                             * $document->boost
                             * $field->boost;
                $docNorms[$field->name] = chr($similarity->encodeNorm($boostedNorm));
            }
        } elseif ($field->isIndexed) {
            $fieldUtf8Value = $field->getUtf8Value();

            if ($fieldUtf8Value == '') {
                // Field contains empty value. Treat it as non-indexed and non-tokenized.
                $field = clone $field;
                $field->isTokenized = false;
                $field->isIndexed   = false;
            } else {
                $term    = new Zend_Search_Lucene_Index_Term($fieldUtf8Value, $field->name);
                $termKey = $term->key();

                if (!isset($this->_termDictionary[$termKey])) {
                    // First occurrence of this term in the segment.
                    $this->_termDictionary[$termKey] = $term;
                    $this->_termDocs[$termKey]       = array();
                }
                if (!isset($this->_termDocs[$termKey][$this->_docCount])) {
                    // First occurrence of this term in the current document.
                    $this->_termDocs[$termKey][$this->_docCount] = array();
                }

                // The whole value is a single term, recorded at position 0.
                $this->_termDocs[$termKey][$this->_docCount][] = 0;

                $boostedNorm = $similarity->lengthNorm($field->name, 1)
                             * $document->boost
                             * $field->boost;
                $docNorms[$field->name] = chr($similarity->encodeNorm($boostedNorm));
            }
        }

        if ($field->isStored) {
            $storedFields[] = $field;
        }

        // Registered last so that a downgraded clone is what the segment sees.
        $this->addField($field);
    }

    // Every indexed field of the segment gets exactly one norm byte per document.
    foreach ($this->_fields as $fieldName => $field) {
        if (!$field->isIndexed) {
            continue;
        }

        if (!isset($this->_norms[$fieldName])) {
            // Field first seen now: back-fill earlier documents with the zero-length norm.
            $emptyNorm = chr($similarity->encodeNorm($similarity->lengthNorm($fieldName, 0)));
            $this->_norms[$fieldName] = str_repeat($emptyNorm, $this->_docCount);
        }

        $this->_norms[$fieldName] .= isset($docNorms[$fieldName])
            ? $docNorms[$fieldName]
            : chr($similarity->encodeNorm($similarity->lengthNorm($fieldName, 0)));
    }

    $this->addStoredFields($storedFields);
}
/**
 * Retrieves the similarity used by the index reader, which is always the
 * current default Similarity.
 *
 * @return Zend_Search_Lucene_Search_Similarity
 */
public function getSimilarity()
{
    /** Zend_Search_Lucene_Search_Similarity */
    // require_once 'Zend/Search/Lucene/Search/Similarity.php';

    $similarity = Zend_Search_Lucene_Search_Similarity::getDefault();

    return $similarity;
}
/**
 * Stub-like find(): records how it was called and returns a fixed result.
 *
 * Stores the received arguments in $this->args and the current default
 * Similarity in $this->scoring, then returns the integers 1 through 100.
 * NOTE(review): looks like a test double for a Lucene index; confirm
 * against the enclosing class.
 *
 * @return array integers 1..100
 * @throws Exception when $this->e is truthy
 */
public function find()
{
    if (!$this->e) {
        $this->args    = func_get_args();
        $this->scoring = Zend_Search_Lucene_Search_Similarity::getDefault();

        return range(1, 100);
    }

    throw new Exception('Because you said so');
}
/**
 * Wrapper for Lucene's find().
 *
 * Accepts an sfLuceneCriteria (whose sorts and scoring algorithm are
 * honoured), a plain string (turned into a criteria query), or anything
 * else, which is passed to Lucene unchanged. The default Similarity is
 * swapped for the criteria's scoring algorithm during the search and is
 * restored afterwards, whether or not the search throws.
 *
 * @param mixed $query The query
 * @return array The array of results
 * @throws Exception any exception raised by the underlying find() is rethrown
 */
public function find($query)
{
    $this->configure();

    $timer = sfTimerManager::getTimer('Zend Search Lucene Find');

    $sortArgs = array();
    $scoring  = null;

    if ($query instanceof sfLuceneCriteria) {
        // Flatten each sort into the (field, type, order) triple find() expects.
        foreach ($query->getSorts() as $sortable) {
            array_push($sortArgs, $sortable['field'], $sortable['type'], $sortable['order']);
        }

        $scoring = $query->getScoringAlgorithm();
        $query   = $query->getQuery();
    } elseif (is_string($query)) {
        $query = sfLuceneCriteria::newInstance($this)->addString($query)->getQuery();
    }

    $previousScoring = Zend_Search_Lucene_Search_Similarity::getDefault();

    if ($scoring) {
        Zend_Search_Lucene_Search_Similarity::setDefault($scoring);
    }

    $failure = null;
    $results = null;

    try {
        if (!$sortArgs) {
            // Sorting is rare, so the common path skips call_user_func_array().
            $results = $this->getLucene()->find($query);
        } else {
            $args    = array_merge(array($query), $sortArgs);
            $results = call_user_func_array(array($this->getLucene(), 'find'), $args);
        }
    } catch (Exception $e) {
        // Defer the rethrow so the cleanup below is written only once.
        $failure = $e;
    }

    Zend_Search_Lucene_Search_Similarity::setDefault($previousScoring);
    $timer->addTime();

    if ($failure !== null) {
        throw $failure;
    }

    return $results;
}
/**
 * Returns the normalization factor of one document for one field.
 *
 * The field's norm bytes are read on first use from the compound file entry
 * '.f<fieldNum>' ($this->_docCount bytes) and cached in $this->_norms.
 * The byte at offset $id is then decoded with
 * Zend_Search_Lucene_Search_Similarity::decodeNorm().
 *
 * @param integer $id        Document number, used as an offset into the norm bytes
 * @param string  $fieldName
 * @return float|null Decoded norm (presumably a float - confirm against
 *                    decodeNorm()), or null when the field is unknown or not indexed
 */
public function norm($id, $fieldName)
{
    $fieldNum = $this->getFieldNum($fieldName);

    // An unknown field has no entry in $_fields. Check that first so we do not
    // read an undefined offset and then a property of a non-object.
    if (!isset($this->_fields[$fieldNum]) || !$this->_fields[$fieldNum]->isIndexed) {
        return null;
    }

    if (!isset($this->_norms[$fieldNum])) {
        // Lazily load one norm byte per document for this field.
        $fFile = $this->openCompoundFile('.f' . $fieldNum);
        $this->_norms[$fieldNum] = $fFile->readBytes($this->_docCount);
    }

    return Zend_Search_Lucene_Search_Similarity::decodeNorm(ord($this->_norms[$fieldNum][$id]));
}
/**
 * Retrieves the similarity used by the index reader.
 *
 * Loads the Similarity class file from the configured 'sf_lib_dir' and
 * returns the current default Similarity.
 *
 * @return Zend_Search_Lucene_Search_Similarity
 */
public function getSimilarity()
{
    /** Zend_Search_Lucene_Search_Similarity */
    require_once sfConfig::get('sf_lib_dir') . '/modules/search/lib/Lucene/Search/Similarity.php';

    $similarity = Zend_Search_Lucene_Search_Similarity::getDefault();

    return $similarity;
}
/**
 * Dumps the Field Info (.fnm) segment file and, for every indexed field,
 * its norm file ('.f<fieldNumber>').
 *
 * The .fnm file holds the field count followed by each field's name and a
 * flag byte (bit 0x1 = indexed, bit 0x2 = term vector stored). Each norm
 * file holds one encoded norm byte per document, derived from the token
 * count recorded in $this->_fieldLengths (0 when none is recorded).
 * Every file name written is appended to $this->_files.
 */
private function _dumpFNM()
{
    $fnmFileName = $this->_name . '.fnm';
    $fnmFile     = $this->_directory->createFile($fnmFileName);
    $fnmFile->writeVInt(count($this->_fields));

    foreach ($this->_fields as $field) {
        $fnmFile->writeString($field->name);

        $flags = 0x0;
        if ($field->isIndexed) {
            $flags |= 0x1;
        }
        if ($field->storeTermVector) {
            $flags |= 0x2;
        }
        $fnmFile->writeByte($flags);

        if (!$field->isIndexed) {
            // Only indexed fields carry norms.
            continue;
        }

        $fieldName  = $field->name;
        $fieldNum   = $this->_fields[$fieldName]->number;
        $similarity = Zend_Search_Lucene_Search_Similarity::getDefault();

        $normBytes = array();
        for ($docId = 0; $docId < $this->_docCount; $docId++) {
            if (isset($this->_fieldLengths[$fieldName][$docId])) {
                $numTokens = $this->_fieldLengths[$fieldName][$docId];
            } else {
                $numTokens = 0;
            }

            $normBytes[] = chr($similarity->encodeNorm($similarity->lengthNorm($fieldName, $numTokens)));
        }

        $normFileName = $this->_name . '.f' . $fieldNum;
        $normFile     = $this->_directory->createFile($normFileName);
        $normFile->writeBytes(implode('', $normBytes));

        $this->_files[] = $normFileName;
    }

    $this->_files[] = $fnmFileName;
}