Example #1
0
 /**
  * Checks skipTo() on the terms stream of an index built from one document
  * whose 'contents' field holds the text 'someterm word'.
  *
  * After skipping to the term ('term', 'contents') the stream is expected to
  * be positioned on ('word', 'contents').
  */
 public function testTermsStreamInterfaceSkipToTermsRetrievingTwoTermsCase()
 {
     $indexPath = dirname(__FILE__) . '/_index/_files';

     // Build the index with a single two-word document, then drop the handle
     // before the index is reopened for reading.
     $writer = Lucene\Lucene::create($indexPath);
     $document = new Document\Document();
     $document->addField(Document\Field::Text('contents', 'someterm word'));
     $writer->addDocument($document);
     unset($writer);

     // Reopen the index and walk its terms stream.
     $reader = Lucene\Lucene::open($indexPath);
     $reader->resetTermsStream();
     $reader->skipTo(new Index\Term('term', 'contents'));

     $expectedTerm = new Index\Term('word', 'contents');
     $this->assertTrue($reader->currentTerm() == $expectedTerm);

     $reader->closeTermsStream();

     // Remove the index files created by this test.
     $this->_clearDirectory($indexPath);
 }
Example #2
0
File: Index.php Project: stunti/zf2
 /**
  * Returns the Document object holding the stored fields of document
  * number $id in this index.
  *
  * The owning segment is found by walking the segment list and accumulating
  * segment sizes. The segment's '.fdx' file is then read at 8 bytes per
  * document to obtain the position of the document's field data inside the
  * '.fdt' file, from which every stored field is rebuilt.
  *
  * @param integer|\Zend\Search\Lucene\Search\QueryHit $id  Document number, or a query hit
  *                                                        whose id property is used instead
  * @return \Zend\Search\Lucene\Document\Document
  * @throws \Zend\Search\Lucene\Exception    Exception is thrown if $id is out of the range
  */
 public function getDocument($id)
 {
     if ($id instanceof Search\QueryHit) {
         /* @var $id \Zend\Search\Lucene\Search\QueryHit */
         $id = $id->id;
     }
     // Valid ids are 0 .. _docCount - 1. A negative id must be rejected here,
     // otherwise it would select the first segment and produce a negative
     // seek offset in the '.fdx' file below.
     if ($id < 0 || $id >= $this->_docCount) {
         throw new Exception('Document id is out of the range.');
     }
     // Locate the segment containing $id. After the loop $segmentInfo is the
     // owning segment and $segmentStartId is the id of its first document.
     $segmentStartId = 0;
     foreach ($this->_segmentInfos as $segmentInfo) {
         if ($segmentStartId + $segmentInfo->count() > $id) {
             break;
         }
         $segmentStartId += $segmentInfo->count();
     }
     // The field index holds one 8-byte entry per document: the position of
     // that document's field data in the '.fdt' file.
     $fdxFile = $segmentInfo->openCompoundFile('.fdx');
     $fdxFile->seek(($id - $segmentStartId) * 8, SEEK_CUR);
     $fieldValuesPosition = $fdxFile->readLong();
     $fdtFile = $segmentInfo->openCompoundFile('.fdt');
     $fdtFile->seek($fieldValuesPosition, SEEK_CUR);
     $fieldCount = $fdtFile->readVInt();
     $doc = new Document\Document();
     for ($count = 0; $count < $fieldCount; $count++) {
         $fieldNum = $fdtFile->readVInt();
         $bits = $fdtFile->readByte();
         $fieldInfo = $segmentInfo->getField($fieldNum);
         // Bit 2 of the flags byte distinguishes binary from text values.
         // NOTE(review): bit 1 is presumably the "tokenized" flag of the
         // Lucene stored-fields format - confirm against Document\Field.
         if (!($bits & 2)) {
             // Text data
             $field = new Document\Field($fieldInfo->name, $fdtFile->readString(), 'UTF-8', true, $fieldInfo->isIndexed, $bits & 1);
         } else {
             // Binary data
             $field = new Document\Field($fieldInfo->name, $fdtFile->readBinary(), '', true, $fieldInfo->isIndexed, $bits & 1, true);
         }
         $doc->addField($field);
     }
     return $doc;
 }
Example #3
0
 /**
  * Checks that Document::addField() returns a Document, so that several
  * addField() calls can be chained one after another.
  */
 public function testAddFieldMethodChaining()
 {
     // A single call must hand back a Document instance.
     $single = new Document\Document();
     $returned = $single->addField(Document\Field::Text('title', 'Title'));
     $this->assertTrue($returned instanceof Document\Document);

     // Three consecutive calls, each made on the previous call's return
     // value; this part carries no explicit assertion.
     $chained = new Document\Document();
     $afterTitle = $chained->addField(Document\Field::Text('title', 'Title'));
     $afterAnnotation = $afterTitle->addField(Document\Field::Text('annotation', 'Annotation'));
     $afterAnnotation->addField(Document\Field::Text('body', 'Document body, document body, document body...'));
 }
Example #4
0
 /**
  * Indexes a document into this segment.
  *
  * For every field of the document:
  *  - an indexed, tokenized field is run through the default analyzer and
  *    each token is recorded as a term occurrence with its position;
  *  - an indexed, untokenized field is recorded as one term at position 0;
  *  - an indexed field that yields no tokens / an empty value is cloned and
  *    the clone is treated as non-indexed and non-tokenized;
  *  - stored fields are collected and handed to addStoredFields() at the end.
  *
  * Afterwards one norm byte is appended for every indexed field known to the
  * segment, so each norms string keeps one byte per document.
  *
  * @param \Zend\Search\Lucene\Document\Document $document
  * @throws \Zend\Search\Lucene\Exception  If a field requests term vector storing
  */
 public function addDocument(Document\Document $document)
 {
     $similarity    = Similarity\Similarity::getDefault();
     $fieldsToStore = array();
     $normsForDoc   = array();

     foreach ($document->getFieldNames() as $name) {
         $field = $document->getField($name);

         if ($field->storeTermVector) {
             /**
              * @todo term vector storing support
              */
             throw new Lucene\Exception('Store term vector functionality is not supported yet.');
         }

         if ($field->isIndexed) {
             if (!$field->isTokenized) {
                 // Untokenized: the whole UTF-8 value is a single term.
                 $wholeValue = $field->getUtf8Value();

                 if ($wholeValue == '') {
                     // Empty value: work on a copy that is neither indexed nor tokenized.
                     $field = clone $field;
                     $field->isTokenized = false;
                     $field->isIndexed   = false;
                 } else {
                     $term = new Index\Term($wholeValue, $field->name);
                     $key  = $term->key();

                     if (!isset($this->_termDictionary[$key])) {
                         // First time this term is seen in the segment.
                         $this->_termDictionary[$key] = $term;
                         $this->_termDocs[$key]       = array();
                     }
                     if (!isset($this->_termDocs[$key][$this->_docCount])) {
                         // First occurrence of the term in the current document.
                         $this->_termDocs[$key][$this->_docCount] = array();
                     }

                     // The single term sits at position 0.
                     $this->_termDocs[$key][$this->_docCount][] = 0;

                     $lengthNorm = $similarity->lengthNorm($field->name, 1);
                     $normsForDoc[$field->name] = chr($similarity->encodeNorm($lengthNorm * $document->boost * $field->boost));
                 }
             } else {
                 // Tokenized: feed the value to the default analyzer.
                 $analyzer = Analyzer\Analyzer::getDefault();
                 $analyzer->setInput($field->value, $field->encoding);

                 $tokensSeen = 0;
                 $position   = 0;

                 for ($token = $analyzer->nextToken(); $token !== null; $token = $analyzer->nextToken()) {
                     $tokensSeen++;

                     $term = new Index\Term($token->getTermText(), $field->name);
                     $key  = $term->key();

                     if (!isset($this->_termDictionary[$key])) {
                         // First time this term is seen in the segment.
                         $this->_termDictionary[$key] = $term;
                         $this->_termDocs[$key]       = array();
                     }
                     if (!isset($this->_termDocs[$key][$this->_docCount])) {
                         // First occurrence of the term in the current document.
                         $this->_termDocs[$key][$this->_docCount] = array();
                     }

                     $position += $token->getPositionIncrement();
                     $this->_termDocs[$key][$this->_docCount][] = $position;
                 }

                 if ($tokensSeen == 0) {
                     // No tokens produced: work on a copy that is neither indexed nor tokenized.
                     $field = clone $field;
                     $field->isTokenized = false;
                     $field->isIndexed   = false;
                 } else {
                     $lengthNorm = $similarity->lengthNorm($field->name, $tokensSeen);
                     $normsForDoc[$field->name] = chr($similarity->encodeNorm($lengthNorm * $document->boost * $field->boost));
                 }
             }
         }

         if ($field->isStored) {
             $fieldsToStore[] = $field;
         }

         $this->addField($field);
     }

     // Append this document's norm byte to every indexed field of the segment.
     foreach ($this->_fields as $knownName => $knownField) {
         if ($knownField->isIndexed) {
             if (!isset($this->_norms[$knownName])) {
                 // Field has no norms yet: back-fill one zero-length norm
                 // byte for each of the _docCount earlier documents.
                 $this->_norms[$knownName] = str_repeat(chr($similarity->encodeNorm($similarity->lengthNorm($knownName, 0))), $this->_docCount);
             }

             // Use the norm computed above, or the zero-length norm when this
             // document contributed nothing for the field.
             $this->_norms[$knownName] .= isset($normsForDoc[$knownName])
                 ? $normsForDoc[$knownName]
                 : chr($similarity->encodeNorm($similarity->lengthNorm($knownName, 0)));
         }
     }

     $this->addStoredFields($fieldsToStore);
 }