/**
 * Checks that fields added to a Document can be enumerated and read back
 * through property access, getField() and getFieldValue(), and that a field
 * supplied in ISO-8859-1 is returned unchanged as-is and converted by
 * getFieldUtf8Value().
 */
public function testFields()
{
    $expectedValues = array(
        'title'      => 'Title',
        'annotation' => 'Annotation',
        'body'       => 'Document body, document body, document body...',
    );

    $document = new Document();
    foreach ($expectedValues as $name => $value) {
        $document->addField(Document\Field::Text($name, $value));
    }

    $expectedNames = array_keys($expectedValues);
    $actualNames   = $document->getFieldNames();
    $this->assertTrue(is_array($actualNames));

    // Compare in both directions: a single array_diff(actual, expected) only
    // detects unexpected names and would let a missing field name pass.
    $this->assertEquals(array(), array_diff($actualNames, $expectedNames), 'Unexpected field names');
    $this->assertEquals(array(), array_diff($expectedNames, $actualNames), 'Missing field names');

    // PHPUnit expects (expected, actual); keeping that order makes the
    // failure output report the right side as the expectation.
    foreach ($expectedValues as $name => $value) {
        $this->assertEquals($value, $document->$name, "Property access for '$name'");
    }
    foreach ($expectedValues as $name => $value) {
        $this->assertEquals($value, $document->getField($name)->value, "getField() for '$name'");
    }
    foreach ($expectedValues as $name => $value) {
        $this->assertEquals($value, $document->getFieldValue($name), "getFieldValue() for '$name'");
    }

    if (PHP_OS === 'AIX') {
        return; // tests below here not valid on AIX
    }

    $wordsWithUmlautsUtf8     = 'Words with umlauts: åãü...';
    $wordsWithUmlautsIso88591 = iconv('UTF-8', 'ISO-8859-1', $wordsWithUmlautsUtf8);

    // iconv() returns false on failure; fail here with a clear message instead
    // of indexing a boolean and failing on an unrelated assertion below.
    $this->assertNotSame(false, $wordsWithUmlautsIso88591, 'iconv() could not convert the fixture to ISO-8859-1');

    $document->addField(Document\Field::Text('description', $wordsWithUmlautsIso88591, 'ISO-8859-1'));

    $this->assertEquals($wordsWithUmlautsIso88591, $document->description);
    $this->assertEquals($wordsWithUmlautsUtf8, $document->getFieldUtf8Value('description'));
}
/**
 * Adds a document to this segment.
 *
 * Every field of the document is passed to addField(). Indexed fields also
 * contribute term postings ($this->_termDocs[termKey][$this->_docCount] is a
 * list of positions) and a one-byte encoded norm; an indexed field that
 * yields no terms is cloned and treated as non-indexed and non-tokenized.
 * Stored fields are collected and handed to addStoredFields() at the end.
 *
 * @param \ZendSearch\Lucene\Document $document
 * @throws LuceneException\UnsupportedMethodCallException If a field requests term vector storage.
 */
public function addDocument(Document $document)
{
    $storedFields = array();
    $docNorms     = array();
    $similarity   = AbstractSimilarity::getDefault();

    foreach ($document->getFieldNames() as $fieldName) {
        $field = $document->getField($fieldName);

        if ($field->storeTermVector) {
            /**
             * @todo term vector storing support
             */
            throw new LuceneException\UnsupportedMethodCallException('Store term vector functionality is not supported yet.');
        }

        if ($field->isIndexed) {
            // Number of terms this field contributes for this document.
            $termCount = 0;

            if ($field->isTokenized) {
                $analyzer = Analyzer\Analyzer::getDefault();
                $analyzer->setInput($field->value, $field->encoding);

                // Positions are the running sum of the tokens' position increments.
                $position = 0;
                while (($token = $analyzer->nextToken()) !== null) {
                    $termCount++;
                    $position += $token->getPositionIncrement();
                    $this->_addTermPosition(new Index\Term($token->getTermText(), $field->name), $position);
                }
            } elseif (($fieldUtf8Value = $field->getUtf8Value()) != '') {
                // Untokenized field: the whole value is a single term at position 0.
                $this->_addTermPosition(new Index\Term($fieldUtf8Value, $field->name), 0);
                $termCount = 1;
            }

            if ($termCount == 0) {
                // Field contains empty value. Treat it as non-indexed and non-tokenized.
                // Work on a clone so the caller's field object is left untouched.
                $field = clone $field;
                $field->isIndexed = $field->isTokenized = false;
            } else {
                $docNorms[$field->name] = chr($similarity->encodeNorm(
                    $similarity->lengthNorm($field->name, $termCount) * $document->boost * $field->boost
                ));
            }
        }

        if ($field->isStored) {
            $storedFields[] = $field;
        }

        $this->addField($field);
    }

    foreach ($this->_fields as $fieldName => $segmentField) {
        if (!$segmentField->isIndexed) {
            continue;
        }

        // Norm byte used for a document that has no terms in this field.
        $emptyNorm = chr($similarity->encodeNorm($similarity->lengthNorm($fieldName, 0)));

        if (!isset($this->_norms[$fieldName])) {
            // First time this field gets norms: back-fill one empty norm for
            // each of the $this->_docCount earlier entries.
            $this->_norms[$fieldName] = str_repeat($emptyNorm, $this->_docCount);
        }

        $this->_norms[$fieldName] .= isset($docNorms[$fieldName]) ? $docNorms[$fieldName] : $emptyNorm;
    }

    $this->addStoredFields($storedFields);
}

/**
 * Records one occurrence of a term at the given position, filed under the
 * current value of $this->_docCount.
 *
 * @param Index\Term $term
 * @param integer    $position
 */
private function _addTermPosition(Index\Term $term, $position)
{
    $termKey = $term->key();

    if (!isset($this->_termDictionary[$termKey])) {
        // New term: register it and start its postings from scratch.
        $this->_termDictionary[$termKey] = $term;
        $this->_termDocs[$termKey]       = array($this->_docCount => array());
    } elseif (!isset($this->_termDocs[$termKey][$this->_docCount])) {
        // Existing term, but new term entry
        $this->_termDocs[$termKey][$this->_docCount] = array();
    }

    $this->_termDocs[$termKey][$this->_docCount][] = $position;
}