/**
 * Checks that fields added to a Zend_Search_Lucene_Document can be read back
 * through every accessor the document offers (magic property, getField(),
 * getFieldValue()), and that a field stored in ISO-8859-1 is returned
 * unchanged by the property accessor and converted by getFieldUtf8Value().
 *
 * @return void
 */
public function testFields()
{
    $document = new Zend_Search_Lucene_Document();

    $document->addField(Zend_Search_Lucene_Field::Text('title', 'Title'));
    $document->addField(Zend_Search_Lucene_Field::Text('annotation', 'Annotation'));
    $document->addField(Zend_Search_Lucene_Field::Text('body', 'Document body, document body, document body...'));

    // Compare the name sets in both directions: a one-way array_diff() would
    // not notice a field that is expected but missing from the document.
    $expectedNames = array('title', 'annotation', 'body');
    $actualNames   = $document->getFieldNames();
    $this->assertTrue(is_array($actualNames));
    $this->assertEquals(array(), array_values(array_diff($actualNames, $expectedNames)));
    $this->assertEquals(array(), array_values(array_diff($expectedNames, $actualNames)));

    // PHPUnit's assertEquals() takes the expected value first; keeping that
    // order makes failure messages report expected/actual the right way round.
    $this->assertEquals('Title', $document->title);
    $this->assertEquals('Annotation', $document->annotation);
    $this->assertEquals('Document body, document body, document body...', $document->body);

    $this->assertEquals('Title', $document->getField('title')->value);
    $this->assertEquals('Annotation', $document->getField('annotation')->value);
    $this->assertEquals('Document body, document body, document body...', $document->getField('body')->value);

    $this->assertEquals('Title', $document->getFieldValue('title'));
    $this->assertEquals('Annotation', $document->getFieldValue('annotation'));
    $this->assertEquals('Document body, document body, document body...', $document->getFieldValue('body'));

    if (PHP_OS == 'AIX') {
        return; // tests below here not valid on AIX
    }

    // Build the ISO-8859-1 fixture from a UTF-8 literal so the test does not
    // depend on raw non-UTF-8 bytes being present in this source file.
    $wordsWithUmlautsIso88591 = iconv('UTF-8', 'ISO-8859-1', 'Words with umlauts: åãü...');

    $document->addField(Zend_Search_Lucene_Field::Text('description', $wordsWithUmlautsIso88591, 'ISO-8859-1'));
    $this->assertEquals($wordsWithUmlautsIso88591, $document->description);
    $this->assertEquals('Words with umlauts: åãü...', $document->getFieldUtf8Value('description'));
}
/**
 * Checks that fields added to a Zend_Search_Lucene_Document can be read back
 * through every accessor the document offers (magic property, getField(),
 * getFieldValue()), and that a field stored in ISO-8859-1 is returned
 * unchanged by the property accessor and converted by getFieldUtf8Value().
 *
 * @return void
 */
public function testFields()
{
    $document = new Zend_Search_Lucene_Document();

    $document->addField(Zend_Search_Lucene_Field::Text('title', 'Title'));
    $document->addField(Zend_Search_Lucene_Field::Text('annotation', 'Annotation'));
    $document->addField(Zend_Search_Lucene_Field::Text('body', 'Document body, document body, document body...'));

    // Compare the name sets in both directions: a one-way array_diff() would
    // not notice a field that is expected but missing from the document.
    $expectedNames = array('title', 'annotation', 'body');
    $actualNames   = $document->getFieldNames();
    $this->assertTrue(is_array($actualNames));
    $this->assertEquals(array(), array_values(array_diff($actualNames, $expectedNames)));
    $this->assertEquals(array(), array_values(array_diff($expectedNames, $actualNames)));

    // PHPUnit's assertEquals() takes the expected value first; keeping that
    // order makes failure messages report expected/actual the right way round.
    $this->assertEquals('Title', $document->title);
    $this->assertEquals('Annotation', $document->annotation);
    $this->assertEquals('Document body, document body, document body...', $document->body);

    $this->assertEquals('Title', $document->getField('title')->value);
    $this->assertEquals('Annotation', $document->getField('annotation')->value);
    $this->assertEquals('Document body, document body, document body...', $document->getField('body')->value);

    $this->assertEquals('Title', $document->getFieldValue('title'));
    $this->assertEquals('Annotation', $document->getFieldValue('annotation'));
    $this->assertEquals('Document body, document body, document body...', $document->getFieldValue('body'));

    // The umlaut fixtures are spelled as explicit byte escapes so they survive
    // any re-encoding of this source file: the same text ("åãü") is given once
    // as ISO-8859-1 bytes and once as UTF-8 bytes.
    $wordsWithUmlautsIso88591 = "Words with umlauts: \xE5\xE3\xFC...";             // å ã ü in ISO-8859-1
    $wordsWithUmlautsUtf8     = "Words with umlauts: \xC3\xA5\xC3\xA3\xC3\xBC..."; // å ã ü in UTF-8

    $document->addField(Zend_Search_Lucene_Field::Text('description', $wordsWithUmlautsIso88591, 'ISO-8859-1'));
    $this->assertEquals($wordsWithUmlautsIso88591, $document->description);
    $this->assertEquals($wordsWithUmlautsUtf8, $document->getFieldUtf8Value('description'));
}
/**
 * (Re)indexes a single catalog record in the Lucene index held in $this->_index.
 *
 * If the catalog row exists, every document already indexed under the same
 * guid is deleted, a new document is built from the catalog row and its
 * attribute rows, and the document is added to the index. Catalogs whose
 * profile is 'kutu_doc' are only added when they end up with a non-empty
 * 'content' field; otherwise they are left out of the index.
 *
 * If no catalog row matches $catalogGuid, nothing is changed.
 *
 * @param string $catalogGuid Guid of the catalog row to index.
 * @return void
 */
public function indexCatalog($catalogGuid)
{
    $index = $this->_index;

    $tblCatalog    = new Kutu_Core_Orm_Table_Catalog();
    $rowsetCatalog = $tblCatalog->find($catalogGuid);
    if (!count($rowsetCatalog)) {
        // Unknown catalog: nothing to delete, nothing to index.
        return;
    }

    // Drop whatever is currently indexed under this guid before re-adding it.
    $term = new Zend_Search_Lucene_Index_Term($catalogGuid, 'guid');
    foreach ($index->termDocs($term) as $id) {
        $index->delete($id);
    }

    $rowCatalog = $rowsetCatalog->current();
    $isFile     = ($rowCatalog->profileGuid == 'kutu_doc');

    $doc = new Zend_Search_Lucene_Document();
    $doc->addField(Zend_Search_Lucene_Field::Keyword('guid', $rowCatalog->guid));

    // parentGuid: a kutu_doc points at the catalog it is a RELATED_FILE of
    // (when such a relation exists); every other profile points at itself.
    if ($isFile) {
        $tblRelatedItem = new Kutu_Core_Orm_Table_RelatedItem();
        // NOTE(review): the guid is interpolated into the WHERE clause. It comes
        // from the row just loaded, but binding/quoting it through the table's
        // DB adapter would be safer - confirm the adapter API and switch.
        $rowset = $tblRelatedItem->fetchAll("itemGuid='{$rowCatalog->guid}' AND relateAs='RELATED_FILE'");
        if (count($rowset)) {
            $relatedRow = $rowset->current();
            $doc->addField(Zend_Search_Lucene_Field::Keyword('parentGuid', $relatedRow->relatedGuid));
        }
    } else {
        $doc->addField(Zend_Search_Lucene_Field::Keyword('parentGuid', $rowCatalog->guid));
    }

    $doc->addField(Zend_Search_Lucene_Field::Text('profile', $rowCatalog->profileGuid));
    $doc->addField(Zend_Search_Lucene_Field::Keyword('publishedDate', $this->_filterDateTime($rowCatalog->publishedDate)));
    $doc->addField(Zend_Search_Lucene_Field::Keyword('expiredDate', $this->_filterDateTime($rowCatalog->expiredDate)));
    $doc->addField(Zend_Search_Lucene_Field::Keyword('createdBy', $rowCatalog->createdBy));
    $doc->addField(Zend_Search_Lucene_Field::Keyword('modifiedBy', $rowCatalog->modifiedBy));
    $doc->addField(Zend_Search_Lucene_Field::Keyword('createdDate', $this->_filterDateTime($rowCatalog->createdDate)));
    $doc->addField(Zend_Search_Lucene_Field::Keyword('modifiedDate', $this->_filterDateTime($rowCatalog->modifiedDate)));
    $doc->addField(Zend_Search_Lucene_Field::Keyword('status', $rowCatalog->status));
    $doc->addField(Zend_Search_Lucene_Field::Keyword('objectType', $isFile ? 'file' : 'catalog'));

    $rowsetCatalogAttribute = $rowCatalog->findDependentRowsetCatalogAttribute();
    if (count($rowsetCatalogAttribute)) {
        // Created on first use and then shared by all attributes that need it.
        $tblAttribute = null;

        foreach ($rowsetCatalogAttribute as $rowCatalogAttribute) {
            switch ($rowCatalogAttribute->attributeGuid) {
                case 'fixedTitle':
                case 'title':
                    $doc->addField(Zend_Search_Lucene_Field::Text('title', $rowCatalogAttribute->value));
                    break;

                case 'fixedSubTitle':
                case 'subTitle':
                    $doc->addField(Zend_Search_Lucene_Field::Text('subtitle', $rowCatalogAttribute->value));
                    break;

                case 'fixedContent':
                case 'content':
                    // Index only the text of the HTML body, not the markup.
                    $docHtml     = Zend_Search_Lucene_Document_Html::loadHTML($rowCatalogAttribute->value);
                    $cleanedText = $docHtml->getFieldValue('body');
                    $doc->addField(Zend_Search_Lucene_Field::UnStored('content', $cleanedText));
                    break;

                case 'fixedKeywords':
                case 'keywords':
                    $doc->addField(Zend_Search_Lucene_Field::UnStored('keywords', $rowCatalogAttribute->value));
                    break;

                case 'fixedDescription':
                case 'description':
                    $doc->addField(Zend_Search_Lucene_Field::Text('description', $rowCatalogAttribute->value));
                    break;

                case 'ptsKetua':
                    $doc->addField(Zend_Search_Lucene_Field::Text('judge', $rowCatalogAttribute->value));
                    break;

                case 'prtNomor':
                case 'fixedNomor':
                case 'fixedNumber':
                case 'nomor':
                case 'ptsNomor':
                    $doc->addField(Zend_Search_Lucene_Field::UnStored('number', $rowCatalogAttribute->value));
                    break;

                case 'prtTahun':
                case 'fixedTahun':
                case 'fixedYear':
                case 'tahun':
                case 'ptsTahun':
                    $doc->addField(Zend_Search_Lucene_Field::UnStored('year', $rowCatalogAttribute->value));
                    break;

                default:
                    // Any other attribute is indexed under its lower-cased guid;
                    // how the value is prepared depends on the attribute's type.
                    if ($tblAttribute === null) {
                        $tblAttribute = new Kutu_Core_Orm_Table_Attribute();
                    }
                    $rowAttribute = $tblAttribute->find($rowCatalogAttribute->attributeGuid)->current();
                    // The attribute definition may be missing; treat that as
                    // "no special type" instead of dereferencing null.
                    $attributeType = $rowAttribute ? $rowAttribute->type : null;
                    $fieldName     = strtolower($rowCatalogAttribute->attributeGuid);

                    if ($attributeType == 4) {
                        // Type 4: datetime attribute.
                        $fieldValue = $this->_filterDateTime($rowCatalogAttribute->value);
                    } elseif ($attributeType == 2) {
                        // Type 2: value is treated as HTML; index the body text only.
                        $docHtml    = Zend_Search_Lucene_Document_Html::loadHTML($rowCatalogAttribute->value);
                        $fieldValue = $docHtml->getFieldValue('body');
                    } else {
                        $fieldValue = $rowCatalogAttribute->value;
                    }
                    $doc->addField(Zend_Search_Lucene_Field::UnStored($fieldName, $fieldValue));
                    break;
            }
        }

        // For a kutu_doc the text extracted from the file becomes the 'content' field.
        if ($isFile) {
            // NOTE(review): assumes both attribute rows exist for every kutu_doc - confirm.
            $row        = $rowsetCatalogAttribute->findByAttributeGuid('docSystemName');
            $systemName = $row->value;
            $row        = $rowsetCatalogAttribute->findByAttributeGuid('docMimeType');
            $mimeType   = $row->value;

            $extractedText = $this->_extractText($rowCatalog->guid, $systemName, $mimeType);
            $doc->addField(Zend_Search_Lucene_Field::UnStored('content', $extractedText));
        }
    }

    // A kutu_doc without usable content (file unreadable, no text extracted,
    // empty file, or no attributes at all) is not indexed.
    if ($isFile) {
        // getFieldValue() throws for a field that was never added, so check
        // for its presence first. Strict comparison matters: numeric field
        // names come back from getFieldNames() as integers.
        if (!in_array('content', $doc->getFieldNames(), true)) {
            return;
        }
        $content = $doc->getFieldValue('content');
        if (empty($content)) {
            return;
        }
    }

    $index->addDocument($doc);
}
/**
 * Converts a Zend_Search_Lucene document back into an xfDocument.
 *
 * The xfDocument is created from the '__guid' field. Every field whose name
 * does not start with '__' is copied across with its stored/indexed/tokenized/
 * binary flags and the boost recorded for it in the serialized '__boosts'
 * field. Each guid listed in the serialized '__sub_documents' field is
 * resolved through findGuid() and attached as a child.
 *
 * @param Zend_Search_Lucene_Document $zdoc
 * @return xfDocument
 */
public function unwriteDocument(Zend_Search_Lucene_Document $zdoc)
{
    $result     = new xfDocument($zdoc->getFieldValue('__guid'));
    $boostByKey = unserialize($zdoc->getFieldValue('__boosts'));

    foreach ($zdoc->getFieldNames() as $fieldName) {
        // Names starting with a double underscore are internal bookkeeping fields.
        if (substr($fieldName, 0, 2) == '__') {
            continue;
        }

        $source = $zdoc->getField($fieldName);

        // Translate the Lucene field's boolean properties into the xfField bitmask.
        $flags = ($source->isStored ? xfField::STORED : 0)
               | ($source->isIndexed ? xfField::INDEXED : 0)
               | ($source->isTokenized ? xfField::TOKENIZED : 0)
               | ($source->isBinary ? xfField::BINARY : 0);

        $target = new xfField($fieldName, $flags);
        $target->setBoost($boostByKey[$fieldName]);

        $result->addField(new xfFieldValue($target, $source->value));
    }

    $childGuids = unserialize($zdoc->getFieldValue('__sub_documents'));
    foreach ($childGuids as $childGuid) {
        $result->addChild($this->findGuid($childGuid));
    }

    return $result;
}