/**
 * Builds a Lucene document from a page's fulltext content and stores it in
 * the index returned by Kwf_Util_Fulltext_Lucene::getInstance(), replacing
 * any documents already stored under the same componentId.
 *
 * @param Kwf_Component_Data $page        page to index
 * @param bool               $debugOutput when truthy, progress text is echoed
 * @return bool|null null when the page's component class carries the
 *                   'skipFulltext' flag; false when the page has no fulltext
 *                   components or no 'content' entry; true once the document
 *                   has been added
 */
public function indexPage(Kwf_Component_Data $page, $debugOutput = false)
{
    // performance: bail out before any content is collected
    if (Kwc_Abstract::getFlag($page->componentClass, 'skipFulltext')) {
        return;
    }

    $fulltextComponents = $this->getFulltextComponents($page);
    if (!$fulltextComponents) {
        return false;
    }

    if ($debugOutput) {
        echo " *** indexing {$page->componentId} {$page->url}...";
    }

    $contents = $this->getFulltextContentForPage($page, $fulltextComponents);
    unset($fulltextComponents);

    if (!$contents || empty($contents['content'])) {
        if ($debugOutput) {
            echo " [no content]";
        }
        return false;
    }

    $document = new Zend_Search_Lucene_Document();

    // whole content, stored but not indexed: used for the preview in search results
    $document->addField(
        Zend_Search_Lucene_Field::UnIndexed('content', $contents['content'], 'utf-8')
    );
    unset($contents['content']);

    // strip a trailing ' - ' separator from the page title
    $title = $page->getTitle();
    if (substr($title, -3) === ' - ') {
        $title = substr($title, 0, -3);
    }
    $titleField = Zend_Search_Lucene_Field::Text('title', $title, 'utf-8');
    $titleField->boost = 10;
    $document->addField($titleField);

    // per-field weights for the remaining content parts; unlisted fields keep the default boost
    $boosts = array(
        'contenth1'     => 5,
        'contenth2'     => 3,
        'contenth3'     => 2,
        'contenth4'     => 1.5,
        'contenth5'     => 1.3,
        'contenth6'     => 1.2,
        'contentstrong' => 2,
    );
    foreach ($contents as $fieldName => $text) {
        $value = $text instanceof Kwf_DateTime ? $text->format() : $text;
        $contentField = Zend_Search_Lucene_Field::UnStored($fieldName, $value, 'utf-8');
        if (isset($boosts[$fieldName])) {
            $contentField->boost = $boosts[$fieldName];
        }
        $document->addField($contentField);
    }

    if ($debugOutput) {
        echo "\n";
    }

    // this is used to find all documents in the index
    // (not really a nice solution)
    $dummyField = Zend_Search_Lucene_Field::UnStored('dummy', 'dummy', 'utf-8');
    $dummyField->boost = 0.0001;
    $document->addField($dummyField);

    $idField = Zend_Search_Lucene_Field::Keyword('componentId', $page->componentId, 'utf-8');
    $idField->boost = 0.0001;
    $document->addField($idField);

    // delete every document already stored for this componentId, then add the new one
    $idTerm = new Zend_Search_Lucene_Index_Term($page->componentId, 'componentId');
    $index = Kwf_Util_Fulltext_Lucene::getInstance($page);
    $existingIds = $index->termDocs($idTerm);
    foreach ($existingIds as $docId) {
        $index->delete($docId);
    }
    $index->addDocument($document);

    $this->_afterIndex($page);

    return true;
}
/**
 * Looks up the document stored for a page and returns its 'content' field.
 *
 * @param Kwf_Component_Data $page page whose componentId is searched for
 * @return mixed the 'content' field of the first hit, or null when the
 *               query yields no hits
 */
public function getDocumentContent(Kwf_Component_Data $page)
{
    $index = Kwf_Util_Fulltext_Lucene::getInstance($page);

    $query = new Zend_Search_Lucene_Search_Query_Term(
        new Zend_Search_Lucene_Index_Term($page->componentId, 'componentId')
    );

    $hits = $index->find($query);
    foreach ($hits as $hit) {
        // only the first hit matters
        return $hit->content;
    }

    return null;
}