/**
 * Core index method used to bulk index the database content.
 *
 * Each call processes the single record returned by getRecord(): the record's
 * previous entry is removed from the index, the record is indexed again through
 * the content plugins and the request is redirected to the `task=index` URL
 * carrying the id of the next record (`rid`). When getNextRecordId() does not
 * return a numeric id the index is optimized and the request is redirected to
 * `index.php?option=com_jucene`.
 *
 * Records whose fulltext (or, failing that, introtext) starts with `<?xml` are
 * transformed with `xslt/jucene.xsl` and every `AssociationRule` element of the
 * result is sent to the `onIndexPmml` plugin event. All other records are sent
 * to the `onIndexContent` plugin event.
 *
 * @return void
 */
function index()
{
    set_time_limit(0);

    $record = $this->getRecord();

    // Remove the row id and use the returned pk instead, because of a collision
    // with Zend - it uses id as the primary key in the index.
    unset($record['id']);

    $currId = $record['pk'];
    $recId = $this->getNextRecordId($currId);

    // Record fields handed to the plugin events; the document body is passed separately.
    $additional = $record;
    unset($additional['introtext']);
    unset($additional['fulltext']);

    // Prepare the redirect URL pointing at the next record.
    $redir_url = "index2.php?option=com_jucene&task=index&view=jucene_index&rid=" . $recId;

    // Get the index through the helper, because it has to be imported into the search plugin too.
    $index = JuceneHelper::getIndex();

    // Report the document count in the index before indexation.
    $this->raiseMessage(JText::sprintf('INDEXRECORDCOUNT', $index->numDocs()));

    JuceneHelper::removeFromIndexById($currId);

    // TODO divide this into index methods based on content types (JuceneHelper) -
    // there is no reason to try to transform the document if it is HTML.
    $xml_field = substr($record['fulltext'], 0, 5) !== '<?xml' ? $record['introtext'] : $record['fulltext'];

    // HTML or PMML?
    if (substr($xml_field, 0, 5) === '<?xml') {
        $error = false;

        // Load the XSLT stylesheet.
        $xslt = new DOMDocument();
        if (!@$xslt->load(JPATH_COMPONENT_ADMINISTRATOR . DS . 'xslt/jucene.xsl')) {
            $error = true;
            $this->raiseMessage("XSLTLOADERROR", 'error');
        }

        $proc = new XSLTProcessor();
        if (!$proc->importStylesheet($xslt)) {
            $error = true;
            $this->raiseMessage("XSLTIMPORTERROR", 'error');
        }

        unset($record['fulltext']);
        unset($record['introtext']);

        $dom = new DOMDocument();
        if ($dom->loadXML($xml_field) && !$error) {
            // Simplify the document - prepare it for the indexation process.
            $xslOutput = $proc->transformToXml($dom);

            // New DOM document holding the transformed, indexable XML.
            // The property name is case-sensitive: it is preserveWhiteSpace.
            $transXml = new DOMDocument();
            $transXml->preserveWhiteSpace = false;

            // A failed transformation yields no string, and loading an empty source
            // throws on PHP 8. Skipping the load leaves the document empty, which is
            // reported below as a document without rules.
            if (is_string($xslOutput) && $xslOutput !== '') {
                @$transXml->loadXML($xslOutput);
            }

            // Release the intermediate documents.
            unset($xslOutput, $dom, $xslt);

            // Index every association rule as a document with the same credentials.
            $rules = $transXml->getElementsByTagName("AssociationRule");
            if ($rules->length == 0) {
                $this->raiseMessage('XMLDOCUMENTNORULES', 'error');
            } else {
                JPluginHelper::importPlugin('content');
                $dispatcher = JDispatcher::getInstance();

                $additional['rating'] = 0;
                $rule_doc_position = 0;
                foreach ($rules as $rule) {
                    $additional['position'] = $rule_doc_position;
                    $dispatcher->trigger('onIndexPmml', array($rule, $additional));
                    $rule_doc_position++;
                }
            }
        } else {
            // The XML could not be loaded or the stylesheet is unusable: go to the next record.
            $this->redirect($redir_url);
        }
    } else {
        // NOTE(review): this error message is raised for every non-XML record, while a
        // failed loadXML() above raises none - confirm whether it belongs to that branch.
        $this->raiseMessage('XMLDOCLOADERROR', 'error');

        JPluginHelper::importPlugin('content');
        $dispatcher = JDispatcher::getInstance();
        $dispatcher->trigger('onIndexContent', array($record['fulltext'], $additional));
    }

    if (is_numeric($recId)) {
        if (JDEBUG) {
            $this->raiseMessage(JText::sprintf('REMAININGRECORDS', $this->getRecordCount($currId)));
            $this->raiseMessage(JText::sprintf('INDEXRECORDCOUNT', $index->numDocs()));
        }

        $index->commit();
        $this->redirect($redir_url);
    } else {
        // No further record to process: finish the run.
        $index->optimize();
        // NOTE(review): the completion message is raised with type 'error' - confirm this is intended.
        $this->raiseMessage('DONEINDEXING', 'error');
        $this->redirect("index.php?option=com_jucene");
    }
}
/**
 * Replaces an indexed document with new XML content.
 *
 * The update only takes place when $doc_id is numeric and
 * JuceneHelper::stringContains() finds "<?xml" in $new_content. In that case
 * the document is removed from the index by its id and the new content is
 * inserted through insertToIndexKbi().
 *
 * @param $doc_id         id of the document to replace
 * @param $new_content    new content of the document
 * @param $specific_index passed through to insertToIndexKbi()
 *
 * @return mixed the result of insertToIndexKbi(), or false when the
 *               preconditions are not met
 */
function updateIndexDocumentKbi($doc_id, $new_content, $specific_index = NULL)
{
    $canUpdate = is_numeric($doc_id)
        && JuceneHelper::stringContains($new_content, "<?xml", false);

    if (!$canUpdate) {
        return false;
    }

    JuceneHelper::removeFromIndexById($doc_id);

    return $this->insertToIndexKbi($new_content, $specific_index);
}