/**
 * Collects statistics about the search index on disk and the search
 * documents table, and stores them on this object.
 *
 * Populates: path, size, filecount, indexcount, dbcount, types, complete
 * and time.
 *
 * @param string $path directory of the Lucene index to inspect
 */
public function __construct($path = SEARCH_INDEX_PATH) {
    global $CFG, $db;

    $this->path = $path;

    // Test whether there is a valid index on disk at the specified path:
    // opening it without the create flag throws when it cannot be read.
    try {
        $test_index = new Zend_Search_Lucene($this->path, false);
        $validindex = true;
    } catch (Exception $e) {
        $validindex = false;
    }

    // Retrieve file system info about the index if it is valid.
    if ($validindex) {
        $this->size = display_size(get_directory_size($this->path));
        $index_dir = get_directory_list($this->path, '', false, false);
        $this->filecount = count($index_dir);
        $this->indexcount = $test_index->count();
    } else {
        $this->size = 0;
        $this->filecount = 0;
        $this->indexcount = 0;
    }

    // Always start from a defined state so that both fields are set even
    // when the table exists but no searchable document types are returned.
    $this->dbcount = 0;
    $this->types = array();

    // Get all the current tables in moodle.
    // TODO: use new IndexDBControl class for database checks?
    $admin_tables = $db->MetaTables();

    // Check if our search table exists. The is_array() guard protects
    // in_array() in case MetaTables() did not return a list of tables.
    if (is_array($admin_tables) && in_array($CFG->prefix . SEARCH_DATABASE_TABLE, $admin_tables, true)) {
        // Total documents.
        $this->dbcount = count_records(SEARCH_DATABASE_TABLE);

        // Individual document types, in sorted order.
        $types = search_collect_searchables(true, false);
        sort($types);
        foreach ($types as $type) {
            $c = count_records(SEARCH_DATABASE_TABLE, 'doctype', $type);
            $this->types[$type] = (int) $c;
        }
    }

    // The indexer sets a busy flag while it runs; the index is only
    // considered complete when that flag is not '1'.
    if (isset($CFG->search_indexer_busy) && $CFG->search_indexer_busy == '1') {
        $this->complete = false;
    } else {
        $this->complete = true;
    }

    // Get the last run date for the indexer. !empty() keeps the original
    // truthiness test but does not raise a notice when the setting has
    // never been written.
    if ($this->valid() && !empty($CFG->search_indexer_run_date)) {
        $this->time = $CFG->search_indexer_run_date;
    } else {
        $this->time = 0;
    }
}
/**
 * Runs a query against the feeds Lucene index and prints the results as HTML.
 *
 * Prints the number of documents in the index, the number of hits, and for
 * each hit its title, score (two decimals) and link.
 *
 * The query and the stored title/link values are HTML-escaped before being
 * echoed, so markup in user input or indexed content cannot be injected
 * into the page.
 *
 * @param string $query Lucene query string, as entered by the user
 */
function search($query)
{
    // NOTE(review): these three loader calls look redundant with each other;
    // they are kept exactly as they were because the 'zend' library's
    // behaviour is not visible from here.
    $this->load->library('zend', 'Zend/Search/Lucene');
    $this->load->library('zend');
    $this->zend->load('Zend/Search/Lucene');

    $index = new Zend_Search_Lucene('C:\\xampp\\xampp\\htdocs\\controle_frota\\lucene\\feeds_index');
    $hits = $index->find($query);

    // NOTE(review): assumes the page is served as UTF-8 - confirm.
    $safeQuery = htmlspecialchars($query, ENT_QUOTES, 'UTF-8');

    echo 'Index contains ' . $index->count() . ' documents.<br /><br />';
    echo 'Search for "' . $safeQuery . '" returned ' . count($hits) . ' hits<br /><br />';

    foreach ($hits as $hit) {
        echo htmlspecialchars($hit->title, ENT_QUOTES, 'UTF-8') . '<br />';
        echo 'Score: ' . sprintf('%.2f', $hit->score) . '<br />';
        echo htmlspecialchars($hit->link, ENT_QUOTES, 'UTF-8') . '<br /><br />';
    }
}
} foreach ($deletions as $delete) { // find the specific document in the index, using it's docid and doctype as keys // change from default text only search to include numerals for this search. Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_TextNum_CaseInsensitive()); $doc = $index->find("+docid:{$delete->id} +doctype:{$mod->name} +itemtype:{$delete->itemtype}"); // get the record, should only be one foreach ($doc as $thisdoc) { ++$deletion_count; mtrace(" Delete: {$thisdoc->title} (database id = {$thisdoc->dbid}, index id = {$thisdoc->id}, moodle instance id = {$thisdoc->docid})"); //remove it from index and database table $dbcontrol->delDocument($thisdoc); $index->delete($thisdoc->id); } } } else { mtrace("No types to delete.\n"); } mtrace("Finished {$mod->name}.\n"); } } } } /// commit changes $index->commit(); /// update index date and index size set_config('search_indexer_cleanup_date', $startcleantime); set_config('search_index_size', (int) $CFG->search_index_size - (int) $deletion_count); mtrace("Finished {$deletion_count} removals."); mtrace('Index size after: ' . $index->count());
* with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category Zend
 * @package Zend_Search_Lucene
 * @subpackage Demos
 * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license http://framework.zend.com/license/new-bsd New BSD License
 */

/**
 * @see Zend_Search_Lucene
 */
require_once 'Zend/Search/Lucene.php';

// Open the index and report how many documents it holds.
$index = new Zend_Search_Lucene('/tmp/feeds_index');
printf("Index contains %s documents.\n", $index->count());

// Run a fixed demo query; the term is lower-cased before it is searched for.
$search = 'php';
$hits = $index->find(strtolower($search));
printf("Search for \"%s\" returned %s hits.\n\n", $search, count($hits));

// Every hit is introduced by an 80-dash rule.
$separator = str_repeat('-', 80) . "\n";

foreach ($hits as $hit) {
    echo $separator;
    printf("ID: %s\n", $hit->id);
    printf("Score: %.2f\n\n", $hit->score);

    // Dump each stored field, trimmed, showing at most its first 76 bytes.
    foreach ($hit->getDocument()->getFieldNames() as $field) {
        printf("%s: \n", $field);
        printf(" %s\n", trim(substr($hit->$field, 0, 76)));
    }
}
/**
 * Rebuilds the page search index.
 *
 * Walks every row of the Pages table in ascending cID order and, for each
 * active page that is not flagged with 'exclude_search_index' and that the
 * guest group can read, adds one Lucene document carrying the page's id,
 * name, type handle, description, body, theme handle (when a theme object
 * exists), path, one 'true'/'false' keyword per configured path section,
 * and every searchable collection attribute.
 *
 * @return stdClass object whose `count` property holds $index->count()
 *                  after all documents have been added
 */
public function reindex() {
    Loader::library('3rdparty/Zend/Search/Lucene');
    Loader::library('3rdparty/StandardAnalyzer/Analyzer/Standard/English');

    $index = new Zend_Search_Lucene(DIR_FILES_CACHE_PAGES, true);

    // The English standard analyzer is used rather than
    // Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive.
    Zend_Search_Lucene_Analysis_Analyzer::setDefault(new StandardAnalyzer_Analyzer_Standard_English());

    $db = Loader::db();
    Loader::model('collection_attributes');
    $pages = $db->query("select cID from Pages order by cID asc");
    $guests = Group::getByID(GUEST_GROUP_ID);
    Loader::helper('navigation');

    while ($row = $pages->fetchRow()) {
        $page = Page::getByID($row['cID'], 'ACTIVE');

        // Pages explicitly excluded from the index are skipped outright.
        if ($page->getCollectionAttributeValue('exclude_search_index')) {
            continue;
        }

        $theme = $page->getCollectionThemeObject();

        // Only pages readable by the guest group are indexed.
        $guests->setPermissionsForObject($page);
        if (!$guests->canRead()) {
            continue;
        }

        $doc = new Zend_Search_Lucene_Document();
        $doc->addField(Zend_Search_Lucene_Field::Keyword('cIDhash', md5($row['cID'])));
        $doc->addField(Zend_Search_Lucene_Field::Unindexed('cID', $row['cID']));
        $doc->addField(Zend_Search_Lucene_Field::Text('cName', $page->getCollectionName(), APP_CHARSET));
        $doc->addField(Zend_Search_Lucene_Field::Keyword('ctHandle', $page->getCollectionTypeHandle()));
        $doc->addField(Zend_Search_Lucene_Field::Text('cDescription', $page->getCollectionDescription(), APP_CHARSET));
        $doc->addField(Zend_Search_Lucene_Field::Text('cBody', $this->getBodyContentFromPage($page), APP_CHARSET));

        if (is_object($theme)) {
            $doc->addField(Zend_Search_Lucene_Field::Text('cTheme', $theme->getThemeHandle()));
        }

        $doc->addField(Zend_Search_Lucene_Field::Text('cPath', $page->getCollectionPath()));

        // One keyword per configured section: 'true' when the lower-cased
        // page path contains "<section path>/", otherwise 'false'.
        if (count($this->cPathSections) > 0) {
            foreach ($this->cPathSections as $sectionKey => $sectionPath) {
                $lowerPath = strtolower($page->getCollectionPath());
                if (strpos($lowerPath, $sectionPath . '/') !== false) {
                    $flag = 'true';
                } else {
                    $flag = 'false';
                }
                $doc->addField(Zend_Search_Lucene_Field::Keyword($sectionKey, $flag));
            }
        }

        // Attributes marked searchable become keyword fields named after
        // their handle.
        foreach ($page->getSetCollectionAttributes() as $attribute) {
            if (!$attribute->isCollectionAttributeKeySearchable()) {
                continue;
            }
            $doc->addField(Zend_Search_Lucene_Field::Keyword(
                $attribute->akHandle,
                $page->getCollectionAttributeValue($attribute)
            ));
        }

        $index->addDocument($doc);
    }

    $summary = new stdClass;
    $summary->count = $index->count();

    return $summary;
}
/**
 * Builds the result vector for a query that is not a pure conjunction
 * (for example '+something -another').
 *
 * Each term contributes its documents to one of three sets according to
 * its sign: required (sign === true, the sets are intersected), prohibited
 * (sign === false, the sets are united) or optional (any other sign, the
 * sets are united). When no term is required, the optional set stands in
 * for the required one. The result, stored in $this->_resVector, is the
 * required set minus the prohibited set. Term positions for every term are
 * recorded in $this->_termsPositions along the way.
 *
 * The 'bitset' extension is used when it is loaded; otherwise the same set
 * operations are carried out on arrays keyed by document id.
 *
 * @param Zend_Search_Lucene $reader
 */
private function _calculateNonConjunctionResult($reader)
{
    if (!extension_loaded('bitset')) {
        // Array fallback: sets are arrays keyed by document id.
        $mustMatch = null;
        $optional  = array();
        $excluded  = array();

        foreach ($this->_terms as $termId => $term) {
            $docs = array_flip($reader->termDocs($term));
            $sign = $this->_signs[$termId];

            if ($sign === true) {
                // required: keep only the ids that this term also matches
                $mustMatch = ($mustMatch === null)
                           ? $docs
                           : array_intersect_key($mustMatch, $docs);
            } elseif ($sign === false) {
                // prohibited: union into the exclusion set
                foreach ($docs as $docId => $position) {
                    $excluded[$docId] = $position;
                }
            } else {
                // neither required nor prohibited: union into the optional set
                foreach ($docs as $docId => $position) {
                    $optional[$docId] = $position;
                }
            }

            $this->_termsPositions[$termId] = $reader->termPositions($term);
        }

        if ($mustMatch === null) {
            $mustMatch = $optional;
        }

        // Drop every candidate that also appears in the prohibited set.
        $this->_resVector = array_diff_key($mustMatch, $excluded);
        return;
    }

    // bitset extension available: same algorithm on bitsets.
    $mustMatch = null;
    $optional  = bitset_empty();
    $excluded  = bitset_empty();

    foreach ($this->_terms as $termId => $term) {
        $docs = bitset_from_array($reader->termDocs($term));
        $sign = $this->_signs[$termId];

        if ($sign === true) {
            // required
            $mustMatch = ($mustMatch === null)
                       ? $docs
                       : bitset_intersection($mustMatch, $docs);
        } elseif ($sign === false) {
            // prohibited
            $excluded = bitset_union($excluded, $docs);
        } else {
            // neither required nor prohibited
            $optional = bitset_union($optional, $docs);
        }

        $this->_termsPositions[$termId] = $reader->termPositions($term);
    }

    if ($mustMatch === null) {
        $mustMatch = $optional;
    }

    // required AND NOT prohibited; the inversion is sized by the reader's count.
    $allowed = bitset_invert($excluded, $reader->count());
    $this->_resVector = bitset_intersection($mustMatch, $allowed);
}
/**
 * Computes the idf score factor for a single term or for a phrase.
 *
 * A single term yields idfFreq(docFreq(term), count()) evaluated against
 * the reader. An array of terms yields the sum of that value over all of
 * its terms, starting from 0.0.
 *
 * @param mixed $input               one term, or an array of terms
 * @param Zend_Search_Lucene $reader reader over the document collection being searched
 * @return mixed score factor: the idfFreq() result for a single term, or a float sum for an array
 */
public function idf($input, $reader)
{
    if (is_array($input)) {
        $total = 0.0;
        foreach ($input as $phraseTerm) {
            $docFreq = $reader->docFreq($phraseTerm);
            $total  += $this->idfFreq($docFreq, $reader->count());
        }

        return $total;
    }

    return $this->idfFreq($reader->docFreq($input), $reader->count());
}
if ($counter % 2000 == 0) { $index->commit(); mtrace(".. {$counter}"); } } } //end transaction } } //commit left over documents, and finish up $index->commit(); mtrace("-- {$counter} documents indexed"); mtrace("done.\n"); } } else { mtrace("No search document found for plugin {$mod->name}. Ignoring."); } } } /// finished modules mtrace('Finished activity modules'); search_stopwatch(); mtrace(".<br/><a href='index.php'>Back to query page</a>."); mtrace('</pre>'); /// finished, turn busy flag off set_config('search_indexer_busy', '0'); /// mark the time we last updated set_config('search_indexer_run_date', time()); /// and the index size set_config('search_index_size', (int) $index->count());
$index->addDocument($document); //commit every x new documents, and print a status message if ($counter % 2000 == 0) { $index->commit(); mtrace(".. {$counter}"); } } } //end transaction } } //commit left over documents, and finish up $index->commit(); mtrace("-- {$counter} documents indexed"); mtrace("done.\n"); } } } } //finished modules mtrace('Finished activity modules'); search_stopwatch(); mtrace(".<br/><a href='index.php'>Back to query page</a>."); mtrace('</pre>'); //finished, turn busy flag off set_config("search_indexer_busy", "0"); //mark the time we last updated set_config("search_indexer_run_date", time()); //and the index size set_config("search_index_size", (int) $index->count());
$row["idx"] = $hit->id; $row["score"] = sprintf('%d%%', $hit->score * 100); $row["title"] = page::htmlentities($d->getFieldValue('name')); $row["related"] = sprintf("<a href='%s%sID=%d'>%s</a>", $TPL["url_alloc_" . $d->getFieldValue('type')], $d->getFieldValue('type'), $d->getFieldValue('typeid'), page::htmlentities($d->getFieldValue('typename'))); $row["desc"] = page::htmlentities($d->getFieldValue('desc')); $TPL["search_results"][] = $row; } } // Wiki Search } else { if ($search && $needle && $category == "search_wiki") { $TPL["search_title"] = "Wiki Search"; $index = new Zend_Search_Lucene(ATTACHMENTS_DIR . 'search/wiki'); $query = Zend_Search_Lucene_Search_QueryParser::parse($needle); $hits = $index->find($needle); $TPL["index_count"] = $index->count(); $TPL["hits_count"] = count($hits); foreach ($hits as $hit) { $d = $hit->getDocument(); $row = array(); $row["idx"] = $hit->id; $row["score"] = sprintf('%d%%', $hit->score * 100); $row["title"] = sprintf("<a href='%starget=%s'>%s</a>", $TPL["url_alloc_wiki"], urlencode($d->getFieldValue('name')), page::htmlentities($d->getFieldValue('name'))); $row["desc"] = page::htmlentities($d->getFieldValue('desc')); $TPL["search_results"][] = $row; } } } } } }
} } } foreach ($deletions as $delete) { // find the specific document in the index, using it's docid and doctype as keys $doc = $index->find("+docid:{$delete->id} +doctype:{$mod->name} +itemtype:{$delete->itemtype}"); // get the record, should only be one foreach ($doc as $thisdoc) { ++$deletion_count; mtrace(" Delete: {$thisdoc->title} (database id = {$thisdoc->dbid}, index id = {$thisdoc->id}, moodle instance id = {$thisdoc->docid})"); //remove it from index and database table $dbcontrol->delDocument($thisdoc); $index->delete($thisdoc->id); } } } else { mtrace("No types to delete.\n"); } mtrace("Finished {$mod->name}.\n"); } } } } //commit changes $index->commit(); //update index date and index size set_config("search_indexer_cleanup_date", $startcleantime); set_config("search_index_size", (int) $CFG->search_index_size - (int) $deletion_count); mtrace("Finished {$deletion_count} removals."); mtrace('Index size after: ' . $index->count() . '</pre>');
#!/usr/local/bin/php
<?php
/**
 * CLI helper: reports the size of the forum_topics search index and, given
 * a search term as the only argument, lists the topics that match it.
 *
 * Usage: <script> <search term>
 *
 * Exit status is 1 when the search term is missing, so that callers and
 * shell scripts can detect the failure.
 */
require dirname(__FILE__) . '/cli_prepend.inc.php';

// report the index
$objIndex = new Zend_Search_Lucene(__SEARCH_INDEXES__ . '/forum_topics');
print "Index contains " . $objIndex->count() . " max documents.\r\n";
print "Index contains " . $objIndex->numDocs() . " actual documents.\r\n\r\n";

// Exactly one argument (the search term) is expected after the script name.
if ($_SERVER['argc'] != 2) {
    // exit() with a string argument prints it but exits with status 0;
    // print the same message and exit non-zero instead.
    print "error: specify a search term\r\n";
    exit(1);
}

$strSearchQuery = $_SERVER['argv'][1];
$objTopicArray = Topic::LoadArrayBySearch($strSearchQuery);

print 'Search for "' . $strSearchQuery . '" returned ' . count($objTopicArray) . " topics\r\n\r\n";

foreach ($objTopicArray as $objTopic) {
    print '[' . $objTopic->Id . '] - ' . $objTopic->Name . "\r\n";
}