/**
 * Return the document object for this hit.
 *
 * The document is fetched from the index the first time it is requested
 * and then kept on the hit, so later calls reuse the same instance.
 *
 * @return Zend_Search_Lucene_Document
 */
public function getDocument()
{
    // Already loaded: hand back the cached instance.
    if ($this->_document instanceof Zend_Search_Lucene_Document) {
        return $this->_document;
    }

    $this->_document = $this->_index->getDocument($this->id);

    return $this->_document;
}
/**
 * Search the index for $pQuery in every configured index field.
 *
 * The query is lower-cased and turned into one "field:query" clause per
 * entry of the comma-separated 'index_fields' setting; the clauses are
 * joined with OR. Hits are stored in $this->mResults.
 *
 * @param string $pQuery search text entered by the user
 * @return int number of hits stored in $this->mResults
 */
function search($pQuery)
{
    $this->mResults = array();

    // Nothing to search for, or no usable index: report zero hits.
    if (empty($pQuery) || !$this->verifySearchIndex()) {
        return count($this->mResults);
    }

    parent::search($pQuery);
    require_zend_file('Search/Lucene.php');

    $index = new ZSearch($this->getField('index_path'));
    $lowQuery = strtolower($pQuery);

    // Build one clause per field. Field names are trimmed and blank entries
    // skipped so that settings such as "title, body" or a trailing comma do
    // not produce malformed clauses like " body:foo" or ":foo".
    $clauses = array();
    foreach (explode(',', (string) $this->getField('index_fields')) as $field) {
        $field = trim($field);
        if ($field === '') {
            continue;
        }
        $clauses[] = "{$field}:{$lowQuery}";
    }

    // With no usable field names, fall back to an unqualified query instead
    // of sending a clause with an empty field name.
    $query = count($clauses) > 0 ? implode(' OR ', $clauses) : $lowQuery;

    $this->mResults = $index->find($query);

    return count($this->mResults);
}
/**
 * The sum of squared weights of contained query clauses.
 *
 * Stores the term's idf and the resulting query weight (idf multiplied by
 * the query boost) on the object, then returns that weight squared.
 *
 * @return float
 */
public function sumOfSquaredWeights()
{
    $similarity = $this->_reader->getSimilarity();

    // idf of the term within the reader's document collection
    $this->_idf = $similarity->idf($this->_term, $this->_reader);

    // query weight = idf * boost
    $this->_queryWeight = $this->_idf * $this->_query->getBoost();

    $weight = $this->_queryWeight;

    return $weight * $weight;
}
/**
 * Score calculator for sloppy phrase queries (terms sequence is fixed)
 *
 * Builds candidate phrases (one position per term) from the positions of
 * each term in the document, then, for every candidate, finds the smallest
 * total displacement from the expected term offsets over all start shifts
 * in [-slop, slop]. Candidates whose best displacement is within the slop
 * contribute sloppyFreq(displacement) to the result.
 *
 * @param integer $docId
 * @param ZSearch $reader
 * @return float
 */
public function _sloppyPhraseFreq($docId, ZSearch $reader)
{
    $freq = 0;

    // The queue starts with a single empty candidate phrase.
    $phraseQueue = array(array());
    $lastTerm = null;

    // Extend the candidates term by term.
    foreach ($this->_terms as $termId => $term) {
        // Only candidates that existed before this term are extended.
        $queueSize = count($phraseQueue);
        $firstPass = true;

        foreach ($this->_termsPositions[$termId][$docId] as $termPosition) {
            if ($firstPass) {
                // First position of the term: written into every existing candidate.
                for ($i = 0; $i < $queueSize; $i++) {
                    $phraseQueue[$i][$termId] = $termPosition;
                }
                $firstPass = false;
                continue;
            }

            // Further positions: fork each existing candidate, unless the gap
            // to the previous term is already further off than the slop allows.
            for ($i = 0; $i < $queueSize; $i++) {
                if ($lastTerm !== null) {
                    $actualGap = $termPosition - $phraseQueue[$i][$lastTerm];
                    $expectedGap = $this->_offsets[$termId] - $this->_offsets[$lastTerm];
                    if (abs($actualGap - $expectedGap) > $this->_slop) {
                        continue;
                    }
                }

                $forked = $phraseQueue[$i];
                $forked[$termId] = $termPosition;
                $phraseQueue[] = $forked;
            }
        }

        $lastTerm = $termId;
    }

    // Evaluate every candidate phrase.
    foreach ($phraseQueue as $positions) {
        $minDistance = null;

        for ($shift = -$this->_slop; $shift <= $this->_slop; $shift++) {
            $distance = 0;
            $start = reset($positions) - reset($this->_offsets) + $shift;

            foreach ($this->_terms as $termId => $term) {
                $distance += abs($positions[$termId] - $this->_offsets[$termId] - $start);

                // Already worse than the slop; no need to add further terms.
                if ($distance > $this->_slop) {
                    break;
                }
            }

            $minDistance = ($minDistance === null) ? $distance : min($minDistance, $distance);
        }

        if ($minDistance <= $this->_slop) {
            $freq += $reader->getSimilarity()->sloppyFreq($minDistance);
        }
    }

    return $freq;
}
/**
 * Score specified document
 *
 * On the first call the set of documents containing the term, the term
 * positions and the weight are initialised from the reader. The score is
 * tf * weight * norm when the document contains the term and the query
 * sign is truthy, otherwise 0.
 *
 * @param integer $docId
 * @param ZSearch $reader
 * @return float
 */
public function score($docId, $reader)
{
    $useBitset = extension_loaded('bitset');

    if ($this->_docVector === null) {
        $docs = $reader->termDocs($this->_term);
        $this->_docVector = $useBitset ? bitset_from_array($docs) : array_flip($docs);

        $this->_termPositions = $reader->termPositions($this->_term);
        $this->_initWeight($reader);
    }

    if ($useBitset) {
        $match = bitset_in($this->_docVector, $docId);
    } else {
        $match = isset($this->_docVector[$docId]);
    }

    if (!$this->_sign || !$match) {
        return 0;
    }

    $termFrequency = $reader->getSimilarity()->tf(count($this->_termPositions[$docId]));

    return $termFrequency * $this->_weight->getValue() * $reader->norm($docId, $this->_term->field);
}
/**
 * Score calculator for non conjunction queries (not all terms are required)
 *
 * Sums tf * weight * norm over the terms that are present in the document
 * and whose sign is not false, then multiplies the sum by the coordination
 * factor for the number of matched terms. The coordination factors are
 * computed once and cached in $this->_coord.
 *
 * @param integer $docId
 * @param ZSearch $reader
 * @return float
 */
public function _nonConjunctionScore($docId, $reader)
{
    if ($this->_coord === null) {
        // Count the terms whose sign is not false.
        $maxCoord = 0;
        foreach ($this->_signs as $sign) {
            if ($sign !== false) {
                $maxCoord++;
            }
        }

        $this->_coord = array();
        for ($overlap = 0; $overlap <= $maxCoord; $overlap++) {
            $this->_coord[$overlap] = $reader->getSimilarity()->coord($overlap, $maxCoord);
        }
    }

    $score = 0.0;
    $matchedTerms = 0;

    foreach ($this->_terms as $termId => $term) {
        // Skip terms whose sign is false and terms absent from this document.
        if ($this->_signs[$termId] === false || !isset($this->_termsPositions[$termId][$docId])) {
            continue;
        }

        $matchedTerms++;

        $termFrequency = $reader->getSimilarity()->tf(count($this->_termsPositions[$termId][$docId]));
        $score += $termFrequency * $this->_weights[$termId]->getValue() * $reader->norm($docId, $term->field);
    }

    return $score * $this->_coord[$matchedTerms];
}
$byteCount = filesize($fileName); $data = ''; while ($byteCount > 0 && ($nextBlock = fread($f, $byteCount)) != false) { $data .= $nextBlock; $byteCount -= strlen($nextBlock); } fclose($f); if ($storeContent) { $this->addField(Zend_Search_Lucene_Field::Text('contents', $data)); } else { $this->addField(Zend_Search_Lucene_Field::UnStored('contents', $data)); } } } // Create index $index = new ZSearch('index', true); // Uncomment next line if you want to have case sensitive index // ZSearchAnalyzer::setDefault(new ZSearchTextAnalyzer()); $indexSourceDir = 'IndexSource'; $dir = opendir($indexSourceDir); while ($file = readdir($dir)) { if (is_dir($file)) { continue; } if (strcasecmp(substr($file, strlen($file) - 5), '.html') != 0) { continue; } // Create new Document from a file $doc = new FileDocument($indexSourceDir . '/' . $file, true); // Add document to the index $index->addDocument($doc);
<?php
/**
 * ZSearch Indexing Example - Create a new index and add a
 * document to search. [Under Construction]
 *
 * ZSearch is an implementation of the Java Lucene engine
 * written entirely in PHP 5. It is fully compatible with the
 * binary format of the Lucene index files.
 *
 * Base installation of ZSearch requires no database server or
 * even installing a PHP extension, although added performance and
 * features are available if you can use one or both.
 */

/**
 * Load the ZSearch library before using its classes, as the simple search
 * example does. NOTE(review): assumes this file also makes ZSearchDocument
 * available - confirm.
 */
require_once 'ZSearch/ZSearch.php';

/**
 * The second argument tells ZSearch to create a new index
 * instead of opening an existing one.
 */
$index = new ZSearch('/tmp/my_index', true);

/**
 * Create a new document with two fields: title and body. Each
 * field will be independently searchable.
 */
$myDoc = new ZSearchDocument();
$myDoc->setField('title', 'ZSearch Example Title');
$myDoc->setField('body', 'The body of your document goes here.');

/**
 * Add the document to the index. It is now searchable.
 */
$index->addDocument($myDoc);
/**
 * Computes a score factor for a simple term or a phrase.
 *
 * For a single term the factor is
 *   idfFreq(reader.docFreq(term), reader.count()).
 * For an array of terms the factor is the sum of that value over all
 * terms in the array (0.0 for an empty array).
 *
 * @param mixed $input    the term in question or an array of terms
 * @param ZSearch $reader reader of the document collection being searched
 * @return float a score factor for the term(s)
 */
public function idf($input, $reader)
{
    if (is_array($input)) {
        $total = 0.0;
        foreach ($input as $term) {
            $total += $this->idfFreq($reader->docFreq($term), $reader->count());
        }

        return $total;
    }

    return $this->idfFreq($reader->docFreq($input), $reader->count());
}
<?php require_once 'ZSearch/ZSearch.php'; /** * ZSearch Simple Search Example * * ZSearch allows you to search all fields of all documents */ /** * Open the index for searching. */ $index = new ZSearch('/tmp/index'); /** * Query the index for documents that contain the term "zend studio" * but do not contain the term "install". */ $hits = $index->find('nntp'); /** * List titles of each matching document. * * All three methods shown below are equivalent, you only need to * get the objects if you really need them. */ foreach ($hits as $hit) { // get the value of the document's "title" field directly from the hit (shortcut) // this should be the most common usage echo $hit->title; // get the document object and the "title" field's value from it (shortcut) echo $hit->getDocument()->title . "\n"; // get the document object and then the "title" object and its value. echo $hit->getDocument()->getField('title')->getFieldValue() . "\n";