/**
  * Lazily fetch the document this hit refers to.
  *
  * The document is loaded from the index (by this hit's id) the first time
  * it is requested and kept on the hit for every later call.
  *
  * @return Zend_Search_Lucene_Document
  */
 public function getDocument()
 {
     // Already resolved on an earlier call: hand back the cached instance.
     if ($this->_document instanceof Zend_Search_Lucene_Document) {
         return $this->_document;
     }

     $this->_document = $this->_index->getDocument($this->id);

     return $this->_document;
 }
Example #2
0
 /**
  * Run $pQuery against every configured index field and store the hits.
  *
  * The lower-cased query is turned into one "field:query" clause per entry
  * of the comma-separated 'index_fields' setting; the clauses are joined
  * with " OR " and handed to the index. Results end up in $this->mResults.
  *
  * @param string $pQuery raw search string
  * @return integer number of entries in $this->mResults
  */
 function search($pQuery)
 {
     $this->mResults = array();

     // Nothing to do for an empty query or when the index fails verification.
     if (empty($pQuery) || !$this->verifySearchIndex()) {
         return count($this->mResults);
     }

     parent::search($pQuery);
     require_zend_file('Search/Lucene.php');

     $searchIndex = new ZSearch($this->getField('index_path'));
     $fieldNames = explode(',', $this->getField('index_fields'));
     $needle = strtolower($pQuery);

     // One clause per field, e.g. "title:foo OR body:foo".
     $clauses = array();
     foreach ($fieldNames as $fieldName) {
         $clauses[] = $fieldName . ':' . $needle;
     }

     $this->mResults = $searchIndex->find(implode(' OR ', $clauses));

     return count($this->mResults);
 }
 /**
  * Compute this weight's contribution to the sum of squared weights.
  *
  * As a side effect the idf and the (un-normalised) query weight are
  * stored on the object.
  *
  * @return float the query weight squared
  */
 public function sumOfSquaredWeights()
 {
     $similarity = $this->_reader->getSimilarity();

     // idf of the term, as reported by the reader's similarity.
     $this->_idf = $similarity->idf($this->_term, $this->_reader);

     // query weight = idf scaled by the query boost.
     $this->_queryWeight = $this->_idf * $this->_query->getBoost();

     $weight = $this->_queryWeight;

     return $weight * $weight;
 }
 /**
  * Score calculator for sloppy phrase queries (terms sequence is fixed)
  *
  * Works in two phases:
  *  1. Build a queue of candidate phrases, each mapping termId => position
  *     of that term in document $docId.
  *  2. For every candidate, find the smallest total displacement from the
  *     expected term offsets over all shifts in [-slop, +slop]; candidates
  *     whose best displacement is within the slop add
  *     sloppyFreq(displacement) to the result.
  *
  * @param integer $docId
  * @param ZSearch $reader
  * @return float accumulated sloppy frequency (0 when no candidate qualifies)
  */
 public function _sloppyPhraseFreq($docId, ZSearch $reader)
 {
     $freq = 0;
     $phraseQueue = array();
     $phraseQueue[0] = array();
     // empty phrase
     $lastTerm = null;
     // Walk through the terms to create phrases.
     foreach ($this->_terms as $termId => $term) {
         // Snapshot of the queue size: only candidates that existed before
         // this term was processed are extended/cloned below.
         $queueSize = count($phraseQueue);
         $firstPass = true;
         // Walk through the term positions.
         // Each term position produces a set of phrases.
         // NOTE(review): no isset() guard here - presumably the caller only
         // invokes this for documents containing every term; confirm.
         foreach ($this->_termsPositions[$termId][$docId] as $termPosition) {
             if ($firstPass) {
                 // First position of this term: record it in place in every
                 // pre-existing candidate (no slop check on this branch).
                 for ($count = 0; $count < $queueSize; $count++) {
                     $phraseQueue[$count][$termId] = $termPosition;
                 }
             } else {
                 // Further positions: fork each pre-existing candidate into a
                 // new queue entry carrying this position instead.
                 for ($count = 0; $count < $queueSize; $count++) {
                     // Skip the fork when the gap to the previous term's position
                     // differs from the expected offset gap by more than the slop.
                     if ($lastTerm !== null && abs($termPosition - $phraseQueue[$count][$lastTerm] - ($this->_offsets[$termId] - $this->_offsets[$lastTerm])) > $this->_slop) {
                         continue;
                     }
                     // Append a copy of the candidate and overwrite its position
                     // for the current term.
                     $newPhraseId = count($phraseQueue);
                     $phraseQueue[$newPhraseId] = $phraseQueue[$count];
                     $phraseQueue[$newPhraseId][$termId] = $termPosition;
                 }
             }
             $firstPass = false;
         }
         $lastTerm = $termId;
     }
     // Evaluate every candidate phrase.
     foreach ($phraseQueue as $phrasePos) {
         $minDistance = null;
         // Try each allowed shift of the phrase start and keep the smallest
         // total displacement.
         for ($shift = -$this->_slop; $shift <= $this->_slop; $shift++) {
             $distance = 0;
             // Hypothetical phrase start: first stored position minus the first
             // offset, moved by $shift. reset() yields the first array element.
             $start = reset($phrasePos) - reset($this->_offsets) + $shift;
             foreach ($this->_terms as $termId => $term) {
                 // Displacement of this term from where the phrase would put it.
                 $distance += abs($phrasePos[$termId] - $this->_offsets[$termId] - $start);
                 // Already over the slop for this shift - stop summing early.
                 if ($distance > $this->_slop) {
                     break;
                 }
             }
             if ($minDistance === null || $distance < $minDistance) {
                 $minDistance = $distance;
             }
         }
         // Only candidates within the slop contribute to the frequency.
         if ($minDistance <= $this->_slop) {
             $freq += $reader->getSimilarity()->sloppyFreq($minDistance);
         }
     }
     return $freq;
 }
 /**
  * Score a single document against this term query.
  *
  * On the first call the set of matching document ids, the term positions
  * and the weight are initialised from $reader. The id set is a bitset when
  * the 'bitset' extension is loaded, otherwise an array keyed by doc id.
  *
  * @param integer $docId
  * @param ZSearch $reader
  * @return float tf * weight * norm for a matching document, 0 otherwise
  */
 public function score($docId, $reader)
 {
     $hasBitset = extension_loaded('bitset');

     // One-time lazy initialisation of the per-query lookup structures.
     if (null === $this->_docVector) {
         $docIds = $reader->termDocs($this->_term);
         $this->_docVector = $hasBitset ? bitset_from_array($docIds) : array_flip($docIds);
         $this->_termPositions = $reader->termPositions($this->_term);
         $this->_initWeight($reader);
     }

     if ($hasBitset) {
         $isHit = bitset_in($this->_docVector, $docId);
     } else {
         $isHit = isset($this->_docVector[$docId]);
     }

     // No score when the query sign is falsy or the term is absent from the document.
     if (!$this->_sign || !$isHit) {
         return 0;
     }

     $termFrequency = $reader->getSimilarity()->tf(count($this->_termPositions[$docId]));

     return $termFrequency * $this->_weight->getValue() * $reader->norm($docId, $this->_term->field);
 }
 /**
  * Score calculator for non conjunction queries (not all terms are required)
  *
  * Sums tf * weight * norm over every term whose sign is not false and that
  * has positions recorded for $docId, then scales the sum by the coord
  * factor for the number of such terms. The coord table is built once, on
  * first use.
  *
  * @param integer $docId
  * @param ZSearch $reader
  * @return float
  */
 public function _nonConjunctionScore($docId, $reader)
 {
     // Lazily build the coord lookup: one entry per possible match count.
     if (null === $this->_coord) {
         $this->_coord = array();

         // Terms with sign === false are excluded from the coord maximum.
         $countedTerms = 0;
         foreach ($this->_signs as $termSign) {
             if (false !== $termSign) {
                 $countedTerms++;
             }
         }

         $overlap = 0;
         while ($overlap <= $countedTerms) {
             $this->_coord[$overlap] = $reader->getSimilarity()->coord($overlap, $countedTerms);
             $overlap++;
         }
     }

     $total = 0.0;
     $hits = 0;
     foreach ($this->_terms as $termId => $term) {
         // Terms with sign === false never contribute.
         if (false === $this->_signs[$termId]) {
             continue;
         }
         // Term has no recorded positions in this document.
         if (!isset($this->_termsPositions[$termId][$docId])) {
             continue;
         }

         $hits++;
         $termFrequency = $reader->getSimilarity()->tf(count($this->_termsPositions[$termId][$docId]));
         $total += $termFrequency * $this->_weights[$termId]->getValue() * $reader->norm($docId, $term->field);
     }

     return $total * $this->_coord[$hits];
 }
Example #7
0
        $byteCount = filesize($fileName);
        $data = '';
        while ($byteCount > 0 && ($nextBlock = fread($f, $byteCount)) != false) {
            $data .= $nextBlock;
            $byteCount -= strlen($nextBlock);
        }
        fclose($f);
        if ($storeContent) {
            $this->addField(Zend_Search_Lucene_Field::Text('contents', $data));
        } else {
            $this->addField(Zend_Search_Lucene_Field::UnStored('contents', $data));
        }
    }
}
// Create index
// (second argument `true` presumably requests a fresh index - confirm against ZSearch)
$index = new ZSearch('index', true);
// Uncomment next line if you want to have case sensitive index
// ZSearchAnalyzer::setDefault(new ZSearchTextAnalyzer());
// Directory scanned for *.html files to index.
$indexSourceDir = 'IndexSource';
$dir = opendir($indexSourceDir);
// NOTE(review): readdir() returns false at the end of the directory, but this
// loose test also stops on an entry literally named "0" - consider `!== false`.
while ($file = readdir($dir)) {
    // NOTE(review): $file is a bare entry name, so is_dir() resolves it against
    // the current working directory rather than $indexSourceDir - verify intent.
    if (is_dir($file)) {
        continue;
    }
    // Only index entries whose last five characters are ".html" (case-insensitive).
    if (strcasecmp(substr($file, strlen($file) - 5), '.html') != 0) {
        continue;
    }
    // Create new Document from a file
    // (second argument presumably maps to FileDocument's $storeContent - confirm)
    $doc = new FileDocument($indexSourceDir . '/' . $file, true);
    // Add document to the index
    $index->addDocument($doc);
<?php

/**
 * ZSearch Indexing Example - Create a new index and add a
 * document to search.  [Under Construction]
 *
 * ZSearch is an implementation of the Java Lucene engine
 * written entirely in PHP 5.  It is fully compatible with the
 * binary format of the Lucene index files.
 *
 * Base installation of ZSearch requires no database server or
 * even installing a PHP extension, although added performance and
 * features are available if you can use one or both.
 */
/**
 * Open the index stored at /tmp/my_index.
 *
 * The second argument tells ZSearch to create a new index
 * instead of opening an existing one.
 */
$index = new ZSearch('/tmp/my_index', true);
/**
 * Create a new document with two fields: title and body.  Each
 * field will be independently searchable.
 */
$myDoc = new ZSearchDocument();
$myDoc->setField('title', 'ZSearch Example Title');
$myDoc->setField('body', 'The body of your document goes here.');
/**
 * Add the document to the index so that later searches can find it.
 */
$index->addDocument($myDoc);
 /**
  * Computes a score factor for a simple term or a phrase.
  *
  * For a single term the result is
  *   idfFreq($reader->docFreq($term), $reader->count()).
  * For an array of terms the same quantity is computed for every element
  * and the values are summed.
  *
  * @param mixed $input the term in question, or an array of terms
  * @param ZSearch $reader reader for the document collection being searched
  * @return float a score factor for the term(s)
  */
 public function idf($input, $reader)
 {
     if (is_array($input)) {
         // Phrase: the factor is the sum of the per-term factors.
         $total = 0.0;
         foreach ($input as $phraseTerm) {
             $docFrequency = $reader->docFreq($phraseTerm);
             $total += $this->idfFreq($docFrequency, $reader->count());
         }

         return $total;
     }

     // Single term.
     return $this->idfFreq($reader->docFreq($input), $reader->count());
 }
<?php

require_once 'ZSearch/ZSearch.php';
/**
 * ZSearch Simple Search Example
 *
 * ZSearch allows you to search all fields of all documents
 */
/**
 * Open the index for searching.
 */
$index = new ZSearch('/tmp/index');
/**
 * Query the index with the search string "nntp".
 */
$hits = $index->find('nntp');
/**
 * List titles of each matching document.
 *
 * All three methods shown below are equivalent, you only need to
 * get the objects if you really need them.
 */
foreach ($hits as $hit) {
    // get the value of the document's "title" field directly from the hit (shortcut)
    // this should be the most common usage
    echo $hit->title;
    // get the document object and the "title" field's value from it (shortcut)
    echo $hit->getDocument()->title . "\n";
    // get the document object and then the "title" object and its value.
    echo $hit->getDocument()->getField('title')->getFieldValue() . "\n";