示例#1
0
 /**
  * Count the results matching the current search.
  *
  * Parses the text returned by $this->query_remade() with a Xapian query
  * parser, applies the collapsing and bias options found in $this->prefixed,
  * and fetches up to 500 matches to work out the count.
  *
  * Side effects: lazily creates the global $xapiandb and $this->enquire, sets
  * the query/collapse key/bias on $this->enquire, and emits debug output via
  * twfy_debug().
  *
  * @return mixed null when the XAPIANDB constant is undefined or falsy;
  *               otherwise the mset size when fewer than 500 matches were
  *               fetched, or Xapian's estimate of the total.
  */
 function run_count()
 {
     if (!defined('XAPIANDB') || !XAPIANDB) {
         return null;
     }

     global $xapiandb;
     global $PAGE;

     $start = getmicrotime();

     // The database handle is shared through a global; the enquire object is
     // kept on this instance. Both are only created on first use.
     if (!$xapiandb) {
         $xapiandb = new XapianDatabase(XAPIANDB);
     }
     if (!$this->enquire) {
         $this->enquire = new XapianEnquire($xapiandb);
     }

     $queryparser = new XapianQueryParser();
     $queryparser->set_stemming_strategy(QueryParser_STEM_NONE);
     $queryparser->set_default_op(Query_OP_AND);
     $queryparser->add_prefix("speaker", "speaker:");
     $queryparser->add_prefix("major", "major:");
     $queryparser->add_prefix('date', 'date:');
     $queryparser->add_prefix('batch', 'batch:');

     twfy_debug("SEARCH", "query remade -- " . $this->query_remade());

     // We rebuild (with query_remade) our query and feed that text string to
     // the query parser.  This is because the error handling in the query parser
     // is a bit knackered, and we want to be sure our highlighting etc. exactly
     // matches. XXX don't need to do this for more recent Xapians
     $query = $queryparser->parse_query($this->query_remade());
     twfy_debug("SEARCH", "queryparser description -- " . $query->get_description());
     $this->enquire->set_query($query);

     // Set collapsing and sorting. Each entry of $this->prefixed is read as a
     // pair: [0] is the option name, [1] is its value.
     $collapsed = false;
     foreach ($this->prefixed as $items) {
         if ($items[0] == 'groupby') {
             $collapsed = true;
             if ($items[1] == 'day') {
                 $this->enquire->set_collapse_key(2);
             } elseif ($items[1] == 'debate') {
                 $this->enquire->set_collapse_key(3);
             } elseif ($items[1] != 'speech') {
                 // 'speech' is accepted but sets no collapse key; anything
                 // else is reported and otherwise ignored.
                 $PAGE->error_message("Unknown group by '{$items[1]}' ignored");
             }
         } elseif ($items[0] == 'bias') {
             // Expected form is "weight:halflife". Pad the pieces so that a
             // value without the ":halflife" part produces a halflife of 0
             // instead of an undefined-offset warning.
             list($weight, $halflife) = array_pad(explode(":", $items[1]), 2, '');
             $this->enquire->set_bias($weight, intval($halflife));
         } elseif ($items[0] == 'speaker') {
             # Don't do any collapsing if we're searching for a person's speeches
             $collapsed = true;
         }
     }

     // default to grouping by subdebate, i.e. by page
     if (!$collapsed) {
         $this->enquire->set_collapse_key(7);
     }

     $matches = $this->enquire->get_mset(0, 500);

     // Take either: 1) the estimate which is sometimes too large or 2) the
     // size which is sometimes too low (it is limited to the 500 in the line
     // above).  We get the exact mset we need later, according to which page
     // we are on.
     if ($matches->size() < 500) {
         $count = $matches->size();
     } else {
         $count = $matches->get_matches_estimated();
     }

     $duration = getmicrotime() - $start;
     twfy_debug("SEARCH", "Search count took {$duration} seconds.");

     return $count;
 }
示例#2
0
/**
 * Run a query against the Xapian database at XAPIAN_DIR.
 *
 * The query string is parsed with English stemming (STEM_SOME), AND as the
 * default operator, the boolean prefixes align/colour/ep/noise/series, and a
 * number value-range processor on value slot 0. Matches are sorted by value
 * slot 1 (the `true` argument is passed through to set_sort_by_value).
 *
 * @param string $query Query string to parse.
 * @param int    $num   Maximum number of matches to fetch.
 *
 * @return array Array with three keys:
 *               'query'    => description of the parsed query,
 *               'estimate' => estimated number of matches,
 *               'data'     => one array per matched document, holding 'text'
 *                             (the document data) plus a key per recognised
 *                             term prefix and a 'terms' list for other terms.
 */
function search($query, $num = 20) {
	$db = new XapianDatabase(XAPIAN_DIR);
	$enquire = new XapianEnquire($db);

	$stemmer = new XapianStem("english");
	$qp = new XapianQueryParser();
	$valuerange = new XapianNumberValueRangeProcessor(0);

	$qp->set_stemmer($stemmer);
	$qp->set_database($db);
	$qp->set_stemming_strategy(XapianQueryParser::STEM_SOME);
	$qp->set_default_op(Query_OP_AND);
	$qp->add_boolean_prefix('align', 'A');
	$qp->add_boolean_prefix('colour', 'C');
	$qp->add_boolean_prefix('ep', 'E');
	$qp->add_boolean_prefix('noise', 'N');
	$qp->add_boolean_prefix('series', 'S');
	$qp->add_valuerangeprocessor($valuerange);

	$flags = XapianQueryParser::FLAG_BOOLEAN | XapianQueryParser::FLAG_PHRASE |
		XapianQueryParser::FLAG_LOVEHATE | XapianQueryParser::FLAG_WILDCARD |
		XapianQueryParser::FLAG_SPELLING_CORRECTION;

	// Keep the parsed query separate from the raw $query string parameter.
	$parsed = $qp->parse_query($query, $flags);

	$enquire->set_query($parsed);
	$enquire->set_sort_by_value(1, true);
	$matches = $enquire->get_mset(0, $num);

	$desc = $parsed->get_description();
	$estimate = $matches->get_matches_estimated();

	// Single-letter term prefixes whose remainder (the term minus its first
	// character) is reported under the named key.
	$prefix_fields = array(
		'A' => 'align',
		'B' => 'begin',
		'C' => 'colour',
		'E' => 'ep',
		'N' => 'noise',
		'S' => 'series',
	);

	$out = array();
	// Fetch the end iterators once instead of on every loop test.
	$matches_end = $matches->end();
	for ($iter = $matches->begin(); !$iter->equals($matches_end); $iter->next()) {
		$doc = $iter->get_document();
		$data = array('text' => $doc->get_data());

		$terms_end = $doc->termlist_end();
		for ($termiter = $doc->termlist_begin(); !$termiter->equals($terms_end); $termiter->next()) {
			$term = $termiter->get_term();
			$prefix = substr($term, 0, 1);
			if (isset($prefix_fields[$prefix])) {
				$data[$prefix_fields[$prefix]] = substr($term, 1);
			} elseif ($prefix === 'I') {
				// 'I' terms are stored whole, prefix included.
				$data['pos'] = $term;
			} else {
				$data['terms'][] = $term;
			}
		}

		$out[] = $data;
	}

	// Drop this function's reference to the database handle.
	$db = null;

	return array(
		'query' => $desc,
		'estimate' => $estimate,
		'data' => $out,
	);
}