/**
 * Work out how many results the current search produces.
 *
 * Parses the text returned by $this->query_remade() with a Xapian query
 * parser, applies the collapsing / bias options found in $this->prefixed,
 * then fetches up to 500 matches to derive a count.
 *
 * @return int|null The size of the match set when fewer than 500 matches
 *                  came back, otherwise Xapian's estimated match count.
 *                  Null when the XAPIANDB constant is undefined or falsy.
 */
function run_count() {
    if (!defined('XAPIANDB') || !XAPIANDB) {
        return null;
    }

    $start = getmicrotime();

    // The database handle is shared through a global and opened on first use;
    // the enquire object is cached on this instance in the same lazy way.
    global $xapiandb;
    if (!$xapiandb) {
        $xapiandb = new XapianDatabase(XAPIANDB);
    }
    if (!$this->enquire) {
        $this->enquire = new XapianEnquire($xapiandb);
    }

    $queryparser = new XapianQueryParser();
    $queryparser->set_stemming_strategy(QueryParser_STEM_NONE);
    $queryparser->set_default_op(Query_OP_AND);
    $queryparser->add_prefix("speaker", "speaker:");
    $queryparser->add_prefix("major", "major:");
    $queryparser->add_prefix('date', 'date:');
    $queryparser->add_prefix('batch', 'batch:');
    twfy_debug("SEARCH", "query remade -- " . $this->query_remade());

    // We rebuild (with query_remade) our query and feed that text string to
    // the query parser. This is because the error handling in the query parser
    // is a bit knackered, and we want to be sure our highlighting etc. exactly
    // matches. XXX don't need to do this for more recent Xapians
    $query = $queryparser->parse_query($this->query_remade());
    twfy_debug("SEARCH", "queryparser description -- " . $query->get_description());
    $this->enquire->set_query($query);

    // Set collapsing and sorting
    global $PAGE;
    $collapsed = false;
    foreach ($this->prefixed as $items) {
        if ($items[0] == 'groupby') {
            // NOTE(review): the flag is raised before the value is validated,
            // so an unknown group-by also suppresses the default collapse
            // below even though the message says "ignored" -- confirm intent.
            $collapsed = true;
            switch ($items[1]) {
                case 'day':
                    $this->enquire->set_collapse_key(2);
                    break;
                case 'debate':
                    $this->enquire->set_collapse_key(3);
                    break;
                case 'speech':
                    // Per-speech results: no collapse key at all.
                    break;
                default:
                    $PAGE->error_message("Unknown group by '{$items[1]}' ignored");
                    break;
            }
        } elseif ($items[0] == 'bias') {
            // Expected form is "weight:halflife". Pad the pieces so a value
            // without a ':' no longer triggers an undefined-offset notice;
            // the missing half-life still ends up as 0 via intval().
            list($weight, $halflife) = array_pad(explode(":", $items[1]), 2, null);
            $this->enquire->set_bias($weight, intval($halflife));
        } elseif ($items[0] == 'speaker') {
            # Don't do any collapsing if we're searching for a person's speeches
            $collapsed = true;
        }
    }

    // default to grouping by subdebate, i.e. by page
    if (!$collapsed) {
        $this->enquire->set_collapse_key(7);
    }

    $matches = $this->enquire->get_mset(0, 500);

    // Take either: 1) the estimate which is sometimes too large or 2) the
    // size which is sometimes too low (it is limited to the 500 in the line
    // above). We get the exact mset we need later, according to which page
    // we are on.
    if ($matches->size() < 500) {
        $count = $matches->size();
    } else {
        $count = $matches->get_matches_estimated();
    }

    $duration = getmicrotime() - $start;
    twfy_debug("SEARCH", "Search count took {$duration} seconds.");

    return $count;
}
/**
 * Run a query against the Xapian database at XAPIAN_DIR.
 *
 * The query string is parsed with English stemming, AND as the default
 * operator, the boolean prefixes align/colour/ep/noise/series, and a numeric
 * value-range processor on value slot 0. Matches are ordered by value slot 1.
 *
 * @param string $query Raw query text handed to the Xapian query parser.
 * @param int    $num   Maximum number of matches to fetch (default 20).
 *
 * @return array Array with three keys:
 *               'query'    => description string of the parsed query,
 *               'estimate' => estimated total number of matches,
 *               'data'     => list with one array per matching document.
 *               Each document array holds 'text' (the document data) plus
 *               whichever of 'align', 'begin', 'colour', 'ep', 'noise',
 *               'series', 'pos' and 'terms' its term list provides.
 */
function search($query, $num = 20) {
    $db = new XapianDatabase(XAPIAN_DIR);
    $enquire = new XapianEnquire($db);

    // Helper objects stay in named locals for the whole call, as in the
    // original. NOTE(review): presumably the parser only references the
    // range processor rather than copying it -- confirm before inlining.
    $stemmer = new XapianStem("english");
    $valuerange = new XapianNumberValueRangeProcessor(0);

    $qp = new XapianQueryParser();
    $qp->set_stemmer($stemmer);
    $qp->set_database($db);
    $qp->set_stemming_strategy(XapianQueryParser::STEM_SOME);
    $qp->set_default_op(Query_OP_AND);
    $qp->add_boolean_prefix('align', 'A');
    $qp->add_boolean_prefix('colour', 'C');
    $qp->add_boolean_prefix('ep', 'E');
    $qp->add_boolean_prefix('noise', 'N');
    $qp->add_boolean_prefix('series', 'S');
    $qp->add_valuerangeprocessor($valuerange);

    $flags = XapianQueryParser::FLAG_BOOLEAN
        | XapianQueryParser::FLAG_PHRASE
        | XapianQueryParser::FLAG_LOVEHATE
        | XapianQueryParser::FLAG_WILDCARD
        | XapianQueryParser::FLAG_SPELLING_CORRECTION;

    // Keep the caller's string intact; the parsed query gets its own name.
    $parsed = $qp->parse_query($query, $flags);

    $enquire->set_query($parsed);
    $enquire->set_sort_by_value(1, true);
    $matches = $enquire->get_mset(0, $num);

    // Single-letter term prefixes whose remainder is stored under a named key.
    $fields = array(
        'A' => 'align',
        'B' => 'begin',
        'C' => 'colour',
        'E' => 'ep',
        'N' => 'noise',
        'S' => 'series',
    );

    $out = array();
    $lastMatch = $matches->end();
    for ($iter = $matches->begin(); !$iter->equals($lastMatch); $iter->next()) {
        $doc = $iter->get_document();
        $data = array('text' => $doc->get_data());

        $lastTerm = $doc->termlist_end();
        for ($termiter = $doc->termlist_begin(); !$termiter->equals($lastTerm); $termiter->next()) {
            $term = $termiter->get_term();
            $prefix = substr($term, 0, 1);

            if (isset($fields[$prefix])) {
                $data[$fields[$prefix]] = substr($term, 1);
            } elseif ($prefix == 'I') {
                // Unlike the other prefixed terms, 'I' terms are stored whole,
                // prefix included.
                $data['pos'] = $term;
            } else {
                // Everything else is collected as-is; the 'terms' key exists
                // only when at least one such term was seen.
                $data['terms'][] = $term;
            }
        }

        $out[] = $data;
    }

    return array(
        'query' => $parsed->get_description(),
        'estimate' => $matches->get_matches_estimated(),
        'data' => $out,
    );
}