/**
 * Runs a faceted search against a Xapian database and prints the outcome.
 *
 * Every match is printed as "<rank>: #<docid> <title>", followed by one
 * "Facet: ...; count: ..." line per value gathered by a match spy on value
 * slot 1. The query, the requested window and the matched document ids are
 * finally handed to log_info().
 *
 * @param string $dbpath      Path of the Xapian database to open.
 * @param string $querystring Query text handed to the query parser.
 * @param int    $offset      Index of the first match to retrieve.
 * @param int    $pagesize    Maximum number of matches to retrieve.
 */
function search($dbpath, $querystring, $offset = 0, $pagesize = 10)
{
    $database = new XapianDatabase($dbpath);

    // Query parser with an English stemmer and the searchable field prefixes.
    $parser = new XapianQueryParser();
    $parser->set_stemmer(new XapianStem("en"));
    $parser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
    $parser->add_prefix("title", "S");
    $parser->add_prefix("description", "XD");
    $parser->add_boolean_prefix("material", "XM");
    $parsed = $parser->parse_query($querystring);

    $enquire = new XapianEnquire($database);
    $enquire->set_query($parsed);

    // Match spy counting the values stored in slot 1 (the MAKER value).
    $facets = new XapianValueCountMatchSpy(1);
    $enquire->add_matchspy($facets);

    $mset = $enquire->get_mset($offset, $pagesize);

    // Print each match and remember its document id for the log entry.
    $docids = array();
    for ($hit = $mset->begin(), $last = $mset->end(); !$hit->equals($last); $hit->next()) {
        $document = $hit->get_document();
        $fields = json_decode($document->get_data());
        $position = $hit->get_rank() + 1; // ranks are zero-based
        $docid = $hit->get_docid();
        $docids[] = $docid;

        echo sprintf("%d: #%03d %s\n", $position, $docid, $fields->TITLE);
    }

    // Print the facet values collected by the spy.
    for ($value = $facets->values_begin(), $stop = $facets->values_end(); !$value->equals($stop); $value->next()) {
        printf("Facet: %s; count: %d\n", $value->get_term(), $value->get_termfreq());
    }

    // Log the query together with the ids that were displayed.
    $shown = implode(" ", $docids);
    log_info(sprintf("xapian.search:'%s'[%d:%d] = %s", $querystring, $offset, $offset + $pagesize, $shown));
}
/**
 * Queries the Xapian database.
 *
 * Combines a free-text query string (parsed and stemmed with the English
 * stemmer) with application-defined subqueries. Based on drupal-xapian.
 *
 * @param string         $query_string The search string. When empty, only the
 *                                     subqueries built from $extra are used,
 *                                     combined with OP_OR.
 * @param XapianDatabase $db           Database to search. When this is not an
 *                                     object, the database at XAPIAN_DB is opened.
 * @param int            $start        Index of the first match to return.
 * @param int            $length       Maximum number of matches to return.
 * @param array          $extra        Extra terms to search for. Every non-empty
 *                                     element is passed to the XapianQuery
 *                                     constructor; together with a non-empty
 *                                     $query_string they are combined with OP_AND.
 * @param int            $count_type   Which match count to report: 1 = lower
 *                                     bound, 2 = upper bound, anything else
 *                                     (including 0) = best estimate.
 * @return array|null array($count, $results), where $count is the selected
 *                    match count and $results is keyed from 1 upwards with one
 *                    entry per returned document ('courseid', 'toolid',
 *                    'sf-<code>' per specific field, 'xapian_data', 'score').
 *                    NULL when an exception was caught; the message is passed
 *                    to display_xapian_error() first.
 */
function xapian_query($query_string, $db = NULL, $start = 0, $length = 10, $extra = array(), $count_type = 0)
{
    try {
        if (!is_object($db)) {
            $db = new XapianDatabase(XAPIAN_DB);
        }

        // Build subqueries from $extra array. Now only used by tags search filter on search widget
        $subqueries = array();
        foreach ($extra as $subquery) {
            if (!empty($subquery)) {
                $subqueries[] = new XapianQuery($subquery);
            }
        }

        $enquire = new XapianEnquire($db);

        if (!empty($query_string)) {
            $query_parser = new XapianQueryParser();
            //TODO: choose stemmer
            $stemmer = new XapianStem("english");
            $query_parser->set_stemmer($stemmer);
            $query_parser->set_database($db);
            $query_parser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
            $query_parser->add_boolean_prefix('courseid', XAPIAN_PREFIX_COURSEID);
            $query_parser->add_boolean_prefix('toolid', XAPIAN_PREFIX_TOOLID);

            // The parsed text query must match in addition to every subquery.
            $parsed = $query_parser->parse_query($query_string);
            $query = new XapianQuery(XapianQuery::OP_AND, array_merge($subqueries, array($parsed)));
        } else {
            $query = new XapianQuery(XapianQuery::OP_OR, $subqueries);
        }

        $enquire->set_query($query);
        $matches = $enquire->get_mset((int) $start, (int) $length);

        $specific_fields = get_specific_field_list();

        $results = array();
        // Position in the returned page; incremented before use, so $results is 1-based.
        $count = 0;
        $end = $matches->end();
        for ($i = $matches->begin(); !$i->equals($end); $i->next()) {
            $count++;
            $document = $i->get_document();
            if (!is_object($document)) {
                continue;
            }

            // Course and tool ids are stored as prefixed terms; drop the first
            // character. A document without such a term yields an empty string
            // instead of an undefined-offset notice.
            $courseid_terms = xapian_get_doc_terms($document, XAPIAN_PREFIX_COURSEID);
            $results[$count]['courseid'] = isset($courseid_terms[0]['name'])
                ? substr($courseid_terms[0]['name'], 1)
                : '';

            $toolid_terms = xapian_get_doc_terms($document, XAPIAN_PREFIX_TOOLID);
            $results[$count]['toolid'] = isset($toolid_terms[0]['name'])
                ? substr($toolid_terms[0]['name'], 1)
                : '';

            // Terms of each specific field prefix.
            foreach ($specific_fields as $specific_field) {
                $results[$count]['sf-' . $specific_field['code']] =
                    xapian_get_doc_terms($document, $specific_field['code']);
            }

            // NOTE(review): unserialize() is only safe while the index is
            // written exclusively by trusted code - confirm against the indexer.
            $results[$count]['xapian_data'] = unserialize($document->get_data());
            $results[$count]['score'] = $i->get_percent();
        }

        // Replace the page counter with the requested kind of total.
        switch ($count_type) {
            case 1: // Lower bound
                $count = $matches->get_matches_lower_bound();
                break;
            case 2: // Upper bound
                $count = $matches->get_matches_upper_bound();
                break;
            case 0: // Best estimate
            default:
                $count = $matches->get_matches_estimated();
                break;
        }

        return array($count, $results);
    } catch (Exception $e) {
        display_xapian_error($e->getMessage());
        return NULL;
    }
}
/**
 * Searches the Xapian index at XAPIAN_DIR and returns the matching documents.
 *
 * Query terms are combined with AND by default. The boolean prefixes align,
 * colour, ep, noise and series are recognised, as are numeric ranges on value
 * slot 0. Matches are ordered through set_sort_by_value(1, true).
 * FLAG_SPELLING_CORRECTION is enabled, but the corrected query string is not
 * read here.
 *
 * @param string $query Query text as entered by the user.
 * @param int    $num   Maximum number of matches to return.
 * @return array Array with the keys:
 *               - 'query':    description of the parsed query,
 *               - 'estimate': estimated number of matches,
 *               - 'data':     one array per match holding 'text' (document
 *                             data) plus the fields decoded from the
 *                             document's terms ('align', 'begin', 'colour',
 *                             'ep', 'noise', 'series', 'pos') and 'terms' for
 *                             every other term.
 */
function search($query, $num = 20)
{
    $db = new XapianDatabase(XAPIAN_DIR);
    $enquire = new XapianEnquire($db);
    $stemmer = new XapianStem("english");
    $qp = new XapianQueryParser();
    $valuerange = new XapianNumberValueRangeProcessor(0);

    $qp->set_stemmer($stemmer);
    $qp->set_database($db);
    $qp->set_stemming_strategy(XapianQueryParser::STEM_SOME);
    // Documented class constant, consistent with the other constants used here.
    $qp->set_default_op(XapianQuery::OP_AND);
    $qp->add_boolean_prefix('align', 'A');
    $qp->add_boolean_prefix('colour', 'C');
    $qp->add_boolean_prefix('ep', 'E');
    $qp->add_boolean_prefix('noise', 'N');
    $qp->add_boolean_prefix('series', 'S');
    $qp->add_valuerangeprocessor($valuerange);

    $flags = XapianQueryParser::FLAG_BOOLEAN
        | XapianQueryParser::FLAG_PHRASE
        | XapianQueryParser::FLAG_LOVEHATE
        | XapianQueryParser::FLAG_WILDCARD
        | XapianQueryParser::FLAG_SPELLING_CORRECTION;
    $query = $qp->parse_query($query, $flags);

    $enquire->set_query($query);
    $enquire->set_sort_by_value(1, true);
    $matches = $enquire->get_mset(0, $num);

    $desc = $query->get_description();
    $estimate = $matches->get_matches_estimated();

    // Single-character term prefixes whose remainder is stored under a named key.
    $prefix_fields = array(
        'A' => 'align',
        'B' => 'begin',
        'C' => 'colour',
        'E' => 'ep',
        'N' => 'noise',
        'S' => 'series',
    );

    $out = array();
    $matches_end = $matches->end();
    for ($iter = $matches->begin(); !$iter->equals($matches_end); $iter->next()) {
        $doc = $iter->get_document();
        $data = array('text' => $doc->get_data());

        $terms_end = $doc->termlist_end();
        for ($termiter = $doc->termlist_begin(); !$termiter->equals($terms_end); $termiter->next()) {
            $term = $termiter->get_term();
            $prefix = substr($term, 0, 1);

            if (isset($prefix_fields[$prefix])) {
                $data[$prefix_fields[$prefix]] = substr($term, 1);
            } elseif ($prefix == 'I') {
                // 'I' terms are kept whole, including the prefix character.
                $data['pos'] = $term;
            } else {
                $data['terms'][] = $term;
            }
        }

        $out[] = $data;
    }

    // Drop the local reference to the database before returning.
    $db = null;

    return array(
        'query' => $desc,
        'estimate' => $estimate,
        'data' => $out,
    );
}