/**
 * Parse a query string after expanding every "pubset:<name>" token.
 *
 * Each match of the pubset pattern is rewritten by the expand_pubset callback
 * before the resulting text is handed to the parent implementation.
 *
 * @param string $query_string   Raw query text.
 * @param mixed  $flags          Present for signature compatibility; not forwarded.
 * @param mixed  $default_prefix Present for signature compatibility; not forwarded.
 * @return mixed Whatever parent::parse_query() returns for the expanded text.
 */
function parse_query($query_string, $flags = null, $default_prefix = null)
{
    // Only the one-argument form is supported: the Xapian PHP bindings seem
    // to have some issue with the additional args, so they are never passed on.
    assert(1 == func_num_args());

    $expanded = preg_replace_callback("/(pubset:([-\\w0-9]+))/", "expand_pubset", $query_string);

    return parent::parse_query($expanded);
}
/**
 * Run a free-text query against a Xapian database, print one line per match,
 * print the facet counts gathered from value slot 1, and log the request.
 *
 * @param string $dbpath      Path of the Xapian database to open.
 * @param string $querystring User query; "title:", "description:" and the
 *                            boolean "material:" prefixes are recognised.
 * @param int    $offset      Starting point within the result set.
 * @param int    $pagesize    Number of records to retrieve.
 * @return void
 */
function search($dbpath, $querystring, $offset = 0, $pagesize = 10)
{
    // Open the database we're going to search.
    $db = new XapianDatabase($dbpath);

### Start of example code.
    // Query parser with an English stemmer and the field prefixes.
    $queryparser = new XapianQueryParser();
    $queryparser->set_stemmer(new XapianStem("en"));
    $queryparser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
    $queryparser->add_prefix("title", "S");
    $queryparser->add_prefix("description", "XD");
    $queryparser->add_boolean_prefix("material", "XM");

    $query = $queryparser->parse_query($querystring);
### End of example code.

    $enquire = new XapianEnquire($db);
    $enquire->set_query($query);

    // The spy tallies the values stored in slot 1 (MAKER) of matching documents.
    $spy = new XapianValueCountMatchSpy(1);
    $enquire->add_matchspy($spy);

    $matches = $enquire->get_mset($offset, $pagesize);

    // Walk the match set, printing each hit and remembering its docid.
    $docids = array();
    for ($hit = $matches->begin(), $last = $matches->end(); !$hit->equals($last); $hit->next()) {
        $fields = json_decode($hit->get_document()->get_data());
        $position = $hit->get_rank() + 1;
        $docid = $hit->get_docid();
        $docids[] = $docid;

        printf("%d: #%03d %s\n", $position, $docid, $fields->TITLE);
    }

    // Report every facet value seen by the spy together with its frequency.
    for ($facet = $spy->values_begin(), $facetEnd = $spy->values_end(); !$facet->equals($facetEnd); $facet->next()) {
        print sprintf("Facet: %s; count: %d\n", $facet->get_term(), $facet->get_termfreq());
    }

    // Finally, log the query and the docids that were displayed.
    $shown = implode(" ", $docids);
    log_info(sprintf("xapian.search:'%s'[%d:%d] = %s", $querystring, $offset, $offset + $pagesize, $shown));
}
// Start an enquire session. $enquire = new XapianEnquire($database); // Combine command line arguments up to "--" with spaces between // them, so that simple queries don't have to be quoted at the shell // level. $args = array_slice($argv, 2); $separator = array_search("--", $args); if ($separator === FALSE) { $separator = count($args); } $query_string = join(" ", array_slice($args, 0, $separator)); $rset = new XapianRSet(); foreach (array_slice($args, $separator + 1) as $docid) { $rset->add_document(intval($docid)); } $qp = new XapianQueryParser(); $stemmer = new XapianStem("english"); $qp->set_stemmer($stemmer); $qp->set_database($database); $qp->set_stemming_strategy(XapianQueryParser::STEM_SOME); $query = $qp->parse_query($query_string); print "Parsed query is: {$query->get_description()}\n"; // Find the top 10 results for the query. $enquire->set_query($query); $matches = $enquire->get_mset(0, 10, $rset); // Display the results. print "{$matches->get_matches_estimated()} results found:\n"; foreach ($matches->begin() as $i => $docid) { $n = $i->get_rank() + 1; $data = $i->get_document()->get_data(); print "{$n}: {$i->get_percent()}% docid={$docid} [{$data}]\n\n";
// Geospatial check: AND a plain term query with a distance posting source
// centred on a fixed coordinate; exactly one document is expected to match.
$centre = new XapianLatLongCoords();
$centre->append(new XapianLatLongCoord(40.6048, -74.4427));
$ps = new XapianLatLongDistancePostingSource(COORD_SLOT, $centre, $metric, $range);

$q = new XapianQuery("coffee");
$q = new XapianQuery(XapianQuery::OP_AND, $q, new XapianQuery($ps));

$enq = new XapianEnquire($db);
$enq->set_query($q);
$mset = $enq->get_mset(0, 10);
if (!($mset->size() == 1)) {
    print "Expected one result with XapianLatLongDistancePostingSource, got ";
    print $mset->size() . "\n";
    exit(1);
}

// Iterating the all-terms iterator with foreach must yield each term as the
// value and the iterator itself as the key.
$s = '';
foreach ($db->allterms_begin() as $k => $term) {
    $s .= "({$term}:{$k->get_termfreq()})";
}
if ('(coffee:1)' !== $s) {
    print "PHP Iterator iteration of allterms doesn't work ({$s})\n";
    exit(1);
}

// Test reference tracking and regression test for #659: the stopper handed
// to the query parser must still be in effect when the query is parsed.
$qp = new XapianQueryParser();
$stop = new XapianSimpleStopper();
$stop->add('a');
$qp->set_stopper($stop);
$query = $qp->parse_query('a b');
if ('Query(b@2)' !== $query->get_description()) {
    print "XapianQueryParser::set_stopper() didn't work as expected - result was " . $query->get_description() . "\n";
    exit(1);
}
/**
 * Search the hospital Xapian index and return one page of results.
 *
 * Every non-empty filter contributes one prefixed term (e.g. "CITY" . $city)
 * and the keyword contributes the words produced by DBScws::cutWord(). All
 * terms are combined with OP_AND. When no term at all is supplied the single
 * term "HOSPITALdefault" is used instead. Matches are sorted by value slot 0
 * and at most 4000 of them are fetched, so the returned count never exceeds
 * 4000.
 *
 * @param string     $disease  Disease filter (term prefix DISEASE).
 * @param string     $province Province filter (term prefix PROVINCE).
 * @param string     $city     City filter (term prefix CITY).
 * @param string     $district District filter (term prefix DISTRICT).
 * @param string     $street   Street filter (term prefix STREET).
 * @param int|string $level    Level filter (term prefix LEVEL); 0 disables it.
 * @param string     $keyword  Free text, split into terms by DBScws::cutWord().
 * @param int|string $page     1-based page number; empty or invalid values mean page 1.
 * @param int        $pagesize Number of rows per page.
 * @param int|string $id       Id filter (term prefix ID).
 * @return array|false array($rows, $count) where $rows holds the decoded
 *                     documents of the requested page and $count is the size
 *                     of the fetched match set; false when the index cannot
 *                     be opened.
 */
public static function searchHospital($disease = '', $province = '', $city = '', $district = '', $street = '', $level = 0, $keyword = '', $page = '', $pagesize = 20, $id = '')
{
    // Normalise the page number up front so that '', 0, negative or
    // non-numeric input all fall back to the first page.
    $page = max(1, (int) $page);

    if (!self::_connect('/var/www/html/diary/xapian/hospital')) {
        return false;
    }

    // Keyword terms come first, followed by the prefixed filter terms.
    $realQuery = array();
    if ($keyword) {
        $words = DBScws::cutWord($keyword, true);
        // NOTE(review): cutWord() is assumed to return an array of terms;
        // anything else is treated as "no keyword terms" - confirm.
        if (is_array($words)) {
            $realQuery = $words;
        }
    }

    $filters = array(
        'DISEASE'  => $disease,
        'PROVINCE' => $province,
        'CITY'     => $city,
        'DISTRICT' => $district,
        'STREET'   => $street,
        'LEVEL'    => $level,
        'ID'       => $id,
    );
    foreach ($filters as $prefix => $value) {
        if ($value) {
            $realQuery[] = $prefix . $value;
        }
    }

    if (empty($realQuery)) {
        $realQuery[] = 'HOSPITAL' . 'default';
    }

    $enquire = new XapianEnquire(self::$_INSTANCE);
    $enquire->set_sort_by_value(0);
    $enquire->set_query(new XapianQuery(XapianQuery::OP_AND, $realQuery));
    $matches = $enquire->get_mset(0, 4000);

    $count = $matches->size();

    // Half-open window [first, last) of match positions belonging to $page.
    $first = ($page - 1) * $pagesize;
    $last = $page * $pagesize;

    $re = array();
    $index = 0;
    $start = $matches->begin();
    $end = $matches->end();
    while (!$start->equals($end)) {
        if ($index >= $last) {
            // Everything after the requested page is irrelevant.
            break;
        }

        if ($index >= $first) {
            $result = json_decode($start->get_document()->get_data(), true);

            // Keep the full contact string as "phone", then reduce title and
            // contact to their first "||" / "," separated component.
            $result['phone'] = $result['contact'];
            $r = explode("||", $result['title']);
            $result['title'] = $r[0];
            $r = explode(",", $result['contact']);
            $result['contact'] = $r[0];

            // Strip the fields that are not exposed to callers.
            unset($result['puid'], $result['thumb_img'], $result['score'], $result['website'], $result['post_at'], $result['refresh_at'], $result['grab_url'], $result['ad_status'], $result['ad_types'], $result['user_id'], $result['username'], $result['listing_status'], $result['base_tag'], $result['image_count']);

            $re[] = $result;
        }

        $start->next();
        $index++;
    }

    return array($re, $count);
}
/**
 * Count the results for the current search.
 *
 * Builds the Xapian query from query_remade(), applies the collapsing and
 * bias options found in $this->prefixed, fetches up to 500 matches and
 * returns either the exact size of that set (when fewer than 500 came back)
 * or Xapian's estimate.
 *
 * @return int|null Number of matches, or null when XAPIANDB is not configured.
 */
function run_count()
{
    global $xapiandb;
    global $PAGE;

    if (!(defined('XAPIANDB') && XAPIANDB)) {
        return null;
    }

    $start = getmicrotime();

    // Lazily open the shared database handle and this object's enquire session.
    if (!$xapiandb) {
        $xapiandb = new XapianDatabase(XAPIANDB);
    }
    if (!$this->enquire) {
        $this->enquire = new XapianEnquire($xapiandb);
    }

    $queryparser = new XapianQueryParser();
    $queryparser->set_stemming_strategy(QueryParser_STEM_NONE);
    $queryparser->set_default_op(Query_OP_AND);
    $queryparser->add_prefix("speaker", "speaker:");
    $queryparser->add_prefix("major", "major:");
    $queryparser->add_prefix('date', 'date:');
    $queryparser->add_prefix('batch', 'batch:');

    twfy_debug("SEARCH", "query remade -- " . $this->query_remade());

    // The query is rebuilt with query_remade() and that text is what gets
    // parsed: the parser's error handling is a bit knackered, and the
    // highlighting etc. must match exactly what was searched for.
    // XXX don't need to do this for more recent Xapians
    $query = $queryparser->parse_query($this->query_remade());
    twfy_debug("SEARCH", "queryparser description -- " . $query->get_description());
    $this->enquire->set_query($query);

    // Apply collapsing and sorting options.
    $collapsed = false;
    foreach ($this->prefixed as $items) {
        if ($items[0] == 'groupby') {
            $collapsed = true;
            switch ($items[1]) {
                case 'day':
                    $this->enquire->set_collapse_key(2);
                    break;
                case 'debate':
                    $this->enquire->set_collapse_key(3);
                    break;
                case 'speech':
                    // Grouping by speech means no collapsing at all.
                    break;
                default:
                    $PAGE->error_message("Unknown group by '{$items['1']}' ignored");
                    break;
            }
        } elseif ($items[0] == 'bias') {
            $parts = explode(":", $items[1]);
            $weight = $parts[0];
            $halflife = $parts[1];
            $this->enquire->set_bias($weight, intval($halflife));
        } elseif ($items[0] == 'speaker') {
            // Don't do any collapsing if we're searching for a person's speeches
            $collapsed = true;
        }
    }

    // Default to grouping by subdebate, i.e. by page.
    if (!$collapsed) {
        $this->enquire->set_collapse_key(7);
    }

    $matches = $this->enquire->get_mset(0, 500);

    // The estimate is sometimes too large and the size is sometimes too low
    // (it is capped by the 500 requested above), so use the size only when
    // the cap was not reached. The exact page of results is fetched later.
    $size = $matches->size();
    $count = ($size < 500) ? $size : $matches->get_matches_estimated();

    $duration = getmicrotime() - $start;
    twfy_debug("SEARCH", "Search count took {$duration} seconds.");

    return $count;
}
/**
 * Queries the database.
 *
 * The xapian_query function queries the database using both a query string
 * and application-defined terms. Based on drupal-xapian.
 *
 * When $query_string is non-empty it is parsed (English stemmer, boolean
 * "courseid" and "toolid" prefixes) and AND-ed with every extra term query;
 * otherwise the extra term queries alone are OR-ed together.
 *
 * @param string         $query_string The search string. It is parsed and
 *                                     stemmed automatically.
 * @param XapianDatabase $db           Database to search; when not an object
 *                                     the database at XAPIAN_DB is opened.
 * @param int            $start        Index of the first match to return.
 * @param int            $length       Maximum number of matches to return.
 * @param array          $extra        Extra terms to search for; empty
 *                                     entries are ignored.
 * @param int            $count_type   Which match count to report:
 *                                     0 (or any other value) = best estimate,
 *                                     1 = lower bound, 2 = upper bound.
 * @return array|null array($count, $results), where $results is indexed from
 *                    1 and each entry holds 'courseid', 'toolid', one
 *                    'sf-<code>' entry per specific field, 'xapian_data' and
 *                    'score'; NULL when an exception was raised (the message
 *                    is passed to display_xapian_error()).
 */
function xapian_query($query_string, $db = NULL, $start = 0, $length = 10, $extra = array(), $count_type = 0)
{
    try {
        if (!is_object($db)) {
            $db = new XapianDatabase(XAPIAN_DB);
        }

        // Build subqueries from $extra array. Now only used by tags search
        // filter on search widget.
        $subqueries = array();
        foreach ($extra as $subquery) {
            if (!empty($subquery)) {
                $subqueries[] = new XapianQuery($subquery);
            }
        }

        $enquire = new XapianEnquire($db);

        if (!empty($query_string)) {
            $query_parser = new XapianQueryParser();
            //TODO: choose stemmer
            $query_parser->set_stemmer(new XapianStem("english"));
            $query_parser->set_database($db);
            $query_parser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
            $query_parser->add_boolean_prefix('courseid', XAPIAN_PREFIX_COURSEID);
            $query_parser->add_boolean_prefix('toolid', XAPIAN_PREFIX_TOOLID);

            $parsed = $query_parser->parse_query($query_string);
            $query = new XapianQuery(XapianQuery::OP_AND, array_merge($subqueries, array($parsed)));
        } else {
            $query = new XapianQuery(XapianQuery::OP_OR, $subqueries);
        }

        $enquire->set_query($query);
        $matches = $enquire->get_mset((int) $start, (int) $length);

        $specific_fields = get_specific_field_list();

        $results = array();
        $position = 0;
        // The end iterator does not change while walking, so fetch it once.
        $end = $matches->end();
        for ($i = $matches->begin(); !$i->equals($end); $i->next()) {
            // Positions are 1-based and advance even for unusable documents.
            $position++;

            $document = $i->get_document();
            if (!is_object($document)) {
                continue;
            }

            // Course and tool ids are stored as prefixed terms; drop the
            // first character. A document lacking the term yields ''.
            $courseid_terms = xapian_get_doc_terms($document, XAPIAN_PREFIX_COURSEID);
            $results[$position]['courseid'] = isset($courseid_terms[0]['name'])
                ? substr($courseid_terms[0]['name'], 1)
                : '';

            $toolid_terms = xapian_get_doc_terms($document, XAPIAN_PREFIX_TOOLID);
            $results[$position]['toolid'] = isset($toolid_terms[0]['name'])
                ? substr($toolid_terms[0]['name'], 1)
                : '';

            // One entry per specific field prefix.
            foreach ($specific_fields as $specific_field) {
                $results[$position]['sf-' . $specific_field['code']] = xapian_get_doc_terms($document, $specific_field['code']);
            }

            // NOTE(review): unserialize() is only safe if the document data
            // is always written by this application's own indexer - confirm.
            $results[$position]['xapian_data'] = unserialize($document->get_data());
            $results[$position]['score'] = $i->get_percent();
        }

        switch ($count_type) {
            case 1:
                // Lower bound
                $count = $matches->get_matches_lower_bound();
                break;
            case 2:
                // Upper bound
                $count = $matches->get_matches_upper_bound();
                break;
            case 0:
            default:
                // Best estimate
                $count = $matches->get_matches_estimated();
                break;
        }

        return array($count, $results);
    } catch (Exception $e) {
        display_xapian_error($e->getMessage());
        return NULL;
    }
}
/**
 * Manual smoke test: opens the sample database twice, combines both handles,
 * runs the fixed query "david" and prints the top ten matches. Any exception
 * is printed and the script exits with status 1.
 *
 * @return void
 */
function tests()
{
    include_once("ressources/class.xapian.inc");

    try {
        // Open the database for searching; the same path is added a second
        // time so the search runs over a combined database.
        $primary = new XapianDatabase("/home/dtouzeau/Documents/doc1.db");
        $secondary = new XapianDatabase("/home/dtouzeau/Documents/doc1.db");
        $primary->add_database($secondary);

        // Start an enquire session.
        $session = new XapianEnquire($primary);

        // Parse the fixed query text with an English stemmer.
        $parser = new XapianQueryParser();
        $parser->set_stemmer(new XapianStem("english"));
        $parser->set_database($primary);
        $parser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
        $query = $parser->parse_query("david");
        print "Parsed query is: {$query->get_description()}\n";

        // Find the top 10 results for the query.
        $session->set_query($query);
        $matches = $session->get_mset(0, 10);

        // Display the results.
        print "{$matches->get_matches_estimated()} results found:\n";
        for ($i = $matches->begin(); !$i->equals($matches->end()); $i->next()) {
            $n = $i->get_rank() + 1;
            $data = $i->get_document()->get_data();
            print "$n: {$i->get_percent()}% docid={$i->get_docid()} [$data]\n\n";
        }
    } catch (Exception $e) {
        print $e->getMessage() . "\n";
        exit(1);
    }
}
if ($mset->get_docid(0) != 2) { print "MatchDecider mset has wrong docid in\n"; } if (XapianQuery::OP_ELITE_SET != 10) { print "OP_ELITE_SET is XapianQuery::OP_ELITE_SET not 10\n"; exit(1); } # Regression test - overload resolution involving boolean types failed. $enq->set_sort_by_value(1, TRUE); # Regression test - fixed in 0.9.10.1. $oqparser = new XapianQueryParser(); $oquery = $oqparser->parse_query("I like tea"); # Regression test for bug#192 - fixed in 1.0.3. $enq->set_cutoff(100); # Check DateValueRangeProcessor works. $qp = new XapianQueryParser(); $vrpdate = new XapianDateValueRangeProcessor(1, 1, 1960); $qp->add_valuerangeprocessor($vrpdate); $query = $qp->parse_query('12/03/99..12/04/01'); if ($query->get_description() !== 'Xapian::Query(VALUE_RANGE 1 19991203 20011204)') { print "XapianDateValueRangeProcessor didn't work - result was " . $query->get_description() . "\n"; exit(1); } # Test setting and getting metadata if ($db->get_metadata('Foo') !== '') { print "Unexpected value for metadata associated with 'Foo' (expected ''): '" . $db->get_metadata('Foo') . "'\n"; exit(1); } $db->set_metadata('Foo', 'Foo'); if ($db->get_metadata('Foo') !== 'Foo') { print "Unexpected value for metadata associated with 'Foo' (expected 'Foo'): '" . $db->get_metadata('Foo') . "'\n";
/**
 * Run a free-text query, optionally restricted to documents made of at least
 * one of the given materials, print the matches and the facet counts from
 * value slot 1, and log the request.
 *
 * @param string $dbpath      Path of the Xapian database to open.
 * @param string $querystring User query; "title:" and "description:" prefixes
 *                            are recognised.
 * @param array  $materials   Material names, each optionally written as
 *                            "material:<name>"; empty means no filtering.
 * @param int    $offset      Starting point within the result set.
 * @param int    $pagesize    Number of records to retrieve.
 * @return void
 */
function search($dbpath, $querystring, $materials, $offset = 0, $pagesize = 10)
{
    // Open the database we're going to search.
    $db = new XapianDatabase($dbpath);

### Start of example code.
    // Set up a QueryParser with a stemmer and suitable prefixes
    $queryparser = new XapianQueryParser();
    $queryparser->set_stemmer(new XapianStem("english"));
    // The QueryParser method is set_stemming_strategy(); set_stem_strategy()
    // does not exist in the Xapian API.
    $queryparser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
    $queryparser->add_prefix("title", "S");
    $queryparser->add_prefix("description", "XD");

    // And parse the query
    $query = $queryparser->parse_query($querystring);

    if (empty($materials) === false) {
        // Filter the results to ones which contain at least one of the
        // materials.
        $material_queries = array();

        // Build a query for each material value; terms are stored lower-cased
        // under the XM prefix.
        foreach ($materials as $material) {
            $material = str_replace("material:", "", $material);
            $material_queries[] = new XapianQuery('XM' . strtolower($material));
        }

        // Combine these queries with an OR operator: a document qualifies as
        // soon as it carries any one of the requested materials. (OP_AND
        // would demand every material at once.)
        $material_query = new XapianQuery(XapianQuery::OP_OR, $material_queries);

        // Use the material query to filter the main query without letting it
        // influence the ranking.
        $query = new XapianQuery(XapianQuery::OP_FILTER, $query, $material_query);
    }
### End of example code.

    // Use an Enquire object on the database to run the query
    $enquire = new XapianEnquire($db);
    $enquire->set_query($query);

    // Set up a spy to inspect the MAKER value at slot 1
    $spy = new XapianValueCountMatchSpy(1);
    $enquire->add_matchspy($spy);

    // Retrieve the matches and compute start and end points
    $matches = $enquire->get_mset($offset, $pagesize);
    $start = $matches->begin();
    $end = $matches->end();
    $index = 0;

    // Use an array to record the DocIds of each match
    $docids = array();

    while (!$start->equals($end)) {
        // retrieve the document and its data
        $doc = $start->get_document();
        $fields = json_decode($doc->get_data());
        $position = $offset + $index + 1;

        // record the docid
        $docid = $start->get_docid();
        $docids[] = $docid;

        // display the results
        print sprintf("%d: #%03d %s\n", $position, $docid, $fields->TITLE);

        // increment MSet iterator and our counter
        $start->next();
        $index++;
    }

    // Parse and display the spy values
    $spy_start = $spy->values_begin();
    $spy_end = $spy->values_end();
    while (!$spy_start->equals($spy_end)) {
        print sprintf("Facet: %s; count: %d\n", $spy_start->get_term(), $spy_start->get_termfreq());
        $spy_start->next();
    }

    // Finally, make sure we log the query and displayed results
    log_info(sprintf("xapian.search:'%s'[%d:%d] = %s", $querystring, $offset, $offset + $pagesize, implode(" ", $docids)));
}
/**
 * Full-text search over the indexed files.
 *
 * Parses $query_string with an English stemmer, collapses matches on value
 * slot 0 (the file path) so each file appears once, and returns a
 * description of every matching file. Matches whose file can no longer be
 * loaded are skipped.
 *
 * @param string $query_string User query text.
 * @return array List of arrays with the keys id, path, login, date, name,
 *               extension, score, content and icon.
 */
public function search($query_string)
{
    $database = new XapianDatabase(self::$_database_path);

    // Start an enquire session.
    $enquire = new XapianEnquire($database);

    $qp = new XapianQueryParser();
    $qp->set_stemmer(new XapianStem("english"));
    $qp->set_database($database);
    $qp->set_stemming_strategy(XapianQueryParser::STEM_SOME);
    $query = $qp->parse_query($query_string);

    $enquire->set_query($query);
    // Value slot 0 holds the file path, so collapsing on it keeps a single
    // match per file.
    $enquire->set_collapse_key(0, 1);

    // Ask for as many matches as there are documents, i.e. all of them.
    $matches = $enquire->get_mset(0, $database->get_doccount());

    $results = array();
    $end = $matches->end();
    for ($i = $matches->begin(); !$i->equals($end); $i->next()) {
        try {
            $document = $i->get_document();

            // get_value() takes a numeric value slot; the file path lives in
            // slot 0 (the same slot used for collapsing above).
            $fileobj = new EfrontFile($document->get_value(0));

            $results[] = array(
                'id'        => $fileobj['id'],
                'path'      => str_replace(G_ROOTPATH, '', $fileobj['path']),
                'login'     => $fileobj['users_LOGIN'] ? $fileobj['users_LOGIN'] : '',
                'date'      => formatTimestamp(filemtime($fileobj['path']), 'time_nosec'),
                'name'      => $fileobj['name'],
                'extension' => $fileobj['extension'],
                'score'     => $i->get_percent(),
                'content'   => $document->get_data(),
                'icon'      => $fileobj->getTypeImage(),
            );
        } catch (Exception $e) {
            // Deliberately ignored: don't halt for missing files.
        }
    }

    return $results;
}
/**
 * Return a list of IDs for the given search criteria.
 *
 * The criteria are parsed with spelling correction enabled; the corrected
 * query string reported by the parser is stored in $this->_spelling as a
 * side effect. When a stem locale is configured a matching stemmer is used.
 *
 * @param string $criteria Query text.
 * @param int    $limit    Maximum number of matches to return.
 * @param int    $offset   Index of the first match to return.
 * @return array Values of the XAPIAN_FIELD_ID slot of each match, in rank order.
 */
public function get_by_criteria($criteria, $limit, $offset)
{
    $qp = new XapianQueryParser();
    $enquire = new XapianEnquire($this->_database);

    $locale = $this->get_stem_locale();
    if ($locale) {
        // Note, there may be a problem if this is different than at indexing time!
        $qp->set_stemmer(new XapianStem($locale));
        $qp->set_stemming_strategy(XapianQueryParser::STEM_SOME);
    }

    $qp->set_database($this->_database);

    // NOTE(review): passing only FLAG_SPELLING_CORRECTION replaces the
    // parser's default flag set rather than adding to it - confirm intended.
    $query = $qp->parse_query($criteria, XapianQueryParser::FLAG_SPELLING_CORRECTION);
    $enquire->set_query($query);

    $this->_spelling = $qp->get_corrected_query_string();

    $matches = $enquire->get_mset($offset, $limit);

    // TODO: get count from $matches->get_matches_estimated() instead of current method
    $ids = array();
    $end = $matches->end();
    for ($i = $matches->begin(); !$i->equals($end); $i->next()) {
        $ids[] = $i->get_document()->get_value(self::XAPIAN_FIELD_ID);
    }

    return $ids;
}
# Check PHP4 handling of Xapian::DocNotFoundError $old_error_reporting = error_reporting(); if ($old_error_reporting & E_WARNING) { error_reporting($old_error_reporting ^ E_WARNING); } $doc2 = $db->get_document(2); if ($doc2 != null) { print "Retrieved non-existent document\n"; exit(1); } if ($last_exception !== "DocNotFoundError: Docid 2 not found") { print "Exception string not as expected, got: '{$last_exception}'\n"; exit(1); } # Check QueryParser parsing error. $qp = new XapianQueryParser(); $qp->parse_query("test AND"); if ($last_exception !== "QueryParserError: Syntax: <expression> AND <expression>") { print "Exception string not as expected, got: '{$last_exception}'\n"; exit(1); } if ($old_error_reporting & E_WARNING) { error_reporting($old_error_reporting); } set_error_handler($old_errhandler); # Regression test for bug#193, fixed in 1.0.3. $vrp = new XapianNumberValueRangeProcessor(0, '$', true); $a = '$10'; $b = '20'; $vrp->apply($a, $b); if (xapian_sortable_unserialise($a) != 10) {
/**
 * Search the database at XAPIAN_DIR and return the matches with their terms
 * decoded into named fields.
 *
 * The query is parsed with AND as the default operator, boolean prefixes
 * align/colour/ep/noise/series, a number range processor on value slot 0,
 * and boolean, phrase, love/hate, wildcard and spelling-correction syntax
 * enabled. Matches are sorted by value slot 1 in reverse order.
 *
 * For each match every document term is classified by its first character:
 * A, B, C, E, N and S fill 'align', 'begin', 'colour', 'ep', 'noise' and
 * 'series' with the remainder of the term; I stores the whole term under
 * 'pos'; anything else is appended to 'terms'.
 *
 * @param string $query User query text.
 * @param int    $num   Maximum number of matches to return.
 * @return array Array with the keys 'query' (description of the parsed
 *               query), 'estimate' (estimated number of matches) and 'data'
 *               (one array per match, always containing 'text').
 */
function search($query, $num = 20)
{
    // Single-character term prefix => result field receiving the remainder.
    $fieldByPrefix = array(
        'A' => 'align',
        'B' => 'begin',
        'C' => 'colour',
        'E' => 'ep',
        'N' => 'noise',
        'S' => 'series',
    );

    $db = new XapianDatabase(XAPIAN_DIR);
    $enquire = new XapianEnquire($db);

    $qp = new XapianQueryParser();
    $qp->set_stemmer(new XapianStem("english"));
    $qp->set_database($db);
    $qp->set_stemming_strategy(XapianQueryParser::STEM_SOME);
    // Class constant, consistent with the other XapianQueryParser:: constants
    // used here, instead of the legacy global Query_OP_AND.
    $qp->set_default_op(XapianQuery::OP_AND);
    $qp->add_boolean_prefix('align', 'A');
    $qp->add_boolean_prefix('colour', 'C');
    $qp->add_boolean_prefix('ep', 'E');
    $qp->add_boolean_prefix('noise', 'N');
    $qp->add_boolean_prefix('series', 'S');
    // Keep the processor in a variable so it stays referenced while the
    // parser uses it.
    $valuerange = new XapianNumberValueRangeProcessor(0);
    $qp->add_valuerangeprocessor($valuerange);

    $flags = XapianQueryParser::FLAG_BOOLEAN
        | XapianQueryParser::FLAG_PHRASE
        | XapianQueryParser::FLAG_LOVEHATE
        | XapianQueryParser::FLAG_WILDCARD
        | XapianQueryParser::FLAG_SPELLING_CORRECTION;
    $query = $qp->parse_query($query, $flags);

    $enquire->set_query($query);
    $enquire->set_sort_by_value(1, true);
    $matches = $enquire->get_mset(0, $num);

    $desc = $query->get_description();
    $estimate = $matches->get_matches_estimated();

    $out = array();
    $matchEnd = $matches->end();
    for ($iter = $matches->begin(); !$iter->equals($matchEnd); $iter->next()) {
        $doc = $iter->get_document();
        $data = array('text' => $doc->get_data());

        $termEnd = $doc->termlist_end();
        for ($termiter = $doc->termlist_begin(); !$termiter->equals($termEnd); $termiter->next()) {
            $term = $termiter->get_term();
            $prefix = substr($term, 0, 1);

            if ($prefix === 'I') {
                // Position terms are kept whole, prefix included.
                $data['pos'] = $term;
            } elseif (is_string($prefix) && isset($fieldByPrefix[$prefix])) {
                $data[$fieldByPrefix[$prefix]] = substr($term, 1);
            } else {
                $data['terms'][] = $term;
            }
        }

        $out[] = $data;
    }

    // Drop the database handle before returning.
    $db = null;

    return array(
        'query' => $desc,
        'estimate' => $estimate,
        'data' => $out,
    );
}