/**
 * Look up a single term in $this->db and return the 25 best matches.
 *
 * The term is wrapped directly in a XapianQuery; no query parser or stemmer
 * is applied here.
 *
 * @param string $term Term passed straight to the XapianQuery constructor.
 * @return array One entry per match, in match order: the document data split
 *               on ':' into at most three parts.
 */
function search($term) {
    $enquire = new XapianEnquire($this->db);
    $enquire->set_query(new XapianQuery($term));
    $matches = $enquire->get_mset(0, 25);

    // Hard-wired switch: this branch never runs while the condition is 0.
    if (0 /*SCORING*/) {
        $scores = array();
        $i = $matches->begin();
        while (!$i->equals($matches->end())) {
            $row = $i->get_document();
            $str = $i->get_percent()."% [".$row->get_data()."]";
            $scores[] = $str;
            if (0/*DEBUG*/) {
                wfDebug("$str\n");
            }
            $i->next();
        }
    }

    $result = array();
    $last = $matches->end();
    for ($hit = $matches->begin(); !$hit->equals($last); $hit->next()) {
        $payload = $hit->get_document()->get_data();
        $result[] = explode(':', $payload, 3);
    }

    # $db->close() is not available in Xapian 1.0.X, so nothing is closed here.
    return $result;
}
/**
 * Run a parsed query against the database at $dbpath, print one page of
 * matches, print the facet counts gathered from value slot 1, and log the
 * query together with the docids that were shown.
 *
 * @param string $dbpath      Path handed to the XapianDatabase constructor.
 * @param string $querystring Query text given to the query parser.
 * @param int    $offset      Index of the first match to retrieve.
 * @param int    $pagesize    Maximum number of matches to retrieve.
 * @return void
 */
function search($dbpath, $querystring, $offset = 0, $pagesize = 10) {
    $database = new XapianDatabase($dbpath);

    // Query parser: stemming plus the free-text and boolean prefixes.
    $parser = new XapianQueryParser();
    $parser->set_stemmer(new XapianStem("en"));
    $parser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
    $parser->add_prefix("title", "S");
    $parser->add_prefix("description", "XD");
    $parser->add_boolean_prefix("material", "XM");
    $parsed = $parser->parse_query($querystring);

    $enquire = new XapianEnquire($database);
    $enquire->set_query($parsed);

    // The spy counts the values found in slot 1 while the match runs.
    $spy = new XapianValueCountMatchSpy(1);
    $enquire->add_matchspy($spy);

    $matches = $enquire->get_mset($offset, $pagesize);

    // Print each hit and remember its docid for the log line.
    $docids = array();
    for ($hit = $matches->begin(), $stop = $matches->end(); !$hit->equals($stop); $hit->next()) {
        $fields = json_decode($hit->get_document()->get_data());
        $position = $hit->get_rank() + 1;
        $docid = $hit->get_docid();
        $docids[] = $docid;
        printf("%d: #%03d %s\n", $position, $docid, $fields->TITLE);
    }

    // Print every facet value the spy saw, with its frequency.
    for ($facet = $spy->values_begin(), $facet_stop = $spy->values_end(); !$facet->equals($facet_stop); $facet->next()) {
        print sprintf("Facet: %s; count: %d\n", $facet->get_term(), $facet->get_termfreq());
    }

    $shown = implode(" ", $docids);
    log_info(sprintf("xapian.search:'%s'[%d:%d] = %s", $querystring, $offset, $offset + $pagesize, $shown));
}
$separator = count($args); } $query_string = join(" ", array_slice($args, 0, $separator)); $rset = new XapianRSet(); foreach (array_slice($args, $separator + 1) as $docid) { $rset->add_document(intval($docid)); } $qp = new XapianQueryParser(); $stemmer = new XapianStem("english"); $qp->set_stemmer($stemmer); $qp->set_database($database); $qp->set_stemming_strategy(XapianQueryParser::STEM_SOME); $query = $qp->parse_query($query_string); print "Parsed query is: {$query->get_description()}\n"; // Find the top 10 results for the query. $enquire->set_query($query); $matches = $enquire->get_mset(0, 10, $rset); // Display the results. print "{$matches->get_matches_estimated()} results found:\n"; foreach ($matches->begin() as $i => $docid) { $n = $i->get_rank() + 1; $data = $i->get_document()->get_data(); print "{$n}: {$i->get_percent()}% docid={$docid} [{$data}]\n\n"; } // If no relevant docids were given, invent an RSet containing the top 5 // matches (or all the matches if there are less than 5). if ($rset->is_empty()) { $c = 5; foreach ($matches->begin() as $docid) { $rset->add_document($docid); if (--$c) {
# Build a query directly from a lat/long distance posting source.
# NOTE(review): $coord, $metric, $range and COORD_SLOT are defined outside this excerpt.
$centre = new XapianLatLongCoords($coord);
$query = new XapianQuery(new XapianLatLongDistancePostingSource(COORD_SLOT, $centre, $metric, $range));

# Index one document in an in-memory database: term "coffee" plus a
# serialised coordinate stored in value slot COORD_SLOT.
$db = Xapian::inmemory_open();
$coords = new XapianLatLongCoords();
$coords->append(new XapianLatLongCoord(40.6048, -74.4427));

$doc = new XapianDocument();
$doc->add_term("coffee");
$doc->add_value(COORD_SLOT, $coords->serialise());
$db->add_document($doc);

# Search centre uses the same coordinate that was just indexed.
$centre = new XapianLatLongCoords();
$centre->append(new XapianLatLongCoord(40.6048, -74.4427));

# AND the term query with the distance posting source and expect exactly one hit.
$ps = new XapianLatLongDistancePostingSource(COORD_SLOT, $centre, $metric, $range);
$q = new XapianQuery("coffee");
$q = new XapianQuery(XapianQuery::OP_AND, $q, new XapianQuery($ps));

$enq = new XapianEnquire($db);
$enq->set_query($q);
$mset = $enq->get_mset(0, 10);
if ($mset->size() != 1) {
    print "Expected one result with XapianLatLongDistancePostingSource, got ";
    print $mset->size() . "\n";
    exit(1);
}

# Walk the allterms iterator with foreach: the value is the term and the key
# object is used to read the term frequency. The only expected entry is coffee:1.
$s = '';
foreach ($db->allterms_begin() as $k => $term) {
    $s .= "({$term}:{$k->get_termfreq()})";
}
if ($s !== '(coffee:1)') {
    print "PHP Iterator iteration of allterms doesn't work ({$s})\n";
    exit(1);
}

# Test reference tracking and regression test for #659.
/**
 * @brief Search the hospital Xapian index and return one page of results.
 *
 * Each non-empty filter becomes a prefixed term (e.g. 'CITY' . $city) and all
 * terms are combined with OP_AND. When no filter at all is supplied, the
 * single term 'HOSPITALdefault' is queried instead. At most the first 4000
 * matches are retrieved, ordered via set_sort_by_value(0).
 *
 * @param string     $disease  Disease filter (term prefix DISEASE).
 * @param string     $province Province filter (term prefix PROVINCE).
 * @param string     $city     City filter (term prefix CITY).
 * @param string     $district District filter (term prefix DISTRICT).
 * @param string     $street   Street filter (term prefix STREET).
 * @param int|string $level    Level filter (term prefix LEVEL); 0 disables it.
 * @param string     $keyword  Free text; split into terms by DBScws::cutWord().
 * @param int|string $page     1-based page number; empty, non-numeric or < 1 means page 1.
 * @param int        $pagesize Number of rows per page.
 * @param int|string $id       Id filter (term prefix ID).
 * @return array|false false when the index cannot be opened; otherwise
 *                     array($rows, $count), where $rows are the decoded
 *                     documents of the requested page and $count is the size
 *                     of the retrieved match set (never more than 4000).
 */
public static function searchHospital($disease = '', $province = '', $city = '', $district = '', $street = '', $level = 0, $keyword = '', $page = '', $pagesize = 20, $id = '') {
    // Normalise paging input so the arithmetic below never sees a non-numeric string.
    $page = max(1, (int) $page);
    $pagesize = (int) $pagesize;

    if (!self::_connect('/var/www/html/diary/xapian/hospital')) {
        return false;
    }

    // Collect the query terms; keyword terms (if any) come first.
    $realQuery = array();
    if ($keyword) {
        $realQuery = DBScws::cutWord($keyword, true);
    }
    if ($disease) {
        $realQuery[] = 'DISEASE' . $disease;
    }
    if ($province) {
        $realQuery[] = 'PROVINCE' . $province;
    }
    if ($city) {
        $realQuery[] = 'CITY' . $city;
    }
    if ($district) {
        $realQuery[] = 'DISTRICT' . $district;
    }
    if ($street) {
        $realQuery[] = 'STREET' . $street;
    }
    if ($level) {
        $realQuery[] = 'LEVEL' . $level;
    }
    if ($id) {
        $realQuery[] = 'ID' . $id;
    }
    if (empty($realQuery)) {
        // No filter given: fall back to the catch-all term.
        $realQuery[] = 'HOSPITAL' . 'default';
    }

    $enquire = new XapianEnquire(self::$_INSTANCE);
    $enquire->set_sort_by_value(0);
    $enquire->set_query(new XapianQuery(XapianQuery::OP_AND, $realQuery));
    $matches = $enquire->get_mset(0, 4000);
    $count = $matches->size();

    // Fields stored in the index that are not exposed to callers.
    $hidden = array('puid', 'thumb_img', 'score', 'website', 'post_at', 'refresh_at', 'grab_url', 'ad_status', 'ad_types', 'user_id', 'username', 'listing_status', 'base_tag', 'image_count');

    $first = ($page - 1) * $pagesize;
    $last = $page * $pagesize;
    $re = array();
    $index = 0;
    $end = $matches->end();
    for ($it = $matches->begin(); $index < $last && !$it->equals($end); $it->next(), $index++) {
        if ($index < $first) {
            // Still before the requested page.
            continue;
        }

        $result = json_decode($it->get_document()->get_data(), true);
        if (!is_array($result)) {
            $result = array();
        }
        $contact = isset($result['contact']) ? $result['contact'] : null;
        $title = isset($result['title']) ? $result['title'] : null;

        // 'phone' keeps the full contact string; 'contact' and 'title' keep
        // only the part before the first separator.
        $result['phone'] = $contact;
        $title_parts = explode("||", (string) $title);
        $result['title'] = $title_parts[0];
        $contact_parts = explode(",", (string) $contact);
        $result['contact'] = $contact_parts[0];

        foreach ($hidden as $key) {
            unset($result[$key]);
        }
        $re[] = $result;
    }

    return array($re, $count);
}
/**
 * Query a Xapian database with a free-text string and/or extra raw terms.
 *
 * A non-empty $query_string is parsed (English stemmer, STEM_SOME, boolean
 * prefixes 'courseid' and 'toolid') and AND-ed with every non-empty entry of
 * $extra. With an empty $query_string the $extra entries alone are OR-ed.
 * Based on drupal-xapian.
 *
 * @param string         $query_string Search text; parsed and stemmed.
 * @param XapianDatabase $db           Database to search; when not an object,
 *                                     a database is opened from XAPIAN_DB.
 * @param int            $start        Index of the first match to return.
 * @param int            $length       Maximum number of matches to return.
 * @param array          $extra        Arguments for additional XapianQuery
 *                                     objects (currently the tag filter of
 *                                     the search widget).
 * @param int            $count_type   Which total to report: 1 = lower bound,
 *                                     2 = upper bound, anything else = best
 *                                     estimate.
 * @return array|null array($count, $results), where $results is keyed from 1
 *                    upwards and each row holds 'courseid', 'toolid', one
 *                    'sf-<code>' entry per specific field, 'xapian_data' and
 *                    'score'. NULL after an exception has been reported via
 *                    display_xapian_error().
 */
function xapian_query($query_string, $db = NULL, $start = 0, $length = 10, $extra = array(), $count_type = 0) {
    try {
        if (!is_object($db)) {
            $db = new XapianDatabase(XAPIAN_DB);
        }

        // One subquery per non-empty entry of $extra.
        $subqueries = array();
        foreach ($extra as $subquery) {
            if (empty($subquery)) {
                continue;
            }
            $subqueries[] = new XapianQuery($subquery);
        }

        $enquire = new XapianEnquire($db);

        if (empty($query_string)) {
            $query = new XapianQuery(XapianQuery::OP_OR, $subqueries);
        } else {
            $query_parser = new XapianQueryParser();
            //TODO: choose stemmer
            $stemmer = new XapianStem("english");
            $query_parser->set_stemmer($stemmer);
            $query_parser->set_database($db);
            $query_parser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
            $query_parser->add_boolean_prefix('courseid', XAPIAN_PREFIX_COURSEID);
            $query_parser->add_boolean_prefix('toolid', XAPIAN_PREFIX_TOOLID);
            $parsed = $query_parser->parse_query($query_string);

            // The parsed text must match together with every extra subquery.
            $query = new XapianQuery(XapianQuery::OP_AND, array_merge($subqueries, array($parsed)));
        }

        $enquire->set_query($query);
        $matches = $enquire->get_mset((int) $start, (int) $length);
        $specific_fields = get_specific_field_list();

        $results = array();
        $position = 0;
        for ($hit = $matches->begin(); !$hit->equals($matches->end()); $hit->next()) {
            // The position advances for every match, so keys stay aligned
            // with match order even when a match yields no document object.
            $position++;
            $document = $hit->get_document();
            if (!is_object($document)) {
                continue;
            }

            $row = array();

            // First course/tool term with its one-character prefix stripped.
            $courseid_terms = xapian_get_doc_terms($document, XAPIAN_PREFIX_COURSEID);
            $row['courseid'] = substr($courseid_terms[0]['name'], 1);
            $toolid_terms = xapian_get_doc_terms($document, XAPIAN_PREFIX_TOOLID);
            $row['toolid'] = substr($toolid_terms[0]['name'], 1);

            // All terms of every specific field prefix.
            foreach ($specific_fields as $specific_field) {
                $row['sf-' . $specific_field['code']] = xapian_get_doc_terms($document, $specific_field['code']);
            }

            $row['xapian_data'] = unserialize($document->get_data());
            $row['score'] = $hit->get_percent();

            $results[$position] = $row;
        }

        // Pick the requested flavour of the total match count.
        if ($count_type == 1) {
            $count = $matches->get_matches_lower_bound();
        } elseif ($count_type == 2) {
            $count = $matches->get_matches_upper_bound();
        } else {
            $count = $matches->get_matches_estimated();
        }

        return array($count, $results);
    } catch (Exception $e) {
        display_xapian_error($e->getMessage());
        return NULL;
    }
}
/**
 * Manual smoke test: opens the same on-disk database twice, merges both
 * handles, searches for the fixed string "david" and prints the top 10 hits.
 *
 * Prints the exception message and exits with status 1 when anything throws.
 *
 * @return void
 */
function tests(){
    include_once("ressources/class.xapian.inc");

    try {
        // Open the database for searching and add a second handle on the same path.
        $database = new XapianDatabase("/home/dtouzeau/Documents/doc1.db");
        $database1=new XapianDatabase("/home/dtouzeau/Documents/doc1.db");
        $database->add_database($database1);

        $enquire = new XapianEnquire($database);

        // Parse the fixed query with English stemming.
        $query_string = "david";
        $qp = new XapianQueryParser();
        $qp->set_stemmer(new XapianStem("english"));
        $qp->set_database($database);
        $qp->set_stemming_strategy(XapianQueryParser::STEM_SOME);
        $query = $qp->parse_query($query_string);
        print "Parsed query is: {$query->get_description()}\n";

        // Fetch and print the ten best matches.
        $enquire->set_query($query);
        $matches = $enquire->get_mset(0, 10);
        print "{$matches->get_matches_estimated()} results found:\n";

        for ($i = $matches->begin(); !$i->equals($matches->end()); $i->next()) {
            $n = $i->get_rank() + 1;
            $data = $i->get_document()->get_data();
            print "$n: {$i->get_percent()}% docid={$i->get_docid()} [$data]\n\n";
        }
    } catch (Exception $e) {
        print $e->getMessage() . "\n";
        exit(1);
    }
}
} # Test MultiValueKeyMaker. $doc = new XapianDocument(); $doc->add_term("foo"); $doc->add_value(0, "ABB"); $db2->add_document($doc); $doc->add_value(0, "ABC"); $db2->add_document($doc); $doc->add_value(0, "ABC"); $db2->add_document($doc); $doc->add_value(0, "ABCD"); $db2->add_document($doc); $doc->add_value(0, "ABCÿ"); $db2->add_document($doc); $enquire = new XapianEnquire($db2); $enquire->set_query(new XapianQuery("foo")); $sorter = new XapianMultiValueKeyMaker(); $sorter->add_value(0); $enquire->set_sort_by_key($sorter, true); $mset = $enquire->get_mset(0, 10); mset_expect_order($mset, array(5, 4, 3, 2, 1)); $sorter = new XapianMultiValueKeyMaker(); $sorter->add_value(0, true); $enquire->set_sort_by_key($sorter, true); $mset = $enquire->get_mset(0, 10); mset_expect_order($mset, array(1, 2, 3, 4, 5)); $sorter = new XapianMultiValueKeyMaker(); $sorter->add_value(0); $sorter->add_value(1); $enquire->set_sort_by_key($sorter, true); $mset = $enquire->get_mset(0, 10);
/**
 * Run a parsed query, optionally restricted to a set of materials, print one
 * page of matches and the facet counts from value slot 1, and log the query
 * with the docids that were shown.
 *
 * @param string $dbpath      Path handed to the XapianDatabase constructor.
 * @param string $querystring Query text given to the query parser.
 * @param array  $materials   Material names, each optionally prefixed with
 *                            "material:". When non-empty, only documents
 *                            carrying at least one of them are returned.
 * @param int    $offset      Index of the first match to retrieve.
 * @param int    $pagesize    Maximum number of matches to retrieve.
 * @return void
 */
function search($dbpath, $querystring, $materials, $offset = 0, $pagesize = 10) {
    // Open the database we're going to search.
    $db = new XapianDatabase($dbpath);

    // Set up a QueryParser with a stemmer and suitable prefixes.
    $queryparser = new XapianQueryParser();
    $queryparser->set_stemmer(new XapianStem("english"));
    // The QueryParser method is set_stemming_strategy(); set_stem_strategy() does not exist.
    $queryparser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
    $queryparser->add_prefix("title", "S");
    $queryparser->add_prefix("description", "XD");

    // And parse the query.
    $query = $queryparser->parse_query($querystring);

    if (empty($materials) === false) {
        // Build one boolean term query per material value.
        $material_queries = array();
        foreach ($materials as $material) {
            $material = str_replace("material:", "", $material);
            $material_queries[] = new XapianQuery('XM' . strtolower($material));
        }

        // OR the material terms so a document matching ANY of them passes;
        // OP_AND would demand every listed material on the same document.
        $material_query = new XapianQuery(XapianQuery::OP_OR, $material_queries);

        // OP_FILTER restricts the main query without affecting its weights.
        $query = new XapianQuery(XapianQuery::OP_FILTER, $query, $material_query);
    }

    // Use an Enquire object on the database to run the query.
    $enquire = new XapianEnquire($db);
    $enquire->set_query($query);

    // Set up a spy to inspect the value at slot 1.
    $spy = new XapianValueCountMatchSpy(1);
    $enquire->add_matchspy($spy);

    // Retrieve the matches and compute start and end points.
    $matches = $enquire->get_mset($offset, $pagesize);
    $start = $matches->begin();
    $end = $matches->end();
    $index = 0;

    // Record the docid of each match for the log line.
    $docids = array();
    while (!$start->equals($end)) {
        $doc = $start->get_document();
        $fields = json_decode($doc->get_data());

        // Position within the overall result list, 1-based.
        $position = $offset + $index + 1;

        $docid = $start->get_docid();
        $docids[] = $docid;

        print sprintf("%d: #%03d %s\n", $position, $docid, $fields->TITLE);

        $start->next();
        $index++;
    }

    // Print every value the spy saw, with its frequency.
    $spy_start = $spy->values_begin();
    $spy_end = $spy->values_end();
    while (!$spy_start->equals($spy_end)) {
        print sprintf("Facet: %s; count: %d\n", $spy_start->get_term(), $spy_start->get_termfreq());
        $spy_start->next();
    }

    // Finally, make sure we log the query and displayed results.
    log_info(sprintf("xapian.search:'%s'[%d:%d] = %s", $querystring, $offset, $offset + $pagesize, implode(" ", $docids)));
}
/**
 * Full-text search over the indexed files.
 *
 * Matches are collapsed on value slot 0 (the file path) so each file appears
 * at most once. Matches whose file can no longer be loaded are skipped.
 *
 * @param string $query_string Query text; parsed with English stemming.
 * @return array One row per file with the keys 'id', 'path', 'login', 'date',
 *               'name', 'extension', 'score', 'content' and 'icon'.
 */
public function search($query_string) {
    $database = new XapianDatabase(self::$_database_path);

    // Start an enquire session.
    $enquire = new XapianEnquire($database);

    $qp = new XapianQueryParser();
    $stemmer = new XapianStem("english");
    $qp->set_stemmer($stemmer);
    $qp->set_database($database);
    $qp->set_stemming_strategy(XapianQueryParser::STEM_SOME);
    $query = $qp->parse_query($query_string);

    $enquire->set_query($query);
    // Slot 0 holds the file path; collapsing on it keeps a single match per file.
    $enquire->set_collapse_key(0, 1);
    // Ask for as many matches as there are documents, i.e. all of them.
    $matches = $enquire->get_mset(0, $database->get_doccount());

    $results = array();
    $end = $matches->end();
    for ($i = $matches->begin(); !$i->equals($end); $i->next()) {
        try {
            $document = $i->get_document();
            // Value slots are addressed by number. Read the path from slot 0
            // explicitly instead of passing the non-numeric string 'file'.
            $fileobj = new EfrontFile($document->get_value(0));
            $results[] = array(
                'id' => $fileobj['id'],
                'path' => str_replace(G_ROOTPATH, '', $fileobj['path']),
                'login' => $fileobj['users_LOGIN'] ? $fileobj['users_LOGIN'] : '',
                'date' => formatTimestamp(filemtime($fileobj['path']), 'time_nosec'),
                'name' => $fileobj['name'],
                'extension' => $fileobj['extension'],
                'score' => $i->get_percent(),
                'content' => $document->get_data(),
                'icon' => $fileobj->getTypeImage(),
            );
        } catch (Exception $e) {
            // Deliberate best effort: don't halt for missing files.
        }
    }

    return $results;
}
/**
 * Return a list of posts that are similar to the current post.
 *
 * The post's own document is used as a relevance set to obtain up to 20
 * expansion terms; those terms are OR-ed into a query and the ids of the best
 * matches, excluding the post itself, are returned.
 *
 * @param object $post            Post to find neighbours for; its uid comes
 *                                from $this->get_uid() and its id from $post->id.
 * @param int    $max_recommended Maximum number of ids to return.
 * @return array Ids read from value slot self::XAPIAN_FIELD_ID, best match
 *               first; empty when the post is not in the index.
 */
public function get_similar_posts($post, $max_recommended = 5) {
    $guid = $this->get_uid($post);

    // A post that was never indexed has an empty posting list; there is no
    // docid to seed the relevance set with.
    $posting = $this->_database->postlist_begin($guid);
    if ($posting->equals($this->_database->postlist_end($guid))) {
        return array();
    }

    $enquire = new XapianEnquire($this->_database);
    $rset = new XapianRset();
    $rset->add_document($posting->get_docid());

    // Collect the expansion terms suggested by the post's own document.
    $eset = $enquire->get_eset(20, $rset);
    $terms = array();
    $eset_end = $eset->end();
    for ($i = $eset->begin(); !$i->equals($eset_end); $i->next()) {
        $terms[] = $i->get_term();
    }

    $enquire->set_query(new XapianQuery(XapianQuery::OP_OR, $terms));
    // Ask for one extra match because the post itself is expected among them.
    $matches = $enquire->get_mset(0, $max_recommended + 1);

    $ids = array();
    $mset_end = $matches->end();
    for ($i = $matches->begin(); !$i->equals($mset_end); $i->next()) {
        // Never hand back more than requested, even when the post itself
        // did not show up in the match set.
        if (count($ids) >= $max_recommended) {
            break;
        }
        $id = $i->get_document()->get_value(self::XAPIAN_FIELD_ID);
        if ($id != $post->id) {
            $ids[] = $id;
        }
    }

    return $ids;
}
/**
 * Search the database at XAPIAN_DIR and return the matches with their
 * prefixed terms decoded into named fields.
 *
 * The query is parsed with English stemming, AND as the default operator,
 * the boolean prefixes align/colour/ep/noise/series and a numeric value range
 * on slot 0. Matches are ordered via set_sort_by_value(1, true).
 *
 * @param string $query Query text in query-parser syntax.
 * @param int    $num   Maximum number of matches to return.
 * @return array array('query' => description of the parsed query,
 *               'estimate' => estimated number of matches,
 *               'data' => one array per match holding 'text' plus whichever
 *               of 'align', 'begin', 'colour', 'ep', 'noise', 'pos', 'series'
 *               and 'terms' its term list provides).
 */
function search($query, $num = 20) {
    $db = new XapianDatabase(XAPIAN_DIR);
    $enquire = new XapianEnquire($db);

    $stemmer = new XapianStem("english");
    $qp = new XapianQueryParser();
    $valuerange = new XapianNumberValueRangeProcessor(0);
    $qp->set_stemmer($stemmer);
    $qp->set_database($db);
    $qp->set_stemming_strategy(XapianQueryParser::STEM_SOME);
    // Class constant, consistent with the other XapianQuery::OP_* uses in this file.
    $qp->set_default_op(XapianQuery::OP_AND);
    $qp->add_boolean_prefix('align', 'A');
    $qp->add_boolean_prefix('colour', 'C');
    $qp->add_boolean_prefix('ep', 'E');
    $qp->add_boolean_prefix('noise', 'N');
    $qp->add_boolean_prefix('series', 'S');
    $qp->add_valuerangeprocessor($valuerange);

    $flags = XapianQueryParser::FLAG_BOOLEAN
        | XapianQueryParser::FLAG_PHRASE
        | XapianQueryParser::FLAG_LOVEHATE
        | XapianQueryParser::FLAG_WILDCARD
        | XapianQueryParser::FLAG_SPELLING_CORRECTION;
    $query = $qp->parse_query($query, $flags);

    $enquire->set_query($query);
    $enquire->set_sort_by_value(1, true);
    $matches = $enquire->get_mset(0, $num);

    $desc = $query->get_description();
    $estimate = $matches->get_matches_estimated();

    // One-letter term prefix => output field that receives the term without its prefix.
    $prefix_fields = array(
        'A' => 'align',
        'B' => 'begin',
        'C' => 'colour',
        'E' => 'ep',
        'N' => 'noise',
        'S' => 'series',
    );

    $out = array();
    $matches_end = $matches->end();
    for ($iter = $matches->begin(); !$iter->equals($matches_end); $iter->next()) {
        $doc = $iter->get_document();
        $data = array('text' => $doc->get_data());

        $terms_end = $doc->termlist_end();
        for ($termiter = $doc->termlist_begin(); !$termiter->equals($terms_end); $termiter->next()) {
            $term = $termiter->get_term();
            $prefix = substr($term, 0, 1);

            if (isset($prefix_fields[$prefix])) {
                $data[$prefix_fields[$prefix]] = substr($term, 1);
            } elseif ($prefix == 'I') {
                // 'pos' keeps the whole term, prefix included.
                $data['pos'] = $term;
            } else {
                $data['terms'][] = $term;
            }
        }

        $out[] = $data;
    }

    // Drop the database handle before returning.
    $db = null;

    return array(
        'query' => $desc,
        'estimate' => $estimate,
        'data' => $out,
    );
}