/**
 * Search the hospital Xapian index and return one page of matching records.
 *
 * Every non-empty filter becomes a prefixed term (for example 'CITY' . $city)
 * and all terms are combined with OP_AND. When neither a keyword nor any
 * filter yields a term, the single term 'HOSPITAL' . 'default' is used.
 * Matches are sorted by value slot 0, at most 4000 of them are fetched, and
 * the requested page is cut out of that set.
 *
 * @param string     $disease  Value appended to the DISEASE prefix.
 * @param string     $province Value appended to the PROVINCE prefix.
 * @param string     $city     Value appended to the CITY prefix.
 * @param string     $district Value appended to the DISTRICT prefix.
 * @param string     $street   Value appended to the STREET prefix.
 * @param int        $level    Value appended to the LEVEL prefix.
 * @param string     $keyword  Free text, split into terms by DBScws::cutWord().
 * @param int|string $page     1-based page number; an empty value means page 1.
 * @param int        $pagesize Number of records per page.
 * @param string     $id       Value appended to the ID prefix.
 *
 * @return array|false array($rows, $count): $rows is the list of decoded
 *                     documents on the requested page, $count is the number of
 *                     matches fetched (never more than 4000). Returns false
 *                     when self::_connect() fails.
 */
public static function searchHospital($disease = '', $province = '', $city = '', $district = '', $street = '', $level = 0, $keyword = '', $page = '', $pagesize = 20, $id = '')
{
    $page = empty($page) ? 1 : (int) $page;
    $pagesize = (int) $pagesize;

    if (!self::_connect('/var/www/html/diary/xapian/hospital')) {
        return false;
    }

    // Keyword terms come first. Anything other than an array coming back from
    // the word cutter is treated as "no keyword terms" so that the filter
    // terms below can still be appended safely.
    $realQuery = array();
    if ($keyword) {
        $keywordTerms = DBScws::cutWord($keyword, true);
        if (is_array($keywordTerms)) {
            $realQuery = $keywordTerms;
        }
    }

    // Prefix => value, in the order the terms are added to the query.
    $filters = array(
        'DISEASE'  => $disease,
        'PROVINCE' => $province,
        'CITY'     => $city,
        'DISTRICT' => $district,
        'STREET'   => $street,
        'LEVEL'    => $level,
        'ID'       => $id,
    );
    foreach ($filters as $prefix => $value) {
        if ($value) {
            $realQuery[] = $prefix . $value;
        }
    }

    // Nothing to filter on: fall back to the catch-all term.
    if (empty($realQuery)) {
        $realQuery[] = 'HOSPITAL' . 'default';
    }

    $enquire = new XapianEnquire(self::$_INSTANCE);
    $enquire->set_sort_by_value(0);
    $enquire->set_query(new XapianQuery(XapianQuery::OP_AND, $realQuery));
    $matches = $enquire->get_mset(0, 4000);

    $count = $matches->size();

    // Half-open window [$first, $last) of match positions on the requested page.
    $first = ($page - 1) * $pagesize;
    $last = $page * $pagesize;

    // Fields removed from every record before it is returned.
    $hiddenFields = array(
        'puid', 'thumb_img', 'score', 'website', 'post_at', 'refresh_at',
        'grab_url', 'ad_status', 'ad_types', 'user_id', 'username',
        'listing_status', 'base_tag', 'image_count',
    );

    $re = array();
    $it = $matches->begin();
    $end = $matches->end();
    for ($index = 0; $index < $last && !$it->equals($end); $index++, $it->next()) {
        if ($index < $first) {
            continue;
        }

        $result = json_decode($it->get_document()->get_data(), true);
        if (!is_array($result)) {
            // Undecodable payload: keep the row, but with empty fields.
            $result = array();
        }

        $contact = isset($result['contact']) ? $result['contact'] : null;
        $title = isset($result['title']) ? $result['title'] : null;

        // 'phone' keeps the full contact value; 'contact' and 'title' are cut
        // down to the part before the first ',' and '||' respectively.
        $result['phone'] = $contact;
        $titleParts = explode("||", is_scalar($title) ? (string) $title : '');
        $result['title'] = $titleParts[0];
        $contactParts = explode(",", is_scalar($contact) ? (string) $contact : '');
        $result['contact'] = $contactParts[0];

        foreach ($hiddenFields as $field) {
            unset($result[$field]);
        }

        $re[] = $result;
    }

    return array($re, $count);
}
// Combine the command-line arguments that follow the first two with single
// spaces, so that simple queries don't have to be quoted at the shell level.
// A literal "--" argument ends the query; everything after it is a docid.
$args = array_slice($argv, 2);
$separator = array_search("--", $args);
$separator = (FALSE === $separator) ? count($args) : $separator;
$query_string = implode(" ", array_slice($args, 0, $separator));

// Document ids listed after the separator seed the relevance set.
$rset = new XapianRSet();
foreach (array_slice($args, $separator + 1) as $docid) {
    $rset->add_document((int) $docid);
}

// Parse the query string with English stemming.
$stemmer = new XapianStem("english");
$qp = new XapianQueryParser();
$qp->set_database($database);
$qp->set_stemmer($stemmer);
$qp->set_stemming_strategy(XapianQueryParser::STEM_SOME);
$query = $qp->parse_query($query_string);
echo "Parsed query is: {$query->get_description()}\n";

// Find the top 10 results for the query.
$enquire->set_query($query);
$matches = $enquire->get_mset(0, 10, $rset);

// Display the results.
echo "{$matches->get_matches_estimated()} results found:\n";
foreach ($matches->begin() as $i => $docid) {
    $n = $i->get_rank() + 1;
    $data = $i->get_document()->get_data();
    echo "{$n}: {$i->get_percent()}% docid={$docid} [{$data}]\n\n";
}

// If no relevant docids were given, invent an RSet containing the top 5
// matches (or all the matches if there are less than 5).
/**
 * Queries the database.
 * The xapian_query function queries the database using both a query string
 * and application-defined terms. Based on drupal-xapian
 *
 * When $query_string is non-empty it is parsed (English stemmer, STEM_SOME,
 * boolean prefixes 'courseid' and 'toolid') and AND-ed with the subqueries
 * built from $extra. When it is empty, the subqueries alone are OR-ed.
 *
 * @param   string          $query_string   The search string. This string will
 *                                          be parsed and stemmed automatically.
 * @param   XapianDatabase  $db             Xapian database to query; when this
 *                                          is not an object, the database at
 *                                          XAPIAN_DB is opened instead.
 * @param   int             $start          An integer defining the first
 *                                          document to return
 * @param   int             $length         The number of results to return.
 * @param   array           $extra          An array containing arrays of
 *                                          extra terms to search for. Empty
 *                                          entries are ignored.
 * @param   int             $count_type     Which match count to report:
 *                                          1 = lower bound, 2 = upper bound,
 *                                          anything else = best estimate.
 * @return  array|null      array($count, $results). $results is keyed by the
 *                          1-based position of each match in the returned page
 *                          and holds 'courseid', 'toolid', one 'sf-<code>'
 *                          entry per specific field, 'xapian_data' and
 *                          'score'. NULL is returned after an exception has
 *                          been passed to display_xapian_error().
 */
function xapian_query($query_string, $db = NULL, $start = 0, $length = 10, $extra = array(), $count_type = 0)
{
    try {
        if (!is_object($db)) {
            $db = new XapianDatabase(XAPIAN_DB);
        }

        // Build subqueries from $extra array. Now only used by tags search filter on search widget
        $subqueries = array();
        foreach ($extra as $subquery) {
            if (!empty($subquery)) {
                $subqueries[] = new XapianQuery($subquery);
            }
        }

        $enquire = new XapianEnquire($db);

        if (!empty($query_string)) {
            $query_parser = new XapianQueryParser();
            //TODO: choose stemmer
            $stemmer = new XapianStem("english");
            $query_parser->set_stemmer($stemmer);
            $query_parser->set_database($db);
            $query_parser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
            $query_parser->add_boolean_prefix('courseid', XAPIAN_PREFIX_COURSEID);
            $query_parser->add_boolean_prefix('toolid', XAPIAN_PREFIX_TOOLID);
            $parsed_query = $query_parser->parse_query($query_string);

            // The text query must match together with every extra subquery.
            $query = new XapianQuery(XapianQuery::OP_AND, array_merge($subqueries, array($parsed_query)));
        } else {
            $query = new XapianQuery(XapianQuery::OP_OR, $subqueries);
        }

        $enquire->set_query($query);
        $matches = $enquire->get_mset((int) $start, (int) $length);

        $specific_fields = get_specific_field_list();
        $results = array();

        // $position is the 1-based key used in $results. It advances for every
        // match, including matches whose document is skipped below.
        $position = 0;
        $end = $matches->end();
        for ($i = $matches->begin(); !$i->equals($end); $i->next()) {
            $position++;
            $document = $i->get_document();
            if (!is_object($document)) {
                continue;
            }

            // Course and tool ids are stored as prefixed terms; drop the
            // one-character prefix. A document without such a term yields ''.
            $courseid_terms = xapian_get_doc_terms($document, XAPIAN_PREFIX_COURSEID);
            $results[$position]['courseid'] = isset($courseid_terms[0]['name'])
                ? substr($courseid_terms[0]['name'], 1)
                : '';

            $toolid_terms = xapian_get_doc_terms($document, XAPIAN_PREFIX_TOOLID);
            $results[$position]['toolid'] = isset($toolid_terms[0]['name'])
                ? substr($toolid_terms[0]['name'], 1)
                : '';

            // process each specific field prefix
            foreach ($specific_fields as $specific_field) {
                $results[$position]['sf-' . $specific_field['code']] = xapian_get_doc_terms($document, $specific_field['code']);
            }

            // rest of data
            // NOTE(review): unserialize() is only safe while the index content
            // is written exclusively by trusted code - confirm for this index.
            $results[$position]['xapian_data'] = unserialize($document->get_data());
            $results[$position]['score'] = $i->get_percent();
        }

        switch ($count_type) {
            case 1: // Lower bound
                $count = $matches->get_matches_lower_bound();
                break;
            case 2: // Upper bound
                $count = $matches->get_matches_upper_bound();
                break;
            case 0: // Best estimate
            default:
                $count = $matches->get_matches_estimated();
                break;
        }

        return array($count, $results);
    } catch (Exception $e) {
        display_xapian_error($e->getMessage());
        return NULL;
    }
}
/**
 * Manual smoke test for the Xapian bindings.
 *
 * Opens the sample database (the same path is opened twice and the second
 * handle is added to the first), runs the fixed query "david" with English
 * stemming and prints the parsed query, the estimated match count and the
 * top 10 hits. On any exception the message is printed and the script exits
 * with status 1.
 */
function tests()
{
    include_once("ressources/class.xapian.inc");

    try {
        // Open the database for searching; the second handle is stacked on the first.
        $primaryDb = new XapianDatabase("/home/dtouzeau/Documents/doc1.db");
        $secondaryDb = new XapianDatabase("/home/dtouzeau/Documents/doc1.db");
        $primaryDb->add_database($secondaryDb);

        // Start an enquire session.
        $enquire = new XapianEnquire($primaryDb);

        // Fixed search text, parsed with an English stemmer.
        $searchText = "david";
        $parser = new XapianQueryParser();
        $parser->set_stemmer(new XapianStem("english"));
        $parser->set_database($primaryDb);
        $parser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
        $query = $parser->parse_query($searchText);
        echo "Parsed query is: {$query->get_description()}\n";

        // Find the top 10 results for the query.
        $enquire->set_query($query);
        $matches = $enquire->get_mset(0, 10);

        // Display the results.
        echo "{$matches->get_matches_estimated()} results found:\n";
        for ($i = $matches->begin(); !$i->equals($matches->end()); $i->next()) {
            $n = $i->get_rank() + 1;
            $data = $i->get_document()->get_data();
            echo "$n: {$i->get_percent()}% docid={$i->get_docid()} [$data]\n\n";
        }
    } catch (Exception $e) {
        echo $e->getMessage() . "\n";
        exit(1);
    }
}
/**
 * Run a full-text search over the database at self::$_database_path.
 *
 * The query string is parsed with an English stemmer (STEM_SOME). Matches are
 * collapsed on the file-path value slot so each file is reported once, and up
 * to get_doccount() matches are requested. Hits whose file cannot be loaded
 * are skipped silently.
 *
 * @param string $query_string The user's search text.
 * @return array List of hits, each an array with the keys 'id', 'path',
 *               'login', 'date', 'name', 'extension', 'score', 'content'
 *               and 'icon'.
 */
public function search($query_string)
{
    // Value slot holding the file path (per the original author's note that
    // index '0' holds it). The original looked the value up with the string
    // 'file'; Xapian value slots are numeric, so the slot is now passed
    // explicitly - presumably what the loose string-to-int conversion
    // produced before.
    $pathSlot = 0;

    $database = new XapianDatabase(self::$_database_path);

    // Start an enquire session.
    $enquire = new XapianEnquire($database);

    $qp = new XapianQueryParser();
    $stemmer = new XapianStem("english");
    $qp->set_stemmer($stemmer);
    $qp->set_database($database);
    $qp->set_stemming_strategy(XapianQueryParser::STEM_SOME);
    $query = $qp->parse_query($query_string);

    $enquire->set_query($query);
    // Collapse on the file path so that a single hit is returned per file.
    $enquire->set_collapse_key($pathSlot, 1);
    $matches = $enquire->get_mset(0, $database->get_doccount());

    $results = array();
    $end = $matches->end();
    for ($i = $matches->begin(); !$i->equals($end); $i->next()) {
        try {
            $document = $i->get_document();
            $fileobj = new EfrontFile($document->get_value($pathSlot));
            $results[] = array(
                'id'        => $fileobj['id'],
                'path'      => str_replace(G_ROOTPATH, '', $fileobj['path']),
                'login'     => $fileobj['users_LOGIN'] ? $fileobj['users_LOGIN'] : '',
                'date'      => formatTimestamp(filemtime($fileobj['path']), 'time_nosec'),
                'name'      => $fileobj['name'],
                'extension' => $fileobj['extension'],
                'score'     => $i->get_percent(),
                'content'   => $document->get_data(),
                'icon'      => $fileobj->getTypeImage(),
            );
        } catch (Exception $e) {
            // Deliberate best-effort: don't halt for missing files.
        }
    }

    return $results;
}
/**
 * Return a list of IDs for the given search criteria.
 *
 * The criteria string is parsed with spelling correction enabled. As a side
 * effect, the corrected query string reported by the parser is stored in
 * $this->_spelling.
 *
 * @param string $criteria Search text handed to the Xapian query parser.
 * @param int    $limit    Maximum number of matches to fetch.
 * @param int    $offset   Index of the first match to fetch.
 * @return array Value of slot self::XAPIAN_FIELD_ID for each fetched match,
 *               in match order.
 */
public function get_by_criteria($criteria, $limit, $offset)
{
    $qp = new XapianQueryParser();
    $enquire = new XapianEnquire($this->_database);

    $stemLocale = $this->get_stem_locale();
    if ($stemLocale) {
        // Note, there may be a problem if this is different than at indexing time!
        $stemmer = new XapianStem($stemLocale);
        $qp->set_stemmer($stemmer);
        $qp->set_stemming_strategy(XapianQueryParser::STEM_SOME);
    }

    $qp->set_database($this->_database);
    $query = $qp->parse_query($criteria, XapianQueryParser::FLAG_SPELLING_CORRECTION);
    $enquire->set_query($query);

    // Remember the parser's spelling suggestion for this query.
    $this->_spelling = $qp->get_corrected_query_string();

    $matches = $enquire->get_mset($offset, $limit);

    // TODO: get count from $matches->get_matches_estimated() instead of current method
    $ids = array();
    $end = $matches->end();
    for ($i = $matches->begin(); !$i->equals($end); $i->next()) {
        $ids[] = $i->get_document()->get_value(self::XAPIAN_FIELD_ID);
    }

    return $ids;
}
/**
 * Search the Xapian database at XAPIAN_DIR and return the decoded hits.
 *
 * The query is parsed with an English stemmer (STEM_SOME), AND as the default
 * operator, the boolean prefixes align/colour/ep/noise/series and a numeric
 * value range on slot 0. Boolean, phrase, love/hate, wildcard and
 * spelling-correction syntax are enabled. Results are sorted by value slot 1
 * with the reverse flag set.
 *
 * @param string $query Search text in query-parser syntax.
 * @param int    $num   Maximum number of hits to return.
 * @return array Array with the keys:
 *               'query'    - description of the parsed query,
 *               'estimate' - estimated number of matches,
 *               'data'     - list of hits. Each hit has 'text' (the document
 *                            data) plus whichever of 'align', 'begin',
 *                            'colour', 'ep', 'noise', 'series', 'pos' and
 *                            'terms' its term list provides.
 */
function search($query, $num = 20)
{
    $db = new XapianDatabase(XAPIAN_DIR);
    $enquire = new XapianEnquire($db);
    $stemmer = new XapianStem("english");
    $qp = new XapianQueryParser();
    $valuerange = new XapianNumberValueRangeProcessor(0);

    $qp->set_stemmer($stemmer);
    $qp->set_database($db);
    $qp->set_stemming_strategy(XapianQueryParser::STEM_SOME);
    // Class constant rather than the legacy flat name Query_OP_AND, which is
    // not guaranteed to be defined by the bindings.
    $qp->set_default_op(XapianQuery::OP_AND);
    $qp->add_boolean_prefix('align', 'A');
    $qp->add_boolean_prefix('colour', 'C');
    $qp->add_boolean_prefix('ep', 'E');
    $qp->add_boolean_prefix('noise', 'N');
    $qp->add_boolean_prefix('series', 'S');
    $qp->add_valuerangeprocessor($valuerange);

    $parsed = $qp->parse_query(
        $query,
        XapianQueryParser::FLAG_BOOLEAN
        | XapianQueryParser::FLAG_PHRASE
        | XapianQueryParser::FLAG_LOVEHATE
        | XapianQueryParser::FLAG_WILDCARD
        | XapianQueryParser::FLAG_SPELLING_CORRECTION
    );

    $enquire->set_query($parsed);
    $enquire->set_sort_by_value(1, true);
    $matches = $enquire->get_mset(0, $num);

    $desc = $parsed->get_description();
    $estimate = $matches->get_matches_estimated();

    // One-character term prefix => output field; the prefix is stripped from
    // the stored value.
    $prefixFields = array(
        'A' => 'align',
        'B' => 'begin',
        'C' => 'colour',
        'E' => 'ep',
        'N' => 'noise',
        'S' => 'series',
    );

    $out = array();
    $matchEnd = $matches->end();
    for ($iter = $matches->begin(); !$iter->equals($matchEnd); $iter->next()) {
        $doc = $iter->get_document();
        $data = array('text' => $doc->get_data());

        $termEnd = $doc->termlist_end();
        for ($termiter = $doc->termlist_begin(); !$termiter->equals($termEnd); $termiter->next()) {
            $term = $termiter->get_term();
            $prefix = substr($term, 0, 1);

            if (isset($prefixFields[$prefix])) {
                $data[$prefixFields[$prefix]] = substr($term, 1);
            } elseif ($prefix == 'I') {
                // 'I' terms are stored whole, prefix included.
                $data['pos'] = $term;
            } else {
                // Everything else is collected as a plain term.
                $data['terms'][] = $term;
            }
        }

        $out[] = $data;
    }

    // Drop our reference to the database handle.
    $db = null;

    return array(
        'query' => $desc,
        'estimate' => $estimate,
        'data' => $out,
    );
}