Ejemplo n.º 1
0
/**
 * Runs a parsed free-text query against a Xapian database, prints one line
 * per match, prints the facet counts collected from value slot 1, and logs
 * the query together with the docids that were displayed.
 *
 * @param string $dbpath      Path handed to the XapianDatabase constructor.
 * @param string $querystring Raw query text given to the query parser.
 * @param int    $offset      Index of the first match to fetch from the result set.
 * @param int    $pagesize    Maximum number of matches to fetch.
 * @return void
 */
function search($dbpath, $querystring, $offset = 0, $pagesize = 10)
{
    // Database the query is executed against.
    $database = new XapianDatabase($dbpath);

    ### Start of example code.
    // Parser setup: English stemmer, two free-text prefixes and one
    // boolean (filter) prefix.
    $parser = new XapianQueryParser();
    $parser->set_stemmer(new XapianStem("en"));
    $parser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
    $parser->add_prefix("title", "S");
    $parser->add_prefix("description", "XD");
    $parser->add_boolean_prefix("material", "XM");

    // Turn the user's text into a query object.
    $parsed = $parser->parse_query($querystring);
    ### End of example code.

    // The Enquire object runs the query against the database.
    $enquire = new XapianEnquire($database);
    $enquire->set_query($parsed);

    // The match spy tallies the values stored in slot 1 of the documents
    // seen during the match (the original comment calls this the MAKER value).
    $facetSpy = new XapianValueCountMatchSpy(1);
    $enquire->add_matchspy($facetSpy);

    // Fetch the requested page of matches.
    $mset = $enquire->get_mset($offset, $pagesize);

    // Print every match on the page and remember its docid for the log line.
    $shownIds = array();
    for ($hit = $mset->begin(), $lastHit = $mset->end(); !$hit->equals($lastHit); $hit->next()) {
        // Document data is decoded as JSON; its TITLE property is displayed.
        $record = json_decode($hit->get_document()->get_data());
        // get_rank() is zero-based; the displayed position starts at 1.
        $shownPosition = $hit->get_rank() + 1;
        $id = $hit->get_docid();
        $shownIds[] = $id;
        printf("%d: #%03d %s\n", $shownPosition, $id, $record->TITLE);
    }

    // Print each distinct value the spy saw together with its frequency.
    for ($facet = $facetSpy->values_begin(), $lastFacet = $facetSpy->values_end(); !$facet->equals($lastFacet); $facet->next()) {
        printf("Facet: %s; count: %d\n", $facet->get_term(), $facet->get_termfreq());
    }

    // Log the query, the requested window and the docids that were shown.
    $logLine = sprintf(
        "xapian.search:'%s'[%d:%d] = %s",
        $querystring,
        $offset,
        $offset + $pagesize,
        implode(" ", $shownIds)
    );
    log_info($logLine);
}
Ejemplo n.º 2
0
/**
 * Queries the database.
 *
 * The xapian_query function queries the database using both a query string
 * and application-defined terms. Based on drupal-xapian.
 *
 * When $query_string is non-empty it is parsed (English stemmer, STEM_SOME,
 * boolean prefixes "courseid" and "toolid") and AND-ed with every subquery
 * built from $extra. When it is empty according to PHP's empty() (which also
 * covers the string "0"), the subqueries alone are OR-ed together.
 *
 * @param   string          $query_string   The search string. This string will
 *                                          be parsed and stemmed automatically.
 * @param   XapianDatabase  $db             Xapian database to search. When this
 *                                          is not an object, a new
 *                                          XapianDatabase is opened on XAPIAN_DB.
 * @param   int             $start          An integer defining the first
 *                                          document to return.
 * @param   int             $length         The number of results to return.
 * @param   array           $extra          Extra terms to search for; every
 *                                          non-empty entry is passed to the
 *                                          XapianQuery constructor.
 * @param   int             $count_type     Which match count to return:
 *                                          1 = lower bound, 2 = upper bound,
 *                                          any other value (including 0) =
 *                                          best estimate.
 * @return  array|null                      array($count, $results), where
 *                                          $results is keyed by the 1-based
 *                                          position of each match on the page
 *                                          and every entry holds 'courseid',
 *                                          'toolid', one 'sf-<code>' entry per
 *                                          specific field, 'xapian_data' and
 *                                          'score'. NULL is returned after an
 *                                          exception has been reported through
 *                                          display_xapian_error().
 */
function xapian_query($query_string, $db = NULL, $start = 0, $length = 10, $extra = array(), $count_type = 0)
{
    try {
        if (!is_object($db)) {
            $db = new XapianDatabase(XAPIAN_DB);
        }
        // Build subqueries from $extra array. Now only used by tags search filter on search widget
        $subqueries = array();
        foreach ($extra as $subquery) {
            if (!empty($subquery)) {
                $subqueries[] = new XapianQuery($subquery);
            }
        }
        $enquire = new XapianEnquire($db);
        if (!empty($query_string)) {
            $query_parser = new XapianQueryParser();
            //TODO: choose stemmer
            $stemmer = new XapianStem("english");
            $query_parser->set_stemmer($stemmer);
            $query_parser->set_database($db);
            $query_parser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
            $query_parser->add_boolean_prefix('courseid', XAPIAN_PREFIX_COURSEID);
            $query_parser->add_boolean_prefix('toolid', XAPIAN_PREFIX_TOOLID);
            $parsed_query = $query_parser->parse_query($query_string);
            // Every extra subquery AND the parsed text query must match.
            $query = new XapianQuery(XapianQuery::OP_AND, array_merge($subqueries, array($parsed_query)));
        } else {
            // No text query: any of the extra subqueries may match.
            $query = new XapianQuery(XapianQuery::OP_OR, $subqueries);
        }
        $enquire->set_query($query);
        $matches = $enquire->get_mset((int) $start, (int) $length);
        $specific_fields = get_specific_field_list();
        $results = array();
        // 1-based position on the page; it is advanced for every match, so a
        // match whose document is not an object leaves a gap in the keys.
        $position = 0;
        // The end iterator is fetched once instead of on every loop test.
        for ($i = $matches->begin(), $end = $matches->end(); !$i->equals($end); $i->next()) {
            $position++;
            $document = $i->get_document();
            if (!is_object($document)) {
                continue;
            }
            // process one item terms
            // NOTE(review): substr(..., 1) assumes the stored term carries a
            // one-character prefix — confirm against XAPIAN_PREFIX_COURSEID /
            // XAPIAN_PREFIX_TOOLID.
            // A document without the term yields '' instead of triggering an
            // undefined-offset warning and passing null to substr().
            $courseid_terms = xapian_get_doc_terms($document, XAPIAN_PREFIX_COURSEID);
            $results[$position]['courseid'] = isset($courseid_terms[0]['name'])
                ? substr($courseid_terms[0]['name'], 1)
                : '';
            $toolid_terms = xapian_get_doc_terms($document, XAPIAN_PREFIX_TOOLID);
            $results[$position]['toolid'] = isset($toolid_terms[0]['name'])
                ? substr($toolid_terms[0]['name'], 1)
                : '';
            // process each specific field prefix
            foreach ($specific_fields as $specific_field) {
                $results[$position]['sf-' . $specific_field['code']] = xapian_get_doc_terms($document, $specific_field['code']);
            }
            // rest of data
            // NOTE(review): unserialize() is only safe while everything stored
            // in the index is trusted; it must never see attacker-controlled data.
            $results[$position]['xapian_data'] = unserialize($document->get_data());
            $results[$position]['score'] = $i->get_percent();
        }
        switch ($count_type) {
            case 1:
                // Lower bound
                $count = $matches->get_matches_lower_bound();
                break;
            case 2:
                // Upper bound
                $count = $matches->get_matches_upper_bound();
                break;
            case 0:
                // Best estimate (also used for any unrecognised value)
            default:
                $count = $matches->get_matches_estimated();
                break;
        }
        return array($count, $results);
    } catch (Exception $e) {
        display_xapian_error($e->getMessage());
        return NULL;
    }
}
Ejemplo n.º 3
0
/**
 * Searches the Xapian database at XAPIAN_DIR and returns the matches together
 * with the terms indexed for each matching document.
 *
 * The query string is parsed with an English stemmer, AND as the default
 * operator, the boolean prefixes align/colour/ep/noise/series, a numeric
 * value-range processor on slot 0, and the BOOLEAN, PHRASE, LOVEHATE,
 * WILDCARD and SPELLING_CORRECTION parser flags. Matches are sorted by value
 * slot 1 with the API's `reverse` argument set to true.
 *
 * @param string $query Query text to parse.
 * @param int    $num   Maximum number of matches to fetch, starting at the first.
 * @return array Array with three keys: 'query' (description of the parsed
 *               query), 'estimate' (estimated number of matches) and 'data'
 *               (one entry per match holding 'text' plus the values decoded
 *               from the document's terms).
 */
function search($query, $num = 20) {
	$db = new XapianDatabase(XAPIAN_DIR);
	$enquire = new XapianEnquire($db);

	$stemmer = new XapianStem("english");
	$qp = new XapianQueryParser();
	$valuerange = new XapianNumberValueRangeProcessor(0);

	$qp->set_stemmer($stemmer);
	$qp->set_database($db);
	$qp->set_stemming_strategy(XapianQueryParser::STEM_SOME);
	// Class constant, consistent with every other Xapian constant used here;
	// the bare Query_OP_AND global is not guaranteed to be defined.
	$qp->set_default_op(XapianQuery::OP_AND);
	$qp->add_boolean_prefix('align', 'A');
	$qp->add_boolean_prefix('colour', 'C');
	$qp->add_boolean_prefix('ep', 'E');
	$qp->add_boolean_prefix('noise', 'N');
	$qp->add_boolean_prefix('series', 'S');
	$qp->add_valuerangeprocessor($valuerange);

	$query = $qp->parse_query($query, XapianQueryParser::FLAG_BOOLEAN | XapianQueryParser::FLAG_PHRASE |
		XapianQueryParser::FLAG_LOVEHATE | XapianQueryParser::FLAG_WILDCARD |
		XapianQueryParser::FLAG_SPELLING_CORRECTION);

	$enquire->set_query($query);
	$enquire->set_sort_by_value(1, true);
	$matches = $enquire->get_mset(0, $num);

	$desc = $query->get_description();
	$estimate = $matches->get_matches_estimated();

	// One-character term prefix => result key. For these prefixes the prefix
	// character is stripped from the stored value.
	$prefixed = array(
		'A' => 'align',
		'B' => 'begin',
		'C' => 'colour',
		'E' => 'ep',
		'N' => 'noise',
		'S' => 'series',
	);

	$out = array();
	// End iterators are fetched once per collection rather than on every loop test.
	for ($iter = $matches->begin(), $matchEnd = $matches->end(); !$iter->equals($matchEnd); $iter->next()) {
		$doc = $iter->get_document();
		$data = array('text' => $doc->get_data());
		for ($termiter = $doc->termlist_begin(), $termEnd = $doc->termlist_end(); !$termiter->equals($termEnd); $termiter->next()) {
			$term = $termiter->get_term();
			$prefix = substr($term, 0, 1);
			if (isset($prefixed[$prefix])) {
				$data[$prefixed[$prefix]] = substr($term, 1);
			} elseif ($prefix === 'I') {
				// NOTE(review): unlike the other prefixes, 'I' terms are stored
				// with the prefix still attached — behaviour kept as-is; confirm
				// whether that is intended.
				$data['pos'] = $term;
			} else {
				// Anything without a recognised prefix is collected verbatim.
				$data['terms'][] = $term;
			}
		}
		$out[] = $data;
	}

	// Drop the reference to the database handle before returning.
	$db = null;

	return array(
		'query' => $desc,
		'estimate' => $estimate,
		'data' => $out,
	);
}